import logging
import os
import uuid
import zipfile
from datetime import datetime

import geopandas as gpd
from shapely import wkt
from shapely.errors import ShapelyError

logger = logging.getLogger(__name__)

# File suffixes that only appear inside an Esri File Geodatabase folder.
_GDB_FILE_SUFFIXES = (".gdbtable", ".gdbtablx", ".gdbindexes", ".spx")


def detect_zip_type(zip_path: str) -> str:
    """Classify a zip archive by the spatial data format it contains.

    Args:
        zip_path: Path to the zip archive to inspect.

    Returns:
        ``"gdb"`` if any entry lives under a ``.gdb/`` folder or has a
        geodatabase file suffix, ``"shp"`` if any entry ends with ``.shp``,
        otherwise ``"unknown"``. Geodatabase detection takes precedence.
    """
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        # Lowercase once so every check below is case-insensitive.
        names = [name.lower() for name in zip_ref.namelist()]

    if any(".gdb/" in name for name in names):
        return "gdb"
    if any(name.endswith(_GDB_FILE_SUFFIXES) for name in names):
        return "gdb"
    if any(name.endswith(".shp") for name in names):
        return "shp"
    return "unknown"


def generate_unique_filename(folder="tmp", ext="parquet", digits=6):
    """Return a path inside *folder* that does not exist yet.

    The folder is created if missing. The file itself is NOT created, so
    another process could still claim the same name before the caller
    writes to it.

    Args:
        folder: Directory in which the file name is generated.
        ext: File extension, without the leading dot.
        digits: Currently unused; kept so existing callers keep working.

    Returns:
        A string of the form ``"{folder}/{uuid4-as-int}.{ext}"``.
    """
    os.makedirs(folder, exist_ok=True)
    while True:
        file_id = uuid.uuid4().int
        filename = f"{folder}/{file_id}.{ext}"
        if not os.path.exists(filename):
            return filename


def generate_job_id(user_id: str) -> str:
    """Build a job id from *user_id* and the current local time.

    Returns:
        ``"{user_id}_{YYYYmmddHHMMSS}"``. The timestamp has one-second
        resolution, so two calls within the same second for the same user
        yield the same id.
    """
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    return f"{user_id}_{timestamp}"


def dataframe_validation(df_input, tmp_file):
    """Validate and clean WKT geometries, then export the result to Parquet.

    The original author's note says this is meant to run in a separate
    thread because it is CPU bound; nothing in this function enforces that.

    Steps:
        1. Parse the ``geometry`` column (WKT strings) into geometries;
           rows that are not strings or fail to parse are dropped.
        2. Repair invalid geometries with ``buffer(0)`` and drop rows whose
           geometry is empty afterwards.
        3. Drop the original ``geometry`` column, set the CRS to EPSG:4326,
           and rename every column except ``geom`` to stripped UPPERCASE.
        4. Write the frame to *tmp_file* as Parquet.

    Args:
        df_input: DataFrame with a ``geometry`` column of WKT strings. It is
            copied, so the caller's frame is not modified.
        tmp_file: Destination path for the Parquet file.

    Returns:
        Number of rows written.

    Raises:
        ValueError: If no row has a parseable geometry.
    """
    # Work on a copy so the caller's data is left untouched.
    export_df = df_input.copy()

    # ---------------------------------------------------------------------
    # Step 1: safe WKT loading
    # ---------------------------------------------------------------------
    def safe_load_wkt(raw):
        """Parse one WKT value; return None for anything unparseable."""
        if not isinstance(raw, str):
            return None
        try:
            return wkt.loads(raw)
        except Exception:
            # Deliberately best-effort: ShapelyError (a subclass of
            # Exception) and any other parsing failure just mark the row
            # as having no geometry.
            return None

    export_df["geom"] = export_df["geometry"].apply(safe_load_wkt)

    # ---------------------------------------------------------------------
    # Step 2: drop rows whose WKT conversion failed
    # ---------------------------------------------------------------------
    export_df = export_df[export_df["geom"].notnull()]
    logger.debug("Rows with parseable geometry: %d", len(export_df))

    if export_df.empty:
        raise ValueError("Tidak ada data spasial valid yang ditemukan.")

    export_df = gpd.GeoDataFrame(export_df, geometry="geom")

    # ---------------------------------------------------------------------
    # Step 3: fix topology
    # ---------------------------------------------------------------------
    # buffer(0) is the common GIS trick for repairing light topology errors
    # such as self-intersecting polygon rings.
    export_df["geom"] = export_df["geom"].apply(
        lambda g: g.buffer(0) if not g.is_valid else g
    )

    # The repair can occasionally yield an empty geometry; drop those rows.
    export_df = export_df[~export_df["geom"].is_empty]

    # ---------------------------------------------------------------------
    # Step 4: finalise (CRS and column names)
    # ---------------------------------------------------------------------
    # The original WKT string column is no longer needed.
    export_df = export_df.drop(columns=["geometry"])
    export_df = export_df.set_crs("EPSG:4326", allow_override=True)

    # Attribute columns become stripped UPPERCASE (" id " -> "ID"); the
    # geometry column keeps its lowercase name 'geom'.
    export_df = export_df.rename(
        columns=lambda c: str(c).strip().upper() if c != "geom" else c
    )

    export_df.to_parquet(tmp_file)

    return len(export_df)