106 lines
3.6 KiB
Python
Executable File
106 lines
3.6 KiB
Python
Executable File
import os
|
|
import uuid
|
|
import zipfile
|
|
import geopandas as gpd
|
|
from shapely import wkt
|
|
from shapely.errors import ShapelyError
|
|
from datetime import datetime
|
|
|
|
|
|
def detect_zip_type(zip_path: str) -> str:
    """Classify a zip archive by the kind of spatial dataset it contains.

    Only the member names of the archive are inspected; nothing is extracted.
    Matching is case-insensitive.

    Args:
        zip_path: Path of the zip archive to inspect.

    Returns:
        ``"gdb"`` when any member name contains a ``.gdb/`` directory
        component or ends with a file-geodatabase extension (``.gdbtable``,
        ``.gdbtablx``, ``.gdbindexes``, ``.spx``); ``"shp"`` when any member
        ends with ``.shp``; ``"unknown"`` otherwise. The geodatabase checks
        take precedence over the shapefile check.
    """
    gdb_suffixes = (".gdbtable", ".gdbtablx", ".gdbindexes", ".spx")

    with zipfile.ZipFile(zip_path, "r") as archive:
        # Lower-case once so every check below is case-insensitive.
        member_names = [name.lower() for name in archive.namelist()]

    looks_like_gdb = any(
        ".gdb/" in name or name.endswith(gdb_suffixes) for name in member_names
    )
    if looks_like_gdb:
        return "gdb"

    looks_like_shp = any(name.endswith(".shp") for name in member_names)
    return "shp" if looks_like_shp else "unknown"
|
|
|
|
|
|
def generate_unique_filename(folder="tmp", ext="parquet", digits=6):
    """Return a path inside *folder* that does not exist at the time of the call.

    The folder is created if it is missing. The file itself is NOT created,
    so the returned name is only guaranteed to be free at the moment of the
    check.

    Args:
        folder: Directory that will hold the file; created when absent.
        ext: File extension, without the leading dot.
        digits: Accepted for backward compatibility; currently ignored. The
            identifier is always the full integer value of a random UUID4.

    Returns:
        A string of the form ``"{folder}/{id}.{ext}"``.
    """
    os.makedirs(folder, exist_ok=True)

    candidate = f"{folder}/{uuid.uuid4().int}.{ext}"
    # Retry with a fresh identifier if the name is already taken.
    while os.path.exists(candidate):
        candidate = f"{folder}/{uuid.uuid4().int}.{ext}"
    return candidate
|
|
|
|
|
|
def generate_job_id(user_id: str) -> str:
    """Build a job identifier from the user id and the current local time.

    Args:
        user_id: Identifier of the user that owns the job.

    Returns:
        ``"{user_id}_{YYYYmmddHHMMSS}"``, using the naive local clock with
        one-second resolution.
    """
    return f"{user_id}_{datetime.now():%Y%m%d%H%M%S}"
|
|
|
|
|
|
def _safe_load_wkt(raw):
    """Parse *raw* as WKT, returning ``None`` instead of raising on bad input."""
    if not isinstance(raw, str):
        return None
    try:
        return wkt.loads(raw)
    except Exception:
        # Best-effort by design: any parse failure (a shapely error or
        # anything else) marks the row as having no usable geometry.
        return None


def dataframe_validation(df_input, tmp_file):
    """Validate and clean WKT geometries, then export the result to Parquet.

    Intended to run in a separate thread (CPU-bound work). The input frame
    is copied first, so the caller's data is not modified.

    Args:
        df_input: DataFrame with a ``"geometry"`` column holding WKT strings.
        tmp_file: Destination passed to ``GeoDataFrame.to_parquet``.

    Returns:
        The number of rows written to the Parquet file.

    Raises:
        ValueError: If no row contains parseable WKT.
    """
    import logging  # function-scope import keeps this block self-contained

    # Work on a copy so the original data is left untouched.
    export_df = df_input.copy()

    # ------------------------------------------------------------------
    # Stage 1: safe WKT loading
    # ------------------------------------------------------------------
    export_df["geom"] = export_df["geometry"].apply(_safe_load_wkt)

    # ------------------------------------------------------------------
    # Stage 2: filter out null geometries
    # ------------------------------------------------------------------
    # Drop rows whose WKT conversion failed (None).
    export_df = export_df[export_df["geom"].notnull()]
    logging.getLogger(__name__).debug(
        "dataframe_validation: %d of %d rows have parseable WKT",
        len(export_df),
        len(df_input),
    )
    if export_df.empty:
        raise ValueError("Tidak ada data spasial valid yang ditemukan.")

    # Promote to a GeoDataFrame with "geom" as the active geometry column.
    export_df = gpd.GeoDataFrame(export_df, geometry="geom")

    # ------------------------------------------------------------------
    # Stage 3: fix topology
    # ------------------------------------------------------------------
    # buffer(0) is a common GIS trick for repairing light topology problems
    # (e.g. a polygon whose boundary crosses itself). Valid geometries are
    # passed through unchanged.
    export_df["geom"] = export_df["geom"].apply(
        lambda g: g.buffer(0) if not g.is_valid else g
    )

    # Drop geometries that became empty after the repair (rare, but safe).
    export_df = export_df[~export_df["geom"].is_empty]

    # ------------------------------------------------------------------
    # Stage 4: finalise (CRS and column names)
    # ------------------------------------------------------------------
    # The old WKT string column is no longer needed.
    export_df = export_df.drop(columns=["geometry"])
    export_df = export_df.set_crs("EPSG:4326", allow_override=True)

    # Upper-case attribute column names and strip stray whitespace
    # (" ID " -> "ID"); the "geom" column keeps its lowercase name.
    export_df = export_df.rename(
        columns=lambda c: str(c).strip().upper() if c != "geom" else c
    )

    # Write the cleaned frame to Parquet.
    export_df.to_parquet(tmp_file)

    return len(export_df)
|
|
|