fixing lat lon logic

2025-10-30 17:14:53 +07:00 · 2025-10-30 17:14:53 +07:00 · 25d652d7f0
commit 25d652d7f0
parent 5dbfa49369
2 changed files with 51 additions and 15 deletions
--- a/services/geometry_detector.py
+++ b/services/geometry_detector.py
@ -132,6 +132,44 @@ def is_geom_empty(g):
 import math
 def normalize_dynamic(val, is_lat=False):
    """Scale a raw coordinate value into the valid degree range.

    Values already inside the valid range are returned unchanged. Values
    outside it are divided by successive powers of ten (10 up to 1e9) and
    the first quotient that lands inside the range is returned.

    Args:
        val: Raw coordinate; anything ``float()`` accepts, or a missing value.
        is_lat: When True the valid range is [-90, 90] (latitude);
            otherwise it is [-180, 180] (longitude).

    Returns:
        The (possibly rescaled) coordinate as a float, or ``None`` when the
        value is missing, cannot be converted to a float, or no power of
        ten up to 1e9 brings it into range.
    """
    if pd.isna(val):
        return None
    try:
        v = float(val)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a coordinate"; anything else
        # (e.g. KeyboardInterrupt) must propagate instead of being hidden.
        return None

    limit = 90 if is_lat else 180
    if -limit <= v <= limit:
        return v

    # The smallest divisor that fits wins, so the result keeps as many
    # integer digits as the range allows.
    for factor in (10, 100, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9):
        nv = v / factor
        if -limit <= nv <= limit:
            return nv
    return None
 def normalize_lat(val):
    """Rescale an integer-encoded latitude using fixed magnitude thresholds.

    Args:
        val: Raw latitude; anything ``float()`` accepts, or a missing value.

    Returns:
        ``None`` when the value is missing or cannot be converted to a
        float (the same convention ``normalize_dynamic`` uses); otherwise
        the value divided by 1e9 when its magnitude exceeds 1e9, divided
        by 1e8 when its magnitude exceeds 1e8, or unchanged.
    """
    if pd.isna(val):
        return None
    try:
        v = float(val)
    except (TypeError, ValueError, OverflowError):
        # Unparseable input is treated as "no coordinate" rather than
        # aborting the whole column conversion.
        return None

    av = abs(v)
    if av > 1e9:
        # e.g. -8167413802 (10 digits) -> -8.167413802
        return v / 1e9
    if av > 1e8:
        # Fallback for inputs encoded with one digit fewer.
        return v / 1e8
    return v
 # ============================================================
 # FUNGSI UTAMA GEOMETRY DETECTION (LAT/LON / PATH)
@ -149,25 +187,18 @@ def detect_and_build_geometry(df: pd.DataFrame, master_polygons: gpd.GeoDataFram
            print(f"[INFO] Detected existing geometry in GeoDataFrame ({geom_count} features, {geom_type}).")
            return df
-    lat_col = next(
+    lat_col = next((c for c in df.columns if re.search(r'\b(lat|latitude|y[_\s]*coord|y$)\b', c.lower())), None)
-        (c for c in df.columns if re.search(r'\b(lat|latitude|y[_\s]*coord|y$)\b', c.lower())), None
+    lon_col = next((c for c in df.columns if re.search(r'\b(lon|long|longitude|x[_\s]*coord|x$)\b', c.lower())), None)
    )
    lon_col = next(
        (c for c in df.columns if re.search(r'\b(lon|long|longitude|x[_\s]*coord|x$)\b', c.lower())), None
    )
    if lat_col and lon_col:
        df[lat_col] = pd.to_numeric(df[lat_col], errors='coerce')
        df[lon_col] = pd.to_numeric(df[lon_col], errors='coerce')
-        lon_median = df[lon_col].abs().median()
+        df[lon_col] = df[lon_col].apply(lambda x: normalize_dynamic(x, is_lat=False))
-        lat_median = df[lat_col].abs().median()
+        df[lat_col] = df[lat_col].apply(normalize_lat)
        if lon_median > 1000 or lat_median > 1000:
            df[lon_col] = df[lon_col] / 1e7
            df[lat_col] = df[lat_col] / 1e7
        gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[lon_col], df[lat_col]), crs="EPSG:4326")
        print("[INFO] Geometry dibangun dari kolom lat/lon.")
        return gdf
    coord_col = next(
--- a/services/reader_csv.py
+++ b/services/reader_csv.py
@ -47,19 +47,24 @@ def read_csv(path: str):
            delimiter = detect_delimiter(path)
            print(f"[INFO] Detected header line: {header_line + 1}, delimiter: '{delimiter}'")
-            df = pd.read_csv(path, header=header_line, sep=delimiter, encoding='utf-8', low_memory=False)
+            df = pd.read_csv(path, header=header_line, sep=delimiter, encoding='utf-8', low_memory=False, thousands=',')
        elif ext in ['.xlsx', '.xls']:
            # === Baca file Excel ===
            print(f"[INFO] Membaca file Excel: {os.path.basename(path)}")
-            df = pd.read_excel(path, header=0)  # default header baris pertama
+            pre_df = pd.read_excel(path, header=0, dtype=str)  # baca semua sebagai string
            df = pre_df.copy()
            for col in df.columns:
                if df[col].str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
                    df[col] = df[col].str.replace(',', '', regex=False)
                    df[col] = pd.to_numeric(df[col], errors='ignore')
        else:
            raise ValueError("Format file tidak dikenali (hanya .csv, .txt, .xlsx, .xls)")
    except Exception as e:
        print(f"[WARN] Gagal membaca file ({e}), fallback ke default")
-        df = pd.read_csv(path, encoding='utf-8', low_memory=False)
+        df = pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',')
    # Bersihkan kolom dan baris kosong
    df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]