fixing lat lon logic
This commit is contained in:
parent
5dbfa49369
commit
25d652d7f0
|
|
@ -132,6 +132,44 @@ def is_geom_empty(g):
|
|||
|
||||
|
||||
|
||||
import math
|
||||
|
||||
def normalize_dynamic(val, is_lat=False):
    """Coerce a raw coordinate value to a float within the valid degree range.

    The value is returned unchanged when it already lies inside the valid
    range (``[-90, 90]`` when ``is_lat`` is truthy, ``[-180, 180]``
    otherwise).  If it lies outside, it is divided by successive powers of
    ten (10 up to 1e9) and the first quotient that lands inside the range
    is returned.

    Args:
        val: Raw coordinate; anything accepted by ``float()``.
        is_lat: Use the latitude range when truthy, else the longitude range.

    Returns:
        The (possibly rescaled) coordinate as ``float``, or ``None`` when the
        value is missing, cannot be converted to a number, or cannot be
        brought into range by any of the tried factors.
    """
    if pd.isna(val):
        return None

    # Only conversion failures mean "not a number"; a bare ``except`` would
    # also swallow KeyboardInterrupt / SystemExit.
    try:
        v = float(val)
    except (TypeError, ValueError, OverflowError):
        return None

    limit = 90 if is_lat else 180

    # Already a plausible coordinate (this also covers 0): keep as-is.
    if -limit <= v <= limit:
        return v

    # The smallest power of ten that brings the value into range wins.
    # NaN and infinities never satisfy the comparison and fall through.
    for factor in (10, 100, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9):
        nv = v / factor
        if -limit <= nv <= limit:
            return nv

    return None
|
||||
|
||||
|
||||
|
||||
def normalize_lat(val):
    """Rescale an integer-encoded latitude by a fixed magnitude threshold.

    Magnitudes above 1e9 (e.g. the 10-digit ``-8167413802``) are divided by
    1e9; magnitudes above 1e8 are divided by 1e8 as a fallback for
    variations; every other value is returned unchanged.

    NOTE: values with a magnitude of 1e8 or less are passed through without
    any range check, so the result is not guaranteed to lie in [-90, 90].

    Args:
        val: Raw latitude; anything accepted by ``float()``.

    Returns:
        The (possibly rescaled) latitude as ``float``, or ``None`` when the
        value is missing or cannot be converted to a number.
    """
    if pd.isna(val):
        return None

    # Return None for unparseable input, matching normalize_dynamic, rather
    # than letting the conversion error propagate.
    try:
        v = float(val)
    except (TypeError, ValueError, OverflowError):
        return None

    av = abs(v)
    if av > 1e9:  # e.g. -8167413802 (10 digits)
        return v / 1e9
    if av > 1e8:  # fallback in case of variations
        return v / 1e8
    return v
|
||||
|
||||
|
||||
# ============================================================
|
||||
# FUNGSI UTAMA GEOMETRY DETECTION (LAT/LON / PATH)
|
||||
|
|
@ -149,25 +187,18 @@ def detect_and_build_geometry(df: pd.DataFrame, master_polygons: gpd.GeoDataFram
|
|||
print(f"[INFO] Detected existing geometry in GeoDataFrame ({geom_count} features, {geom_type}).")
|
||||
return df
|
||||
|
||||
lat_col = next(
|
||||
(c for c in df.columns if re.search(r'\b(lat|latitude|y[_\s]*coord|y$)\b', c.lower())), None
|
||||
)
|
||||
lon_col = next(
|
||||
(c for c in df.columns if re.search(r'\b(lon|long|longitude|x[_\s]*coord|x$)\b', c.lower())), None
|
||||
)
|
||||
lat_col = next((c for c in df.columns if re.search(r'\b(lat|latitude|y[_\s]*coord|y$)\b', c.lower())), None)
|
||||
lon_col = next((c for c in df.columns if re.search(r'\b(lon|long|longitude|x[_\s]*coord|x$)\b', c.lower())), None)
|
||||
|
||||
if lat_col and lon_col:
|
||||
df[lat_col] = pd.to_numeric(df[lat_col], errors='coerce')
|
||||
df[lon_col] = pd.to_numeric(df[lon_col], errors='coerce')
|
||||
|
||||
lon_median = df[lon_col].abs().median()
|
||||
lat_median = df[lat_col].abs().median()
|
||||
|
||||
if lon_median > 1000 or lat_median > 1000:
|
||||
df[lon_col] = df[lon_col] / 1e7
|
||||
df[lat_col] = df[lat_col] / 1e7
|
||||
df[lon_col] = df[lon_col].apply(lambda x: normalize_dynamic(x, is_lat=False))
|
||||
df[lat_col] = df[lat_col].apply(normalize_lat)
|
||||
|
||||
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[lon_col], df[lat_col]), crs="EPSG:4326")
|
||||
print("[INFO] Geometry dibangun dari kolom lat/lon.")
|
||||
return gdf
|
||||
|
||||
coord_col = next(
|
||||
|
|
|
|||
|
|
@ -47,19 +47,24 @@ def read_csv(path: str):
|
|||
delimiter = detect_delimiter(path)
|
||||
print(f"[INFO] Detected header line: {header_line + 1}, delimiter: '{delimiter}'")
|
||||
|
||||
df = pd.read_csv(path, header=header_line, sep=delimiter, encoding='utf-8', low_memory=False)
|
||||
df = pd.read_csv(path, header=header_line, sep=delimiter, encoding='utf-8', low_memory=False, thousands=',')
|
||||
|
||||
elif ext in ['.xlsx', '.xls']:
|
||||
# === Baca file Excel ===
|
||||
print(f"[INFO] Membaca file Excel: {os.path.basename(path)}")
|
||||
df = pd.read_excel(path, header=0) # default header baris pertama
|
||||
pre_df = pd.read_excel(path, header=0, dtype=str) # baca semua sebagai string
|
||||
df = pre_df.copy()
|
||||
for col in df.columns:
|
||||
if df[col].str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
|
||||
df[col] = df[col].str.replace(',', '', regex=False)
|
||||
df[col] = pd.to_numeric(df[col], errors='ignore')
|
||||
|
||||
else:
|
||||
raise ValueError("Format file tidak dikenali (hanya .csv, .txt, .xlsx, .xls)")
|
||||
|
||||
except Exception as e:
|
||||
print(f"[WARN] Gagal membaca file ({e}), fallback ke default")
|
||||
df = pd.read_csv(path, encoding='utf-8', low_memory=False)
|
||||
df = pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',')
|
||||
|
||||
# Bersihkan kolom dan baris kosong
|
||||
df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user