Update normalize_name; rename normalize_dynamic to normalize_lon

This commit is contained in:
dmsanhrProject 2025-11-04 22:19:25 +07:00
parent f25b4f3851
commit 897cd5d7c3

View File

@ -68,6 +68,8 @@ def normalize_name(name: str, level: str = None):
name = name.strip()
if not name:
return None
name = re.sub(r'\s*\([^)]*\)\s*', '', name)
raw = name.lower()
raw = re.sub(r'^(desa|kelurahan|kel|dusun|kampung)\s+', '', raw)
@ -117,7 +119,6 @@ def normalize_name(name: str, level: str = None):
def is_geom_empty(g):
"""True jika geometry None, NaN, atau geometry Shapely kosong."""
if g is None:
@ -134,7 +135,7 @@ def is_geom_empty(g):
import math
def normalize_dynamic(val, is_lat=False):
def normalize_lon(val, is_lat=False):
if pd.isna(val):
return None
try:
@ -194,7 +195,7 @@ def detect_and_build_geometry(df: pd.DataFrame, master_polygons: gpd.GeoDataFram
df[lat_col] = pd.to_numeric(df[lat_col], errors='coerce')
df[lon_col] = pd.to_numeric(df[lon_col], errors='coerce')
df[lon_col] = df[lon_col].apply(lambda x: normalize_dynamic(x, is_lat=False))
df[lon_col] = df[lon_col].apply(lambda x: normalize_lon(x, is_lat=False))
df[lat_col] = df[lat_col].apply(normalize_lat)
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[lon_col], df[lat_col]), crs="EPSG:4326")