optimize join geom detector

This commit is contained in:
dmsanhrProject 2025-10-30 10:37:45 +07:00
parent 756873bc15
commit 317b0c8235

View File

@ -1,12 +1,13 @@
import geopandas as gpd
from shapely.geometry import Point, LineString
import pandas as pd
import numpy as np
import re
from shapely import wkt
from rapidfuzz import process, fuzz
from sqlalchemy import create_engine
from shapely.geometry.base import BaseGeometry
from core.config import REFERENCE_DB_URL, REFERENCE_SCHEMA, REF_COLUMN_MAP
from core.config import REFERENCE_DB_URL, REFERENCE_SCHEMA
# ============================================================
# KONFIGURASI DAN KONSTANTA
@ -243,6 +244,16 @@ def get_reference_polygons(level):
return gdf
# ============================================================
# TEST : Optimize Join
# ============================================================
def build_join_key(df, cols):
    """Build a "|"-separated composite join key for every row of *df*.

    Each column listed in *cols* is converted to ``str``; cells whose string
    form is exactly ``"nan"`` are replaced by an empty string, and the
    per-row values are then concatenated with ``"|"`` between them.

    Args:
        df: DataFrame holding the key columns.
        cols: Non-empty list of column labels, in the order they should
            appear in the key.

    Returns:
        A 1-D object-dtype ``numpy.ndarray`` of ``str`` with one key per row.

    Raises:
        ValueError: If *cols* is empty.
    """
    if len(cols) == 0:
        raise ValueError("build_join_key needs at least one column")

    # dtype=object guarantees the `+` below is elementwise Python string
    # concatenation regardless of the pandas string backend in use.
    parts = (
        df[cols]
        .astype(str)
        .replace("nan", "", regex=False)
        .to_numpy(dtype=object)
    )

    # Plain elementwise concatenation; `np.char.add` is not a ufunc on
    # NumPy 1.x, so `np.char.add.reduce` is not portable across versions.
    key = parts[:, 0]
    for column in parts.T[1:]:
        key = key + "|" + column
    return key
# ============================================================
# FUNGSI: AUTO ATTACH POLYGON KE DATAFRAME NON-SPASIAL
# ============================================================
@ -324,8 +335,12 @@ def attach_polygon_geometry_auto(df: pd.DataFrame):
# print(f"[DEBUG] Join kolom DF : {join_cols_df}")
# print(f"[DEBUG] Join kolom REF : {join_cols_ref}")
df["_join_key"] = df[join_cols_df].astype(str).agg("|".join, axis=1)
ref_gdf["_join_key"] = ref_gdf[join_cols_ref].astype(str).agg("|".join, axis=1)
# df["_join_key"] = df[join_cols_df].astype(str).agg("|".join, axis=1)
# ref_gdf["_join_key"] = ref_gdf[join_cols_ref].astype(str).agg("|".join, axis=1)
df["_join_key"] = build_join_key(df, join_cols_df)
ref_gdf["_join_key"] = build_join_key(ref_gdf, join_cols_ref)
# print(f"[INFO] Join key berhasil dibuat dari kolom: {join_cols_df}")