def build_join_key(df, cols):
    """Build a ``|``-separated join key for every row of *df*.

    Each column named in *cols* is converted to text, cells whose text is
    exactly ``"nan"`` (and cells that are still missing after the conversion)
    are blanked, and the per-row values are concatenated with ``"|"`` between
    them, e.g. ``"A|1"``.

    Args:
        df: DataFrame (or GeoDataFrame) holding the key columns.
        cols: Column names to combine, in key order.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype ``object`` with one string per row
        of *df*. When *cols* is empty every key is the empty string.
    """
    cols = list(cols)
    if not cols:
        # np.column_stack / indexing on zero columns would fail; an empty key
        # per row is the natural result of joining nothing.
        return np.full(len(df), "", dtype=object)

    text = (
        df[cols]
        .astype(str)
        .replace("nan", "", regex=False)
        # No-op when astype(str) already stringified everything; guards the
        # concatenation below against values that are still missing.
        .fillna("")
    )
    parts = text.to_numpy(dtype=object)

    # Plain object-array concatenation: unlike ``np.char.add.reduce`` this
    # does not depend on ``np.char.add`` being a ufunc (it is a regular
    # function without ``.reduce`` in NumPy 1.x).
    key = parts[:, 0].copy()
    for idx in range(1, parts.shape[1]):
        key = key + "|" + parts[:, idx]
    return key
df[join_cols_df].astype(str).agg("|".join, axis=1) + # ref_gdf["_join_key"] = ref_gdf[join_cols_ref].astype(str).agg("|".join, axis=1) + + df["_join_key"] = build_join_key(df, join_cols_df) + ref_gdf["_join_key"] = build_join_key(ref_gdf, join_cols_ref) + # print(f"[INFO] Join key berhasil dibuat dari kolom: {join_cols_df}")