import asyncio
import json
import os
import re

import pandas as pd
from shapely import wkb, wkt
from shapely.geometry import MultiLineString, MultiPoint, MultiPolygon
from sqlalchemy import text
from sqlalchemy.exc import SQLAlchemyError

# Project database connection (used below as an async SQLAlchemy engine).
from database.connection import engine
from app.mapset_pipeline.utils.formatters import str_to_date


# Characters that are not letters, digits, "_" or "$" are replaced in table names.
_UNSAFE_IDENT_CHARS = re.compile(r"[^\w$]")

# Single-part geometry types and the multi-part class used to wrap them so
# that every stored row of a given family shares one geometry type.
_MULTI_WRAPPERS = {
    "Polygon": MultiPolygon,
    "LineString": MultiLineString,
    "Point": MultiPoint,
}

# Geometry types that may be used verbatim as the PostGIS column type.
_TYPED_GEOM_COLUMNS = frozenset(
    {"MULTIPOLYGON", "MULTILINESTRING", "MULTIPOINT", "GEOMETRYCOLLECTION"}
)


def _quote_ident(name: str) -> str:
    """Return *name* as a double-quoted SQL identifier with quotes escaped."""
    return '"' + name.replace('"', '""') + '"'


def _to_text(value):
    """Return *value* as a string, or None for missing values (None/NaN/NA/NaT)."""
    if value is None or value is pd.NA or value is pd.NaT:
        return None
    # NaN is the only float that is not equal to itself.
    if isinstance(value, float) and value != value:
        return None
    return str(value)


def _geometry_to_ewkt(raw_geom):
    """Parse a WKT string or WKB bytes value into ``(geom_type, ewkt)``.

    Returns None when the value is empty, is neither ``str`` nor ``bytes``,
    or parses to an empty geometry. Parsing errors propagate to the caller.
    """
    if isinstance(raw_geom, str):
        loader = wkt.loads
    elif isinstance(raw_geom, bytes):
        loader = wkb.loads
    else:
        return None
    if not raw_geom:
        return None

    geom = loader(raw_geom)
    if geom is None or geom.is_empty:
        return None

    # Repair invalid geometries (self-intersections etc.).
    if not geom.is_valid:
        geom = geom.buffer(0)

    # Promote single-part geometries so the column type is uniform.
    wrapper = _MULTI_WRAPPERS.get(geom.geom_type)
    if wrapper is not None:
        geom = wrapper([geom])

    return geom.geom_type, f"SRID=4326;{geom.wkt}"


def _resolve_geom_type(geom_types) -> str:
    """Choose the PostGIS column type for the set of geometry type names seen.

    A single known type is used as-is (upper-cased); anything else (mixed or
    unrecognised types) falls back to the generic ``GEOMETRY`` type so the
    ALTER COLUMN does not reject rows.
    """
    if len(geom_types) == 1:
        only_type = next(iter(geom_types)).upper()
        if only_type in _TYPED_GEOM_COLUMNS:
            return only_type
    return "GEOMETRY"


def _load_clean_rows(file_path: str):
    """Read the parquet file and build the records to COPY (blocking).

    Returns ``(attr_columns, clean_rows, geom_types, skipped)`` where each
    record holds the attribute values as text followed by the EWKT geometry.

    Raises:
        ValueError: if the file has no GEOM (or GEOMETRY) column.
    """
    df = pd.read_parquet(file_path)

    # 1. Normalise column names.
    df.columns = [str(col).strip().upper() for col in df.columns]

    # Accept GEOMETRY as an alias for the GEOM column.
    if "GEOM" not in df.columns and "GEOMETRY" in df.columns:
        df = df.rename(columns={"GEOMETRY": "GEOM"})
    if "GEOM" not in df.columns:
        raise ValueError("Kolom GEOM tidak ditemukan dalam Parquet")

    # 2. Prepare rows. Values are read by position: itertuples() namedtuples
    # rename fields that are not valid Python identifiers, so attribute
    # lookup by column name would silently miss such columns.
    columns = list(df.columns)
    geom_pos = columns.index("GEOM")
    attr_positions = [i for i, col in enumerate(columns) if col != "GEOM"]
    attr_columns = [columns[i] for i in attr_positions]

    clean_rows = []
    geom_types = set()
    skipped = 0

    for row in df.itertuples(index=False, name=None):
        try:
            parsed = _geometry_to_ewkt(row[geom_pos])
        except Exception:
            # Best effort: a row whose geometry cannot be parsed is skipped.
            parsed = None
        if parsed is None:
            skipped += 1
            continue

        geom_type, ewkt = parsed
        geom_types.add(geom_type)

        # All attributes are stored as TEXT.
        record = [_to_text(row[i]) for i in attr_positions]
        record.append(ewkt)
        clean_rows.append(tuple(record))

    return attr_columns, clean_rows, geom_types, skipped


async def generate_unique_table_name(base_name: str) -> str:
    """Generate a table name that does not exist yet.

    The base name is lower-cased and every character other than a letter,
    digit, ``_`` or ``$`` is replaced by ``_``. If the result is empty or does
    not start with a letter or underscore it is prefixed with ``t_``. When the
    name is already taken, a numeric suffix (``_2``, ``_3``, ...) is appended.
    """
    base_name = _UNSAFE_IDENT_CHARS.sub("_", base_name.lower())
    if not base_name or not (base_name[0].isalpha() or base_name[0] == "_"):
        base_name = f"t_{base_name}"

    table_name = base_name
    counter = 2

    async with engine.connect() as conn:
        while True:
            # to_regclass returns NULL when no relation with that name is
            # found on the current search path.
            result = await conn.execute(
                text("SELECT to_regclass(:tname)"),
                {"tname": table_name},
            )
            if not result.scalar():
                return table_name

            table_name = f"{base_name}_{counter}"
            counter += 1


async def insert_parquet_to_postgis(filename: str, table_name: str):
    """Load a temporary parquet file into a new PostGIS table.

    The file ``tmp/<filename>`` is read and cleaned in a worker thread, then
    the table is created, filled with a bulk COPY, converted to a typed
    geometry column (SRID 4326) and indexed, all in one transaction. The temp
    file is removed after a successful load.

    Returns:
        dict with ``table_name``, ``row_count`` and ``geom_type``.

    Raises:
        FileNotFoundError: if the temp file does not exist.
        ValueError: if there is no GEOM column or no row has a usable geometry.
    """
    # NOTE(review): imported at call time, presumably to avoid a circular
    # import with `main` - confirm.
    from main import db_pool

    file_path = os.path.join("tmp", filename)
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File temp {file_path} tidak ditemukan")

    try:
        # Parquet reading and geometry parsing are blocking, so they run in
        # the default executor instead of on the event loop.
        loop = asyncio.get_running_loop()
        attr_columns, clean_rows, geom_types, skipped = await loop.run_in_executor(
            None, _load_clean_rows, file_path
        )

        if skipped:
            print(f"[WARN] {skipped} baris dilewati karena geometry kosong/rusak.")
        if not clean_rows:
            raise ValueError("Data valid kosong setelah pemrosesan geometry")

        final_geom_type = _resolve_geom_type(geom_types)

        # Identifiers are quoted here the same way the COPY step quotes them.
        quoted_table = _quote_ident(table_name)
        quoted_index = _quote_ident(f"idx_{table_name}_geom")

        # A. CREATE TABLE - every attribute is TEXT; geom starts as TEXT and
        # is converted after the COPY. Building a list keeps the statement
        # valid when there are no attribute columns.
        column_defs = ["_id SERIAL PRIMARY KEY"]
        column_defs.extend(f"{_quote_ident(col)} TEXT" for col in attr_columns)
        column_defs.append("geom TEXT")
        create_sql = f"CREATE TABLE {quoted_table} ({', '.join(column_defs)});"

        # C. final_geom_type comes from a fixed whitelist, so it is safe to
        # interpolate.
        alter_sql = (
            f"ALTER TABLE {quoted_table} "
            f"ALTER COLUMN geom TYPE geometry({final_geom_type}, 4326) "
            f"USING ST_Force2D(geom::geometry)::geometry({final_geom_type}, 4326);"
        )
        index_sql = (
            f"CREATE INDEX {quoted_index} ON {quoted_table} USING GIST (geom);"
        )

        async with db_pool.acquire() as conn:
            # One transaction: a failure in any step leaves no partial table.
            async with conn.transaction():
                await conn.execute(create_sql)

                # B. Bulk insert; asyncpg quotes the table and column names.
                await conn.copy_records_to_table(
                    table_name,
                    records=clean_rows,
                    columns=attr_columns + ["geom"],
                )

                await conn.execute(alter_sql)
                await conn.execute(index_sql)

        print(f"[SUCCESS] Upload {len(clean_rows)} baris ke tabel {table_name}.")

        # Best-effort removal of the temp file after a successful load.
        try:
            os.remove(file_path)
        except OSError:
            pass

        return {
            "table_name": table_name,
            "row_count": len(clean_rows),
            "geom_type": final_geom_type,
        }

    except Exception as e:
        print(f"[ERROR] Processing parquet to DB: {e}")
        raise


async def save_author_metadata(payload_author: dict, table_name: str, dataset_title: str,
                               geom_types: list, row_count: int, user_id: int):
    """Insert author and dataset metadata into ``backend.author_metadata``.

    ``payload_author`` is read with ``.get`` for the keys ``abstract``,
    ``keywords``, ``topicCategory``, ``dateCreated``, ``status``,
    ``organization``, ``contactName``, ``contactEmail`` and ``contactPhone``.
    ``geom_types`` is stored as a JSON string and ``process`` is always
    ``'CLEANSING'``.
    """
    query = text("""
        INSERT INTO backend.author_metadata (
            table_title, dataset_title, dataset_abstract, keywords,
            topic_category, date_created, dataset_status, organization_name,
            contact_person_name, contact_email, contact_phone, geom_type,
            user_id, process, geometry_count
        ) VALUES (
            :table_title, :dataset_title, :dataset_abstract, :keywords,
            :topic_category, :date_created, :dataset_status, :organization_name,
            :contact_person_name, :contact_email, :contact_phone, :geom_type,
            :user_id, :process, :geometry_count
        )
    """)

    # topicCategory may be missing, None, a single string or a list; a bare
    # string must not be joined character by character.
    topic_category = payload_author.get("topicCategory") or []
    if isinstance(topic_category, str):
        topic_category = [topic_category]

    params = {
        "table_title": table_name,
        "dataset_title": dataset_title,
        "dataset_abstract": payload_author.get("abstract"),
        "keywords": payload_author.get("keywords"),
        "topic_category": ", ".join(str(item) for item in topic_category),
        "date_created": str_to_date(payload_author.get("dateCreated")),
        "dataset_status": payload_author.get("status"),
        "organization_name": payload_author.get("organization"),
        "contact_person_name": payload_author.get("contactName"),
        "contact_email": payload_author.get("contactEmail"),
        "contact_phone": payload_author.get("contactPhone"),
        "geom_type": json.dumps(geom_types),
        "user_id": user_id,
        "process": 'CLEANSING',
        "geometry_count": row_count,
    }

    async with engine.begin() as conn:
        await conn.execute(query, params)


async def call_cleansing_procedure(table_name: str):
    """Run the ``pr_cleansing_satupeta_polygon`` stored procedure for a table.

    Returns:
        The string ``"done"`` on success.

    Raises:
        RuntimeError: if the database call fails; the original
            SQLAlchemyError is attached as the cause.
    """
    try:
        print(f"[INFO] Memulai cleansing database untuk tabel: {table_name}")

        async with engine.begin() as conn:
            # The table name is passed as a bound parameter.
            await conn.execute(
                text("CALL pr_cleansing_satupeta_polygon(:table_name, NULL);"),
                {"table_name": table_name},
            )

        print(f"[SUCCESS] Cleansing selesai untuk tabel: {table_name}")
        return "done"

    except SQLAlchemyError as e:
        print(f"[ERROR] Cleansing database gagal: {e}")
        # Re-raise so the calling service knows this step failed.
        raise RuntimeError(f"Database cleansing failed: {str(e)}") from e