update latest

This commit is contained in:
DmsAnhr 2026-02-10 08:54:35 +07:00
parent cc4d897862
commit c9c641e8e9
22 changed files with 2350 additions and 537 deletions

BIN
.DS_Store vendored

Binary file not shown.

2
.gitignore vendored
View File

@ -10,6 +10,7 @@ venv/
pdf/ pdf/
data_cache/ data_cache/
service_tmp/ service_tmp/
tmp/
testing/ testing/
test-ai/ test-ai/
uploads/ uploads/
@ -17,6 +18,7 @@ scrapp/
logs/ logs/
style_temp/ style_temp/
services/styles/ services/styles/
services/pipeline/
cleansing_func.sql cleansing_func.sql

View File

@ -112,9 +112,9 @@ async def job_callback(payload: dict):
geos_link = publish_layer_to_geoserver(table, job_id) geos_link = publish_layer_to_geoserver(table, job_id)
uuid = publish_metadata(table_name=table, geoserver_links=geos_link) uuid = await publish_metadata(table_name=table, geoserver_links=geos_link)
update_job_status(table, "FINISHED", job_id) await update_job_status(table, "FINISHED", job_id)
return { return {
"ok": True, "ok": True,
"uuid": uuid "uuid": uuid
@ -225,14 +225,17 @@ async def query_cleansing_data(
table_name: str table_name: str
): ):
try: try:
print("clean")
async with engine.begin() as conn: async with engine.begin() as conn:
await conn.execute( await conn.execute(
text("CALL pr_cleansing_satupeta_polygon(:table_name, NULL);"), text("CALL pr_cleansing_satupeta_polygon(:table_name, NULL);"),
{"table_name": table_name} {"table_name": table_name}
) )
print("clean-done")
return "done" return "done"
except SQLAlchemyError as e: except SQLAlchemyError as e:
print("clean-error", e)
raise RuntimeError(f"Fix geometry failed: {str(e)}") raise RuntimeError(f"Fix geometry failed: {str(e)}")
@ -242,20 +245,22 @@ async def publish_layer(table_name: str, job_id: str):
geos_link = publish_layer_to_geoserver(table_name, job_id) geos_link = publish_layer_to_geoserver(table_name, job_id)
uuid = publish_metadata( # uuid = await publish_metadata(
table_name=table_name, # table_name=table_name,
geoserver_links=geos_link # geoserver_links=geos_link
) # )
# await update_job_status(table_name, "FINISHED", job_id)
update_job_status(table_name, "FINISHED", job_id)
# return uuid # return uuid
return { return {
"geos_link": geos_link["layer_url"], "geos_link": geos_link["layer_url"],
"uuid": uuid # "uuid": uuid
"uuid": "123123"
} }
except Exception as e: except Exception as e:
update_job_status(table_name, "FAILED", job_id) await update_job_status(table_name, "FAILED", job_id)
raise RuntimeError(f"Publish layer gagal: {e}") from e raise RuntimeError(f"Publish layer gagal: {e}") from e

View File

@ -29,6 +29,7 @@ async def upload_file(payload: PdfRequest):
class UploadRequest(BaseModel): class UploadRequest(BaseModel):
title: str title: str
path: str
rows: List[dict] rows: List[dict]
columns: List[str] columns: List[str]
author: Dict[str, Any] author: Dict[str, Any]

View File

@ -11,6 +11,13 @@ SERVICE_KEY = os.getenv("SERVICE_KEY")
POSTGIS_URL = os.getenv("POSTGIS_URL") POSTGIS_URL = os.getenv("POSTGIS_URL")
POSTGIS_SYNC_URL = os.getenv("SYNC_URL") POSTGIS_SYNC_URL = os.getenv("SYNC_URL")
DB_DSN = os.getenv("DB_DSN")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")
DB_USER = os.getenv("DB_USER")
DB_PASS = os.getenv("DB_PASS")
QGIS_URL = os.getenv("QGIS_API_URL") QGIS_URL = os.getenv("QGIS_API_URL")

View File

@ -1,7 +1,5 @@
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.ext.asyncio import create_async_engine from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from core.config import POSTGIS_URL, POSTGIS_SYNC_URL from core.config import POSTGIS_URL, POSTGIS_SYNC_URL
engine = create_async_engine(POSTGIS_URL, pool_pre_ping=True) engine = create_async_engine(POSTGIS_URL, pool_pre_ping=True)

25
main.py
View File

@ -1,16 +1,35 @@
from contextlib import asynccontextmanager
import asyncpg
from fastapi import FastAPI from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from core.config import API_VERSION, ALLOWED_ORIGINS from core.config import API_VERSION, ALLOWED_ORIGINS, DB_DSN
from api.routers.system_router import router as system_router from api.routers.system_router import router as system_router
from api.routers.upload_file_router import router as upload_router from api.routers.upload_file_router import router as upload_router
from api.routers.datasets_router import router as dataset_router from api.routers.datasets_router import router as dataset_router
from services.pipeline.api.router import router as pipeline_router
# from contextlib import asynccontextmanager # from contextlib import asynccontextmanager
# from utils.qgis_init import init_qgis # from utils.qgis_init import init_qgis
db_pool = None
@asynccontextmanager
async def lifespan(app: FastAPI):
# 1. Start: Buat Pool koneksi saat aplikasi nyala
global db_pool
print("Creating DB Pool...")
db_pool = await asyncpg.create_pool(DB_DSN, min_size=5, max_size=20)
yield
# 2. Shutdown: Tutup Pool saat aplikasi mati
print("Closing DB Pool...")
await db_pool.close()
app = FastAPI( app = FastAPI(
title="ETL Geo Upload Service", title="ETL Geo Upload Service",
version=API_VERSION, version=API_VERSION,
description="Upload Automation API" description="Upload Automation API",
lifespan=lifespan
) )
app.add_middleware( app.add_middleware(
@ -21,6 +40,7 @@ app.add_middleware(
allow_headers=["*"], allow_headers=["*"],
) )
# Base.metadata.create_all(bind=engine) # Base.metadata.create_all(bind=engine)
# qgis setup # qgis setup
@ -56,3 +76,4 @@ app.add_middleware(
app.include_router(system_router, tags=["System"]) app.include_router(system_router, tags=["System"])
app.include_router(upload_router, prefix="/upload", tags=["Upload"]) app.include_router(upload_router, prefix="/upload", tags=["Upload"])
app.include_router(dataset_router, prefix="/dataset", tags=["Upload"]) app.include_router(dataset_router, prefix="/dataset", tags=["Upload"])
app.include_router(pipeline_router)

View File

@ -0,0 +1,9 @@
\documentclass[preview]{standalone}
\usepackage[english]{babel}
\usepackage{amsmath}
\usepackage{amssymb}
\begin{document}
\begin{align*}
A = \pi r^2
\end{align*}
\end{document}

View File

@ -0,0 +1,54 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="854" height="480" viewBox="0 0 854 480">
<defs>
<g>
<g id="glyph-0-0">
<path d="M 0.214844 -0.242188 C 0.734375 -0.296875 1.0625 -0.394531 1.191406 -0.53125 C 1.320312 -0.667969 1.386719 -1.023438 1.386719 -1.601562 L 1.386719 -7.375 C 1.386719 -7.851562 1.3125 -8.164062 1.167969 -8.308594 C 1.023438 -8.449219 0.707031 -8.539062 0.214844 -8.574219 L 0.214844 -8.820312 L 3.917969 -8.820312 L 3.917969 -8.574219 C 3.429688 -8.539062 3.109375 -8.449219 2.960938 -8.308594 C 2.8125 -8.164062 2.742188 -7.855469 2.742188 -7.375 L 2.742188 -4.777344 L 6.785156 -4.777344 L 6.785156 -7.375 C 6.785156 -7.851562 6.710938 -8.164062 6.570312 -8.308594 C 6.425781 -8.449219 6.105469 -8.539062 5.613281 -8.574219 L 5.613281 -8.820312 L 9.316406 -8.820312 L 9.316406 -8.574219 C 8.824219 -8.539062 8.507812 -8.449219 8.363281 -8.308594 C 8.21875 -8.164062 8.144531 -7.855469 8.144531 -7.375 L 8.144531 -1.445312 C 8.144531 -0.960938 8.21875 -0.648438 8.363281 -0.511719 C 8.507812 -0.375 8.824219 -0.285156 9.316406 -0.242188 L 9.316406 0 L 5.613281 0 L 5.613281 -0.242188 C 6.136719 -0.292969 6.464844 -0.386719 6.589844 -0.527344 C 6.71875 -0.667969 6.785156 -1.023438 6.785156 -1.601562 L 6.785156 -4.191406 L 2.742188 -4.191406 L 2.742188 -1.445312 C 2.742188 -0.960938 2.816406 -0.648438 2.960938 -0.507812 C 3.109375 -0.367188 3.429688 -0.28125 3.917969 -0.242188 L 3.917969 0 L 0.214844 0 Z "/>
</g>
<g id="glyph-0-1">
<path d="M 3.820312 -3.605469 C 3.320312 -3.441406 2.910156 -3.257812 2.585938 -3.058594 C 1.960938 -2.671875 1.648438 -2.234375 1.648438 -1.746094 C 1.648438 -1.351562 1.777344 -1.058594 2.039062 -0.871094 C 2.207031 -0.75 2.394531 -0.691406 2.605469 -0.691406 C 2.890625 -0.691406 3.164062 -0.769531 3.425781 -0.929688 C 3.691406 -1.089844 3.820312 -1.296875 3.820312 -1.542969 Z M 0.488281 -1.296875 C 0.488281 -1.925781 0.804688 -2.449219 1.433594 -2.871094 C 1.832031 -3.132812 2.628906 -3.484375 3.820312 -3.933594 L 3.820312 -4.484375 C 3.820312 -4.929688 3.777344 -5.238281 3.691406 -5.410156 C 3.542969 -5.699219 3.238281 -5.847656 2.773438 -5.847656 C 2.550781 -5.847656 2.339844 -5.789062 2.140625 -5.675781 C 1.941406 -5.558594 1.84375 -5.398438 1.84375 -5.195312 C 1.84375 -5.144531 1.851562 -5.054688 1.875 -4.929688 C 1.898438 -4.808594 1.90625 -4.730469 1.90625 -4.695312 C 1.90625 -4.453125 1.828125 -4.28125 1.667969 -4.1875 C 1.574219 -4.128906 1.46875 -4.101562 1.339844 -4.101562 C 1.144531 -4.101562 0.996094 -4.164062 0.890625 -4.292969 C 0.789062 -4.421875 0.734375 -4.5625 0.734375 -4.71875 C 0.734375 -5.023438 0.921875 -5.339844 1.296875 -5.671875 C 1.671875 -6.003906 2.222656 -6.171875 2.949219 -6.171875 C 3.792969 -6.171875 4.363281 -5.898438 4.660156 -5.351562 C 4.820312 -5.050781 4.902344 -4.617188 4.902344 -4.042969 L 4.902344 -1.433594 C 4.902344 -1.179688 4.917969 -1.007812 4.953125 -0.910156 C 5.011719 -0.742188 5.128906 -0.65625 5.304688 -0.65625 C 5.40625 -0.65625 5.488281 -0.671875 5.554688 -0.703125 C 5.617188 -0.734375 5.730469 -0.808594 5.890625 -0.925781 L 5.890625 -0.585938 C 5.753906 -0.417969 5.601562 -0.277344 5.441406 -0.167969 C 5.199219 -0.00390625 4.953125 0.078125 4.699219 0.078125 C 4.40625 0.078125 4.191406 -0.015625 4.058594 -0.207031 C 3.925781 -0.398438 3.855469 -0.628906 3.839844 -0.890625 C 3.511719 -0.605469 3.230469 -0.394531 2.996094 -0.253906 C 2.601562 -0.0195312 2.222656 0.0976562 1.867188 0.0976562 C 1.496094 0.0976562 
1.171875 -0.0351562 0.898438 -0.296875 C 0.625 -0.558594 0.488281 -0.890625 0.488281 -1.296875 Z "/>
</g>
<g id="glyph-0-2">
<path d="M 0.273438 -0.183594 C 0.675781 -0.222656 0.949219 -0.300781 1.09375 -0.425781 C 1.238281 -0.550781 1.308594 -0.792969 1.308594 -1.152344 L 1.308594 -7.511719 C 1.308594 -7.800781 1.285156 -7.996094 1.238281 -8.105469 C 1.152344 -8.289062 0.972656 -8.378906 0.710938 -8.378906 C 0.648438 -8.378906 0.582031 -8.371094 0.511719 -8.359375 C 0.441406 -8.347656 0.347656 -8.328125 0.242188 -8.300781 L 0.242188 -8.515625 C 0.828125 -8.671875 1.53125 -8.878906 2.355469 -9.140625 C 2.386719 -9.140625 2.40625 -9.128906 2.410156 -9.101562 C 2.417969 -9.074219 2.421875 -9.019531 2.421875 -8.933594 L 2.421875 -1.125 C 2.421875 -0.75 2.488281 -0.503906 2.617188 -0.394531 C 2.746094 -0.285156 3.015625 -0.210938 3.425781 -0.183594 L 3.425781 0 L 0.273438 0 Z "/>
</g>
<g id="glyph-0-3">
<path d="M 0.339844 -3.019531 C 0.339844 -3.882812 0.613281 -4.613281 1.160156 -5.210938 C 1.710938 -5.808594 2.417969 -6.105469 3.28125 -6.105469 C 4.140625 -6.105469 4.851562 -5.824219 5.417969 -5.261719 C 5.980469 -4.695312 6.261719 -3.945312 6.261719 -3.007812 C 6.261719 -2.144531 5.988281 -1.394531 5.441406 -0.753906 C 4.894531 -0.117188 4.1875 0.203125 3.320312 0.203125 C 2.488281 0.203125 1.78125 -0.105469 1.203125 -0.714844 C 0.625 -1.328125 0.339844 -2.097656 0.339844 -3.019531 Z M 3.097656 -5.714844 C 2.753906 -5.714844 2.457031 -5.601562 2.207031 -5.378906 C 1.773438 -4.984375 1.554688 -4.300781 1.554688 -3.332031 C 1.554688 -2.558594 1.730469 -1.839844 2.078125 -1.171875 C 2.429688 -0.503906 2.914062 -0.167969 3.535156 -0.167969 C 4.019531 -0.167969 4.394531 -0.394531 4.65625 -0.839844 C 4.921875 -1.285156 5.050781 -1.871094 5.050781 -2.597656 C 5.050781 -3.347656 4.886719 -4.054688 4.550781 -4.71875 C 4.214844 -5.382812 3.734375 -5.714844 3.097656 -5.714844 Z "/>
</g>
<g id="glyph-0-4">
<rect x="0" y="0" width="0" height="0" mask="url(#mask-0)"/>
</g>
<g id="glyph-0-5">
<path d="M 0.148438 -0.242188 C 0.699219 -0.296875 1.054688 -0.421875 1.210938 -0.613281 C 1.367188 -0.808594 1.445312 -1.257812 1.445312 -1.960938 L 1.445312 -7.375 C 1.445312 -7.859375 1.371094 -8.171875 1.21875 -8.316406 C 1.066406 -8.460938 0.71875 -8.546875 0.183594 -8.574219 L 0.183594 -8.820312 L 2.820312 -8.820312 L 5.90625 -2.097656 L 8.855469 -8.820312 L 11.511719 -8.820312 L 11.511719 -8.574219 C 11.015625 -8.539062 10.695312 -8.449219 10.554688 -8.304688 C 10.410156 -8.160156 10.339844 -7.847656 10.339844 -7.375 L 10.339844 -1.445312 C 10.339844 -0.960938 10.410156 -0.648438 10.554688 -0.511719 C 10.695312 -0.375 11.015625 -0.285156 11.511719 -0.242188 L 11.511719 0 L 7.773438 0 L 7.773438 -0.242188 C 8.3125 -0.285156 8.648438 -0.378906 8.777344 -0.53125 C 8.910156 -0.679688 8.976562 -1.039062 8.976562 -1.601562 L 8.976562 -7.589844 L 5.566406 0 L 5.382812 0 L 2.03125 -7.277344 L 2.03125 -1.960938 C 2.03125 -1.230469 2.136719 -0.753906 2.351562 -0.535156 C 2.488281 -0.390625 2.800781 -0.292969 3.28125 -0.242188 L 3.28125 0 L 0.148438 0 Z "/>
</g>
<g id="glyph-0-6">
<path d="M 0.242188 -0.183594 C 0.550781 -0.222656 0.765625 -0.296875 0.886719 -0.414062 C 1.011719 -0.527344 1.074219 -0.785156 1.074219 -1.183594 L 1.074219 -4.484375 C 1.074219 -4.761719 1.046875 -4.957031 0.996094 -5.070312 C 0.914062 -5.234375 0.746094 -5.320312 0.488281 -5.320312 C 0.449219 -5.320312 0.410156 -5.316406 0.367188 -5.3125 C 0.328125 -5.308594 0.277344 -5.300781 0.214844 -5.292969 L 0.214844 -5.519531 C 0.394531 -5.574219 0.8125 -5.707031 1.476562 -5.925781 L 2.089844 -6.125 C 2.121094 -6.125 2.136719 -6.117188 2.144531 -6.09375 C 2.152344 -6.070312 2.15625 -6.042969 2.15625 -6.003906 L 2.15625 -5.046875 C 2.554688 -5.417969 2.867188 -5.675781 3.09375 -5.8125 C 3.429688 -6.027344 3.78125 -6.132812 4.148438 -6.132812 C 4.441406 -6.132812 4.710938 -6.046875 4.953125 -5.878906 C 5.421875 -5.550781 5.65625 -4.960938 5.65625 -4.113281 L 5.65625 -1.074219 C 5.65625 -0.761719 5.71875 -0.535156 5.847656 -0.398438 C 5.972656 -0.257812 6.183594 -0.1875 6.476562 -0.183594 L 6.476562 0 L 3.699219 0 L 3.699219 -0.183594 C 4.015625 -0.226562 4.234375 -0.3125 4.363281 -0.445312 C 4.488281 -0.578125 4.550781 -0.867188 4.550781 -1.308594 L 4.550781 -4.089844 C 4.550781 -4.460938 4.480469 -4.769531 4.34375 -5.015625 C 4.203125 -5.261719 3.949219 -5.382812 3.574219 -5.382812 C 3.316406 -5.382812 3.058594 -5.296875 2.792969 -5.125 C 2.644531 -5.023438 2.453125 -4.859375 2.21875 -4.628906 L 2.21875 -0.984375 C 2.21875 -0.671875 2.289062 -0.460938 2.429688 -0.355469 C 2.566406 -0.25 2.785156 -0.191406 3.085938 -0.183594 L 3.085938 0 L 0.242188 0 Z "/>
</g>
<g id="glyph-0-7">
<path d="M 1.09375 -8.40625 C 1.09375 -8.59375 1.160156 -8.753906 1.289062 -8.886719 C 1.417969 -9.019531 1.578125 -9.089844 1.769531 -9.089844 C 1.957031 -9.089844 2.117188 -9.023438 2.25 -8.890625 C 2.382812 -8.757812 2.449219 -8.597656 2.449219 -8.40625 C 2.449219 -8.21875 2.382812 -8.058594 2.25 -7.925781 C 2.117188 -7.792969 1.957031 -7.726562 1.769531 -7.726562 C 1.578125 -7.726562 1.417969 -7.792969 1.289062 -7.925781 C 1.160156 -8.058594 1.09375 -8.21875 1.09375 -8.40625 Z M 0.261719 -0.183594 C 0.726562 -0.226562 1.019531 -0.304688 1.140625 -0.417969 C 1.261719 -0.535156 1.320312 -0.847656 1.320312 -1.355469 L 1.320312 -4.460938 C 1.320312 -4.742188 1.300781 -4.9375 1.261719 -5.046875 C 1.199219 -5.222656 1.0625 -5.3125 0.851562 -5.3125 C 0.804688 -5.3125 0.757812 -5.308594 0.710938 -5.300781 C 0.667969 -5.292969 0.535156 -5.257812 0.320312 -5.195312 L 0.320312 -5.398438 L 0.597656 -5.488281 C 1.359375 -5.734375 1.886719 -5.921875 2.1875 -6.046875 C 2.308594 -6.101562 2.386719 -6.125 2.421875 -6.125 C 2.429688 -6.09375 2.433594 -6.0625 2.433594 -6.027344 L 2.433594 -1.355469 C 2.433594 -0.859375 2.496094 -0.550781 2.613281 -0.421875 C 2.734375 -0.296875 3.003906 -0.21875 3.425781 -0.183594 L 3.425781 0 L 0.261719 0 Z "/>
</g>
<g id="glyph-0-8">
<path d="M 0.214844 -0.167969 C 0.554688 -0.199219 0.777344 -0.257812 0.890625 -0.339844 C 1.066406 -0.464844 1.152344 -0.714844 1.152344 -1.09375 L 1.152344 -4.460938 C 1.152344 -4.78125 1.109375 -4.992188 1.023438 -5.089844 C 0.941406 -5.191406 0.800781 -5.242188 0.605469 -5.242188 C 0.515625 -5.242188 0.445312 -5.238281 0.398438 -5.226562 C 0.355469 -5.21875 0.300781 -5.203125 0.242188 -5.183594 L 0.242188 -5.410156 L 0.710938 -5.566406 C 0.878906 -5.621094 1.15625 -5.726562 1.542969 -5.871094 C 1.929688 -6.019531 2.132812 -6.09375 2.15625 -6.09375 C 2.175781 -6.09375 2.191406 -6.082031 2.195312 -6.0625 C 2.199219 -6.039062 2.199219 -6 2.199219 -5.9375 L 2.199219 -5.058594 C 2.628906 -5.449219 3 -5.71875 3.3125 -5.867188 C 3.625 -6.019531 3.949219 -6.09375 4.277344 -6.09375 C 4.722656 -6.09375 5.082031 -5.941406 5.34375 -5.636719 C 5.484375 -5.472656 5.597656 -5.25 5.691406 -4.96875 C 6.011719 -5.292969 6.292969 -5.535156 6.53125 -5.691406 C 6.941406 -5.960938 7.363281 -6.09375 7.792969 -6.09375 C 8.492188 -6.09375 8.957031 -5.808594 9.191406 -5.242188 C 9.328125 -4.921875 9.394531 -4.410156 9.394531 -3.71875 L 9.394531 -1.015625 C 9.394531 -0.707031 9.460938 -0.496094 9.597656 -0.386719 C 9.734375 -0.277344 9.980469 -0.203125 10.339844 -0.167969 L 10.339844 0 L 7.402344 0 L 7.402344 -0.183594 C 7.78125 -0.21875 8.027344 -0.292969 8.148438 -0.410156 C 8.265625 -0.527344 8.328125 -0.765625 8.328125 -1.125 L 8.328125 -3.933594 C 8.328125 -4.355469 8.28125 -4.664062 8.191406 -4.863281 C 8.03125 -5.21875 7.714844 -5.398438 7.246094 -5.398438 C 6.964844 -5.398438 6.683594 -5.304688 6.40625 -5.117188 C 6.246094 -5.007812 6.046875 -4.835938 5.8125 -4.597656 L 5.8125 -1.261719 C 5.8125 -0.910156 5.875 -0.644531 6 -0.460938 C 6.125 -0.28125 6.382812 -0.183594 6.785156 -0.167969 L 6.785156 0 L 3.796875 0 L 3.796875 -0.167969 C 4.207031 -0.222656 4.46875 -0.320312 4.582031 -0.46875 C 4.695312 -0.617188 4.753906 -0.980469 4.753906 -1.554688 L 4.753906 -3.378906 C 4.753906 
-4.046875 4.710938 -4.507812 4.621094 -4.757812 C 4.480469 -5.183594 4.175781 -5.398438 3.710938 -5.398438 C 3.445312 -5.398438 3.1875 -5.324219 2.929688 -5.179688 C 2.671875 -5.035156 2.449219 -4.84375 2.253906 -4.609375 L 2.253906 -1.046875 C 2.253906 -0.71875 2.308594 -0.492188 2.425781 -0.363281 C 2.539062 -0.238281 2.789062 -0.171875 3.175781 -0.167969 L 3.175781 0 L 0.214844 0 Z "/>
</g>
<g id="glyph-0-9">
<path d="M 2.433594 0.144531 C 2.246094 0.144531 2.082031 0.078125 1.941406 -0.0507812 C 1.796875 -0.183594 1.726562 -0.347656 1.726562 -0.554688 C 1.726562 -0.75 1.792969 -0.917969 1.933594 -1.0625 C 2.074219 -1.203125 2.242188 -1.277344 2.449219 -1.277344 C 2.65625 -1.277344 2.828125 -1.203125 2.96875 -1.058594 C 3.109375 -0.914062 3.175781 -0.746094 3.175781 -0.554688 C 3.175781 -0.363281 3.105469 -0.199219 2.960938 -0.0625 C 2.820312 0.0742188 2.644531 0.144531 2.433594 0.144531 Z M 3.144531 -7.949219 C 3.144531 -7.871094 3.140625 -7.792969 3.136719 -7.71875 C 3.132812 -7.640625 3.125 -7.550781 3.113281 -7.449219 L 2.820312 -4.980469 L 2.511719 -2.34375 L 2.34375 -2.34375 L 2.21875 -3.777344 C 2.183594 -4.183594 2.089844 -4.964844 1.933594 -6.113281 C 1.792969 -7.117188 1.726562 -7.722656 1.726562 -7.929688 C 1.726562 -8.199219 1.777344 -8.445312 1.882812 -8.671875 C 1.984375 -8.898438 2.179688 -9.011719 2.460938 -9.011719 C 2.75 -9.011719 2.953125 -8.855469 3.058594 -8.546875 C 3.117188 -8.386719 3.144531 -8.1875 3.144531 -7.949219 Z "/>
</g>
</g>
<image id="source-9" x="0" y="0" width="0" height="0"/>
<mask id="mask-0">
<use xlink:href="#source-9"/>
</mask>
</defs>
<g fill="rgb(100%, 100%, 100%)" fill-opacity="1">
<use xlink:href="#glyph-0-0" x="30" y="30"/>
<use xlink:href="#glyph-0-1" x="40" y="30"/>
<use xlink:href="#glyph-0-2" x="46" y="30"/>
<use xlink:href="#glyph-0-3" x="50" y="30"/>
<use xlink:href="#glyph-0-4" x="57" y="30"/>
<use xlink:href="#glyph-0-5" x="60" y="30"/>
<use xlink:href="#glyph-0-1" x="72" y="30"/>
<use xlink:href="#glyph-0-6" x="78" y="30"/>
<use xlink:href="#glyph-0-7" x="85" y="30"/>
<use xlink:href="#glyph-0-8" x="89" y="30"/>
<use xlink:href="#glyph-0-9" x="99" y="30"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 13 KiB

View File

@ -18,3 +18,5 @@ asyncpg
psycopg2 psycopg2
python-multipart==0.0.22 python-multipart==0.0.22
pyarrow==21.0.0 pyarrow==21.0.0
subprocess
openpyxl

BIN
services/.DS_Store vendored

Binary file not shown.

View File

@ -1,9 +1,9 @@
from datetime import datetime from datetime import datetime
from sqlalchemy import text from sqlalchemy import text
from database.connection import sync_engine from database.connection import engine
from utils.logger_config import log_activity from utils.logger_config import log_activity
def update_job_status(table_name: str, status: str, job_id: str = None): async def update_job_status(table_name: str, status: str, job_id: str = None):
query = text(""" query = text("""
UPDATE backend.author_metadata UPDATE backend.author_metadata
SET process = :status, SET process = :status,
@ -17,12 +17,7 @@ def update_job_status(table_name: str, status: str, job_id: str = None):
"table_name": table_name "table_name": table_name
} }
with sync_engine.begin() as conn: async with engine.begin() as conn:
conn.execute(query, params) await conn.execute(query, params)
print(f"[DB] Metadata '{table_name}' updated to status '{status}'") print(f"[DB] Metadata '{table_name}' updated to status '{status}'")

View File

@ -2,7 +2,7 @@ from fastapi import HTTPException
import requests import requests
from sqlalchemy import text from sqlalchemy import text
from core.config import GEONETWORK_PASS, GEONETWORK_URL, GEONETWORK_USER from core.config import GEONETWORK_PASS, GEONETWORK_URL, GEONETWORK_USER
from database.connection import sync_engine as engine from database.connection import engine
from datetime import datetime from datetime import datetime
from uuid import uuid4 from uuid import uuid4
import re import re
@ -54,7 +54,7 @@ def fix_xml_urls(xml: str) -> str:
def get_extent(table_name: str): async def get_extent(table_name: str):
sql = f""" sql = f"""
SELECT SELECT
@ -62,24 +62,22 @@ def get_extent(table_name: str):
ST_XMax(extent), ST_YMax(extent) ST_XMax(extent), ST_YMax(extent)
FROM ( FROM (
SELECT ST_Extent(geom) AS extent SELECT ST_Extent(geom) AS extent
FROM public.{table_name} FROM public."{table_name}"
) AS box; ) AS box;
""" """
conn = engine.connect() async with engine.connect() as conn:
try: result = await conn.execute(sql)
row = conn.execute(text(sql)).fetchone() row = result.fetchone()
finally:
conn.close()
if not row or row[0] is None: if not row or row[0] is None:
return None return None
# return { # return {
# "xmin": float(row[0]), # "xmin": row[0],
# "ymin": float(row[1]), # "ymin": row[1],
# "xmax": float(row[2]), # "xmax": row[2],
# "ymax": float(row[3]) # "ymax": row[3]
# } # }
return { return {
@ -89,7 +87,7 @@ def get_extent(table_name: str):
"ymax": -5.4819 # north "ymax": -5.4819 # north
} }
def get_author_metadata(table_name: str): async def get_author_metadata(table_name: str):
sql = """ sql = """
SELECT am.table_title, am.dataset_title, am.dataset_abstract, am.keywords, am.date_created, SELECT am.table_title, am.dataset_title, am.dataset_abstract, am.keywords, am.date_created,
@ -106,15 +104,14 @@ def get_author_metadata(table_name: str):
LIMIT 1 LIMIT 1
""" """
conn = engine.connect() async with engine.connect() as conn:
try: result = await conn.execute(sql, {"table": table_name})
row = conn.execute(text(sql), {"table": table_name}).fetchone() row = result.fetchone()
finally:
conn.close()
if not row: if not row:
raise Exception(f"Tidak ada metadata untuk tabel: {table_name}") raise Exception(f"Tidak ada metadata untuk tabel: {table_name}")
# SQLAlchemy Async row support ._mapping untuk convert ke dict
return dict(row._mapping) return dict(row._mapping)
@ -628,10 +625,10 @@ def upload_metadata_to_geonetwork(xml_metadata: str):
def publish_metadata(table_name: str, geoserver_links: dict): async def publish_metadata(table_name: str, geoserver_links: dict):
extent = get_extent(table_name) extent = await get_extent(table_name)
meta = get_author_metadata(table_name) meta = await get_author_metadata(table_name)
xml = generate_metadata_xml( xml = generate_metadata_xml(
table_name=meta["dataset_title"], table_name=meta["dataset_title"],
meta=meta, meta=meta,

View File

@ -2,7 +2,7 @@ from fastapi import HTTPException
import requests import requests
from sqlalchemy import text from sqlalchemy import text
from core.config import GEONETWORK_PASS, GEONETWORK_URL, GEONETWORK_USER from core.config import GEONETWORK_PASS, GEONETWORK_URL, GEONETWORK_USER
from database.connection import sync_engine as engine from database.connection import engine
from datetime import datetime from datetime import datetime
from uuid import uuid4 from uuid import uuid4
import re import re
@ -54,7 +54,7 @@ def fix_xml_urls(xml: str) -> str:
def get_extent(table_name: str): async def get_extent(table_name: str):
sql = f""" sql = f"""
SELECT SELECT
@ -62,24 +62,22 @@ def get_extent(table_name: str):
ST_XMax(extent), ST_YMax(extent) ST_XMax(extent), ST_YMax(extent)
FROM ( FROM (
SELECT ST_Extent(geom) AS extent SELECT ST_Extent(geom) AS extent
FROM public.{table_name} FROM public."{table_name}"
) AS box; ) AS box;
""" """
conn = engine.connect() async with engine.connect() as conn:
try: result = await conn.execute(sql)
row = conn.execute(text(sql)).fetchone() row = result.fetchone()
finally:
conn.close()
if not row or row[0] is None: if not row or row[0] is None:
return None return None
# return { # return {
# "xmin": float(row[0]), # "xmin": row[0],
# "ymin": float(row[1]), # "ymin": row[1],
# "xmax": float(row[2]), # "xmax": row[2],
# "ymax": float(row[3]) # "ymax": row[3]
# } # }
return { return {
@ -89,7 +87,7 @@ def get_extent(table_name: str):
"ymax": -5.4819 # north "ymax": -5.4819 # north
} }
def get_author_metadata(table_name: str): async def get_author_metadata(table_name: str):
sql = """ sql = """
SELECT am.table_title, am.dataset_title, am.dataset_abstract, am.keywords, am.date_created, SELECT am.table_title, am.dataset_title, am.dataset_abstract, am.keywords, am.date_created,
@ -106,15 +104,14 @@ def get_author_metadata(table_name: str):
LIMIT 1 LIMIT 1
""" """
conn = engine.connect() async with engine.connect() as conn:
try: result = await conn.execute(sql, {"table": table_name})
row = conn.execute(text(sql), {"table": table_name}).fetchone() row = result.fetchone()
finally:
conn.close()
if not row: if not row:
raise Exception(f"Tidak ada metadata untuk tabel: {table_name}") raise Exception(f"Tidak ada metadata untuk tabel: {table_name}")
# SQLAlchemy Async row support ._mapping untuk convert ke dict
return dict(row._mapping) return dict(row._mapping)
@ -626,10 +623,10 @@ def upload_metadata_to_geonetwork(xml_metadata: str):
def publish_metadata(table_name: str, geoserver_links: dict): async def publish_metadata(table_name: str, geoserver_links: dict):
extent = get_extent(table_name) extent = await get_extent(table_name)
meta = get_author_metadata(table_name) meta = await get_author_metadata(table_name)
xml = generate_metadata_xml( xml = generate_metadata_xml(
table_name=meta["dataset_title"], table_name=meta["dataset_title"],
meta=meta, meta=meta,

Binary file not shown.

View File

@ -32,10 +32,96 @@ def detect_delimiter(path, sample_size=2048):
return ',' return ','
# def read_csv(path: str, sheet: str = None):
# ext = os.path.splitext(path)[1].lower()
# try:
# if ext in ['.csv']:
# header_line = detect_header_line(path)
# delimiter = detect_delimiter(path)
# print(f"[INFO] Detected header line: {header_line + 1}, delimiter: '{delimiter}'")
# df = pd.read_csv(
# path,
# header=header_line,
# sep=delimiter,
# encoding='utf-8',
# low_memory=False,
# thousands=','
# )
# elif ext in ['.xlsx', '.xls']:
# print(f"[INFO] Membaca file Excel: {os.path.basename(path)}")
# xls = pd.ExcelFile(path)
# print(f"[INFO] Ditemukan {len(xls.sheet_names)} sheet: {xls.sheet_names}")
# if sheet:
# if sheet not in xls.sheet_names:
# raise ValueError(f"Sheet '{sheet}' tidak ditemukan dalam file {os.path.basename(path)}")
# print(f"[INFO] Membaca sheet yang ditentukan: '{sheet}'")
# df = pd.read_excel(xls, sheet_name=sheet, header=0, dtype=str)
# df = df.dropna(how='all').dropna(axis=1, how='all')
# else:
# print("[INFO] Tidak ada sheet yang ditentukan, mencari sheet paling relevan...")
# best_sheet = None
# best_score = -1
# best_df = None
# for sheet_name in xls.sheet_names:
# try:
# temp_df = pd.read_excel(xls, sheet_name=sheet_name, header=0, dtype=str)
# temp_df = temp_df.dropna(how='all').dropna(axis=1, how='all')
# if len(temp_df) == 0 or len(temp_df.columns) < 2:
# continue
# # hitung skor relevansi
# text_ratio = temp_df.applymap(lambda x: isinstance(x, str)).sum().sum() / (temp_df.size or 1)
# row_score = len(temp_df)
# score = (row_score * 0.7) + (text_ratio * 100)
# if score > best_score:
# best_score = score
# best_sheet = sheet_name
# best_df = temp_df
# except Exception as e:
# print(f"[WARN] Gagal membaca sheet {sheet_name}: {e}")
# continue
# if best_df is not None:
# print(f"[INFO] Sheet terpilih: '{best_sheet}' dengan skor {best_score:.2f}")
# df = best_df
# else:
# raise ValueError("Tidak ada sheet valid yang dapat dibaca.")
# for col in df.columns:
# if df[col].astype(str).str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
# df[col] = df[col].astype(str).str.replace(',', '', regex=False)
# df[col] = pd.to_numeric(df[col], errors='ignore')
# else:
# raise ValueError("Format file tidak dikenali (hanya .csv, .xlsx, .xls)")
# except Exception as e:
# print(f"[WARN] Gagal membaca file ({e}), fallback ke default reader.")
# df = pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',')
# df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]
# df.columns = [str(c).strip() for c in df.columns]
# df = df.dropna(how='all')
# return df
def read_csv(path: str, sheet: str = None): def read_csv(path: str, sheet: str = None):
ext = os.path.splitext(path)[1].lower() ext = os.path.splitext(path)[1].lower()
df = pd.DataFrame() # Inisialisasi default
try: try:
# --- BLOK PEMBACAAN FILE ---
if ext in ['.csv']: if ext in ['.csv']:
header_line = detect_header_line(path) header_line = detect_header_line(path)
delimiter = detect_delimiter(path) delimiter = detect_delimiter(path)
@ -52,17 +138,19 @@ def read_csv(path: str, sheet: str = None):
elif ext in ['.xlsx', '.xls']: elif ext in ['.xlsx', '.xls']:
print(f"[INFO] Membaca file Excel: {os.path.basename(path)}") print(f"[INFO] Membaca file Excel: {os.path.basename(path)}")
xls = pd.ExcelFile(path) xls = pd.ExcelFile(path, engine='openpyxl') # Pakai engine openpyxl
print(f"[INFO] Ditemukan {len(xls.sheet_names)} sheet: {xls.sheet_names}") print(f"[INFO] Ditemukan {len(xls.sheet_names)} sheet: {xls.sheet_names}")
if sheet: if sheet:
if sheet not in xls.sheet_names: if sheet not in xls.sheet_names:
raise ValueError(f"Sheet '{sheet}' tidak ditemukan dalam file {os.path.basename(path)}") raise ValueError(f"Sheet '{sheet}' tidak ditemukan.")
print(f"[INFO] Membaca sheet yang ditentukan: '{sheet}'") print(f"[INFO] Membaca sheet yang ditentukan: '{sheet}'")
df = pd.read_excel(xls, sheet_name=sheet, header=0, dtype=str) # Tambahkan engine='openpyxl'
df = pd.read_excel(xls, sheet_name=sheet, header=0, dtype=str, engine='openpyxl')
df = df.dropna(how='all').dropna(axis=1, how='all') df = df.dropna(how='all').dropna(axis=1, how='all')
else: else:
# Logika pencarian sheet terbaik (tidak berubah, hanya indentasi)
print("[INFO] Tidak ada sheet yang ditentukan, mencari sheet paling relevan...") print("[INFO] Tidak ada sheet yang ditentukan, mencari sheet paling relevan...")
best_sheet = None best_sheet = None
best_score = -1 best_score = -1
@ -70,13 +158,12 @@ def read_csv(path: str, sheet: str = None):
for sheet_name in xls.sheet_names: for sheet_name in xls.sheet_names:
try: try:
temp_df = pd.read_excel(xls, sheet_name=sheet_name, header=0, dtype=str) temp_df = pd.read_excel(xls, sheet_name=sheet_name, header=0, dtype=str, engine='openpyxl')
temp_df = temp_df.dropna(how='all').dropna(axis=1, how='all') temp_df = temp_df.dropna(how='all').dropna(axis=1, how='all')
if len(temp_df) == 0 or len(temp_df.columns) < 2: if len(temp_df) == 0 or len(temp_df.columns) < 2:
continue continue
# hitung skor relevansi
text_ratio = temp_df.applymap(lambda x: isinstance(x, str)).sum().sum() / (temp_df.size or 1) text_ratio = temp_df.applymap(lambda x: isinstance(x, str)).sum().sum() / (temp_df.size or 1)
row_score = len(temp_df) row_score = len(temp_df)
score = (row_score * 0.7) + (text_ratio * 100) score = (row_score * 0.7) + (text_ratio * 100)
@ -85,7 +172,6 @@ def read_csv(path: str, sheet: str = None):
best_score = score best_score = score
best_sheet = sheet_name best_sheet = sheet_name
best_df = temp_df best_df = temp_df
except Exception as e: except Exception as e:
print(f"[WARN] Gagal membaca sheet {sheet_name}: {e}") print(f"[WARN] Gagal membaca sheet {sheet_name}: {e}")
continue continue
@ -96,21 +182,47 @@ def read_csv(path: str, sheet: str = None):
else: else:
raise ValueError("Tidak ada sheet valid yang dapat dibaca.") raise ValueError("Tidak ada sheet valid yang dapat dibaca.")
for col in df.columns:
if df[col].astype(str).str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
df[col] = df[col].astype(str).str.replace(',', '', regex=False)
df[col] = pd.to_numeric(df[col], errors='ignore')
else: else:
raise ValueError("Format file tidak dikenali (hanya .csv, .xlsx, .xls)") raise ValueError("Format file tidak dikenali (hanya .csv, .xlsx, .xls)")
# --- BLOK PEMBERSIHAN (Dilakukan setelah file sukses terbaca) ---
# Kita bungkus ini agar error konversi angka TIDAK menggagalkan pembacaan file
if not df.empty:
df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]
df.columns = [str(c).strip() for c in df.columns]
df = df.dropna(how='all')
# Konversi Angka yang Lebih Aman
for col in df.columns:
try:
# Cek apakah kolom terlihat seperti angka
if df[col].astype(str).str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
# Bersihkan koma
clean_col = df[col].astype(str).str.replace(',', '', regex=False)
# Gunakan errors='coerce' agar jika ada error value (NaN/REF), dia jadi NaN, bukan crash
df[col] = pd.to_numeric(clean_col, errors='coerce')
except Exception as ex:
# Jika konversi gagal, biarkan sebagai string/object dan lanjut ke kolom berikutnya
print(f"[WARN] Gagal konversi numerik pada kolom '{col}': {ex}")
pass
return df
except Exception as e: except Exception as e:
print(f"[WARN] Gagal membaca file ({e}), fallback ke default reader.") # --- ERROR HANDLING YANG BENAR ---
df = pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',') print(f"[WARN] Gagal membaca file utama ({e}).")
df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')] # Hanya lakukan fallback CSV jika file aslinya MEMANG CSV (atau txt)
df.columns = [str(c).strip() for c in df.columns] # Jangan paksa baca .xlsx pakai read_csv
df = df.dropna(how='all') if ext in ['.csv', '.txt']:
print("[INFO] Mencoba fallback ke default CSV reader...")
try:
return pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',')
except Exception as e2:
print(f"[ERROR] Fallback CSV juga gagal: {e2}")
# Jika file Excel gagal dibaca, return DataFrame kosong atau raise error
print("[ERROR] Tidak dapat memulihkan pembacaan file Excel.")
return pd.DataFrame()
return df

View File

@ -1,7 +1,7 @@
import re import re
import pdfplumber import pdfplumber
import pandas as pd import pandas as pd
from services.upload_file.utils.pdf_cleaner import get_number_column_index, get_start_end_number, normalize_number_column, row_ratio, has_mixed_text_and_numbers, is_short_text_row, parse_page_selection, filter_geo_admin_column, cleaning_column from services.upload_file.readers.utils.pdf_cleaner import get_number_column_index, get_start_end_number, normalize_number_column, row_ratio, has_mixed_text_and_numbers, is_short_text_row, parse_page_selection, filter_geo_admin_column, cleaning_column
from services.upload_file.upload_exceptions import PDFReadError from services.upload_file.upload_exceptions import PDFReadError
from utils.logger_config import setup_logger from utils.logger_config import setup_logger

View File

@ -1,5 +1,9 @@
import json import json
import os import os
import random
import subprocess
import uuid
import asyncpg
import pandas as pd import pandas as pd
import geopandas as gpd import geopandas as gpd
import numpy as np import numpy as np
@ -12,13 +16,14 @@ from shapely.geometry.base import BaseGeometry
from shapely.geometry import base as shapely_base from shapely.geometry import base as shapely_base
from fastapi import Depends, File, Form, UploadFile, HTTPException from fastapi import Depends, File, Form, UploadFile, HTTPException
from api.routers.datasets_router import cleansing_data, publish_layer, query_cleansing_data, upload_to_main from api.routers.datasets_router import cleansing_data, publish_layer, query_cleansing_data, upload_to_main
from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES, GEONETWORK_URL from core.config import DB_DSN, DB_HOST, DB_NAME, DB_PASS, DB_PORT, DB_USER, UPLOAD_FOLDER, MAX_FILE_MB, GEONETWORK_URL
from services.upload_file.ai_generate import send_metadata from services.upload_file.ai_generate import send_metadata
from services.upload_file.readers.reader_csv import read_csv from services.upload_file.readers.reader_csv import read_csv
from services.upload_file.readers.reader_shp import read_shp from services.upload_file.readers.reader_shp import read_shp
from services.upload_file.readers.reader_gdb import read_gdb from services.upload_file.readers.reader_gdb import read_gdb
from services.upload_file.readers.reader_mpk import read_mpk from services.upload_file.readers.reader_mpk import read_mpk
from services.upload_file.readers.reader_pdf import convert_df, read_pdf from services.upload_file.readers.reader_pdf import convert_df, read_pdf
from services.upload_file.utils.df_validation import process_dataframe_synchronous
from services.upload_file.utils.geometry_detector import detect_and_build_geometry, attach_polygon_geometry_auto from services.upload_file.utils.geometry_detector import detect_and_build_geometry, attach_polygon_geometry_auto
from database.connection import engine, sync_engine from database.connection import engine, sync_engine
from pydantic import BaseModel from pydantic import BaseModel
@ -71,155 +76,13 @@ def detect_zip_type(zip_path: str) -> str:
return "unknown" return "unknown"
# def detect_zip_type(zip_path: str) -> str:
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# files = zip_ref.namelist()
# # ------------------------------------------------------------- async def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
# # 1) DETECT FileGDB
# # -------------------------------------------------------------
# is_gdb = (
# any(".gdb/" in f.lower() for f in files)
# or any(f.lower().endswith(ext) for ext in
# [".gdbtable", ".gdbtablx", ".gdbindexes", ".spx"] for f in files)
# )
# if is_gdb:
# print("\n[INFO] ZIP terdeteksi berisi FileGDB.")
# with tempfile.TemporaryDirectory() as temp_dir:
# # extract ZIP
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# zip_ref.extractall(temp_dir)
# # find folder *.gdb
# gdb_path = None
# for root, dirs, _ in os.walk(temp_dir):
# for d in dirs:
# if d.lower().endswith(".gdb"):
# gdb_path = os.path.join(root, d)
# break
# if not gdb_path:
# print("[ERROR] Folder .gdb tidak ditemukan.")
# return "gdb"
# print(f"[INFO] GDB Path: {gdb_path}")
# # Cari seluruh file .gdbtable
# table_files = [
# os.path.join(gdb_path, f)
# for f in os.listdir(gdb_path)
# if f.lower().endswith(".gdbtable")
# ]
# if not table_files:
# print("[ERROR] Tidak ada file .gdbtable ditemukan.")
# return "gdb"
# # Scan semua table file untuk mencari SpatialReference
# found_crs = False
# for table_file in table_files:
# try:
# with open(table_file, "rb") as f:
# raw = f.read(15000) # baca awal file, cukup untuk header JSON
# text = raw.decode("utf-8", errors="ignore")
# start = text.find("{")
# end = text.rfind("}") + 1
# if start == -1 or end == -1:
# continue
# json_str = text[start:end]
# meta = json.loads(json_str)
# spatial_ref = meta.get("SpatialReference")
# if not spatial_ref:
# continue
# wkt = spatial_ref.get("WKT")
# if not wkt:
# continue
# print(f"[FOUND] CRS metadata pada: {os.path.basename(table_file)}")
# print(f"[CRS WKT] {wkt[:200]}...")
# # Convert to EPSG
# try:
# epsg = CRS.from_wkt(wkt).to_epsg()
# print(f"[EPSG] {epsg}")
# except:
# print("[EPSG] Tidak ditemukan EPSG.")
# found_crs = True
# break
# except Exception:
# continue
# if not found_crs:
# print("[WARNING] Tidak ditemukan CRS di file .gdbtable manapun.")
# return "gdb"
# # -----------------------------------------------------
# # 2. DETEKSI SHP
# # -----------------------------------------------------
# if any(f.lower().endswith(".shp") for f in files):
# print("\n[INFO] ZIP terdeteksi berisi SHP.")
# # cari file .prj
# prj_files = [f for f in files if f.lower().endswith(".prj")]
# if not prj_files:
# print("[WARNING] Tidak ada file .prj → CRS tidak diketahui.")
# return "shp"
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# with tempfile.TemporaryDirectory() as temp_dir:
# prj_path = os.path.join(temp_dir, os.path.basename(prj_files[0]))
# zip_ref.extract(prj_files[0], temp_dir)
# # baca isi prj
# with open(prj_path, "r") as f:
# prj_text = f.read()
# try:
# crs = CRS.from_wkt(prj_text)
# print(f"[CRS WKT] {crs.to_wkt()[:200]}...")
# epsg = crs.to_epsg()
# if epsg:
# print(f"[EPSG] {epsg}")
# else:
# print("[EPSG] Tidak ditemukan dalam database EPSG.")
# except Exception as e:
# print("[ERROR] Gagal membaca CRS dari file PRJ:", e)
# return "shp"
# # -----------------------------------------------------
# # 3. UNKNOWN
# # -----------------------------------------------------
# return "unknown"
def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
result = detect_and_build_geometry(df, master_polygons=None) result = detect_and_build_geometry(df, master_polygons=None)
if not hasattr(result, "geometry") or result.geometry.isna().all(): if not hasattr(result, "geometry") or result.geometry.isna().all():
result = attach_polygon_geometry_auto(result) result = attach_polygon_geometry_auto(result)
# if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
# geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
# if not result.empty else "None"
# null_geom = result.geometry.isna().sum()
def normalize_geom_type(geom_type): def normalize_geom_type(geom_type):
if geom_type.startswith("Multi"): if geom_type.startswith("Multi"):
return geom_type.replace("Multi", "") return geom_type.replace("Multi", "")
@ -295,18 +158,12 @@ def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
"tipe_data_spasial": geom_type, "tipe_data_spasial": geom_type,
"deskripsi_singkat": fileDesc, "deskripsi_singkat": fileDesc,
"struktur_atribut_data": {}, "struktur_atribut_data": {},
# "metadata": {
# "judul": "",
# "abstrak": "",
# "tujuan": "",
# "keyword": [],
# "kategori": [],
# "kategori_mapset": ""
# }
} }
ai_suggest = send_metadata(ai_context) ai_suggest = send_metadata(ai_context)
# ai_suggest = {'judul': 'Peta Risiko Letusan Gunung Arjuna di Provinsi Jawa Timur', 'abstrak': 'Peta ini menggambarkan wilayah berisiko letusan Gunung Arjuna yang berada di Provinsi Jawa Timur. Data disajikan dalam bentuk poligon yang menunjukkan zona risiko berdasarkan analisis potensi aktivitas vulkanik.', 'tujuan': 'Data dapat digunakan untuk perencanaan mitigasi bencana dan pengambilan keputusan di wilayah Jawa Timur.', 'keyword': ['Risiko letusan', 'Gunung Arjuna', 'Bencana alam', 'Provinsi Jawa Timur', 'Geologi'], 'kategori': ['Geoscientific information', 'Environment'], 'kategori_mapset': 'Lingkungan Hidup'} # ai_suggest = {'judul': 'Peta Risiko Letusan Gunung Arjuna di Provinsi Jawa Timur', 'abstrak': 'Peta ini menggambarkan wilayah berisiko letusan Gunung Arjuna yang berada di Provinsi Jawa Timur. Data disajikan dalam bentuk poligon yang menunjukkan zona risiko berdasarkan analisis potensi aktivitas vulkanik.', 'tujuan': 'Data dapat digunakan untuk perencanaan mitigasi bencana dan pengambilan keputusan di wilayah Jawa Timur.', 'keyword': ['Risiko letusan', 'Gunung Arjuna', 'Bencana alam', 'Provinsi Jawa Timur', 'Geologi'], 'kategori': ['Geoscientific information', 'Environment'], 'kategori_mapset': 'Lingkungan Hidup'}
# print(ai_suggest)
tmp_file = generate_unique_filename()
await asyncio.to_thread(process_dataframe_synchronous, result, tmp_file)
response = { response = {
"message": "File berhasil dibaca dan dianalisis.", "message": "File berhasil dibaca dan dianalisis.",
@ -321,7 +178,8 @@ def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
"warnings": warnings, "warnings": warnings,
"warning_rows": warning_safe, "warning_rows": warning_safe,
"preview": preview_safe, "preview": preview_safe,
"metadata_suggest": ai_suggest "metadata_suggest": ai_suggest,
"tmp_path": tmp_file
} }
# return successRes(content=response) # return successRes(content=response)
@ -393,7 +251,7 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
if df is None or (hasattr(df, "empty") and df.empty): if df is None or (hasattr(df, "empty") and df.empty):
return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid") return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid")
res = process_data(df, ext, fname, fileDesc) res = await process_data(df, ext, fname, fileDesc)
tmp_path.unlink(missing_ok=True) tmp_path.unlink(missing_ok=True)
@ -427,7 +285,7 @@ async def handle_process_pdf(payload: PdfRequest):
if df is None or (hasattr(df, "empty") and df.empty): if df is None or (hasattr(df, "empty") and df.empty):
return errorRes(message="Tidak ada tabel") return errorRes(message="Tidak ada tabel")
res = process_data(df, '.pdf', payload.fileName, payload.fileDesc) res = await process_data(df, '.pdf', payload.fileName, payload.fileDesc)
return successRes(data=res) return successRes(data=res)
except Exception as e: except Exception as e:
@ -447,6 +305,7 @@ async def handle_process_pdf(payload: PdfRequest):
class UploadRequest(BaseModel): class UploadRequest(BaseModel):
title: str title: str
path: str
rows: List[dict] rows: List[dict]
columns: List[str] columns: List[str]
author: Dict[str, Any] author: Dict[str, Any]
@ -483,6 +342,14 @@ def str_to_date(raw_date: str):
def generate_unique_filename(folder="tmp", ext="parquet", digits=6):
    """Return a path to a not-yet-existing file inside *folder*.

    The folder is created if missing. The file name is derived from a random
    UUID4 integer, and the loop retries on the (practically impossible)
    collision with an existing file.

    Args:
        folder: Directory the file will live in (created on demand).
        ext:    File extension without the leading dot.
        digits: Unused; kept for backward compatibility with existing callers.

    Returns:
        A ``"<folder>/<id>.<ext>"`` path string that does not exist yet.
    """
    os.makedirs(folder, exist_ok=True)
    while True:
        # Fixed: original had a duplicated assignment (`file_id = file_id = ...`).
        file_id = uuid.uuid4().int
        filename = f"{folder}/{file_id}.{ext}"
        if not os.path.exists(filename):
            return filename
def generate_job_id(user_id: str) -> str: def generate_job_id(user_id: str) -> str:
timestamp = datetime.now().strftime("%Y%m%d%H%M%S") timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
@ -502,9 +369,135 @@ def save_xml_to_sld(xml_string, filename):
return file_path return file_path
async def process_parquet_upload(filename: str, table_name: str):
    """Load a temporary parquet file and bulk-insert it into a new PostGIS table.

    Steps:
      1. Read ``tmp/<filename>`` into a DataFrame (off the event loop).
      2. Upper-case/strip column names; require a ``GEOM`` column holding
         WKT (str) or WKB (bytes) geometries.
      3. Normalize each geometry to a Multi* EWKT string (SRID 4326) and
         stringify all attribute values.
      4. CREATE TABLE + COPY the rows, then ALTER the text column into a
         typed 2D geometry column with a GIST index.

    The source file is deleted only after a successful import. Errors are
    printed and swallowed (the caller tracks job status separately).

    NOTE(review): ``table_name`` is interpolated directly into DDL/COPY —
    it must be trusted, server-generated input (SQL-injection risk otherwise).
    """
    from main import db_pool  # lazy import avoids a circular dependency with main

    file_path = os.path.join("tmp", filename)
    if not os.path.exists(file_path):
        print(f"File {file_path} tidak ditemukan")
        return

    try:
        loop = asyncio.get_running_loop()
        # pd.read_parquet is blocking; run it in a worker thread.
        df = await loop.run_in_executor(None, pd.read_parquet, file_path)

        # =====================================================================
        # 1. CLEANING NAMA KOLOM
        # =====================================================================
        df.columns = [str(col).strip().upper() for col in df.columns]
        # (Removed a no-op `df.rename({"GEOM": "GEOM"})` from the original.)
        if "GEOM" not in df.columns:
            raise Exception("Kolom GEOM tidak ditemukan")

        # =====================================================================
        # 2. PERSIAPAN DATA (Row Processing)
        # =====================================================================
        clean_rows = []
        geom_types = set()

        # This exact list drives both CREATE TABLE and COPY, so they stay in sync.
        attr_columns = [col for col in df.columns if col != "GEOM"]

        # Use POSITIONAL access: itertuples() renames columns that are not
        # valid Python identifiers, so getattr(row, col) could silently
        # return None for such columns and drop their data.
        geom_pos = df.columns.get_loc("GEOM")
        attr_pos = [df.columns.get_loc(col) for col in attr_columns]

        from shapely import wkb  # hoisted out of the per-row loop

        for row in df.itertuples(index=False, name=None):
            # --- Handle GEOM ---
            raw_geom = row[geom_pos]
            # Only WKT strings / WKB bytes are usable; NaN/None/other -> skip.
            if not isinstance(raw_geom, (str, bytes)) or not raw_geom:
                continue
            try:
                if isinstance(raw_geom, str):
                    geom = wkt.loads(raw_geom)
                else:
                    geom = wkb.loads(raw_geom)
                if not geom:
                    continue
                if not geom.is_valid:
                    geom = geom.buffer(0)  # standard shapely fix for self-intersections

                # Promote singles so the whole table holds one uniform Multi* type.
                gtype = geom.geom_type.upper()
                if gtype == "POLYGON":
                    geom = MultiPolygon([geom])
                elif gtype == "LINESTRING":
                    geom = MultiLineString([geom])

                geom_types.add(geom.geom_type)
                ewkt = f"SRID=4326;{geom.wkt}"
            except Exception:
                continue  # unparseable geometry -> skip row

            # --- Handle Attributes (FORCE STRING) ---
            row_data = []
            for pos in attr_pos:
                val = row[pos]
                # Keep real missing values as NULL instead of the literal "nan".
                if val is None or (isinstance(val, float) and pd.isna(val)):
                    row_data.append(None)
                else:
                    row_data.append(str(val))
            row_data.append(ewkt)
            clean_rows.append(tuple(row_data))

        if not clean_rows:
            raise Exception("Data valid kosong")

        # =====================================================================
        # 3. DATABASE OPERATIONS
        # =====================================================================
        # All rows were promoted to Multi*, so normally a single type remains;
        # the "GEOM" fallback is unreachable while clean_rows is non-empty.
        final_geom_type = list(geom_types)[0].upper() if geom_types else "GEOM"
        if "MULTI" not in final_geom_type and final_geom_type != "GEOM":
            final_geom_type = "MULTI" + final_geom_type

        # A. BUILD DDL — quoted identifiers keep the UPPERCASE names in Postgres.
        col_defs = [f'"{col}" TEXT' for col in attr_columns]
        create_sql = f"""
            CREATE TABLE {table_name} (
                _id SERIAL PRIMARY KEY, -- lowercase default
                {', '.join(col_defs)}, -- UPPERCASE (Hasil loop attr_columns)
                geom TEXT -- lowercase
            );
        """

        async with db_pool.acquire() as conn:
            # 1. Create Table
            await conn.execute(create_sql)

            # 2. COPY Data — column order must match the tuples in clean_rows.
            target_cols = attr_columns + ['geom']
            await conn.copy_records_to_table(
                table_name,
                records=clean_rows,
                columns=target_cols
            )

            # 3. Convert the TEXT column into a real 2D geometry + spatial index.
            alter_sql = f"""
                ALTER TABLE {table_name}
                ALTER COLUMN geom TYPE geometry({final_geom_type}, 4326)
                USING ST_Force2D(geom::geometry)::geometry({final_geom_type}, 4326);

                CREATE INDEX idx_{table_name}_geom ON {table_name} USING GIST (geom);
            """
            await conn.execute(alter_sql)

        print(f"Sukses upload {len(clean_rows)} baris ke {table_name}.")
        os.remove(file_path)  # delete the tmp file only after success

    except Exception as e:
        # Best-effort job step: log and swallow so the caller can continue.
        print(f"Error processing parquet: {e}")
async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
@ -580,26 +573,10 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
raise HTTPException(400, f"CRS {detected_crs} tidak valid") raise HTTPException(400, f"CRS {detected_crs} tidak valid")
# ===================================================================== # =====================================================================
# 7. SIMPAN KE POSTGIS (synchronous) # 7. SIMPAN KE POSTGIS
# ===================================================================== # =====================================================================
loop = asyncio.get_running_loop() job_id = generate_job_id(str(user_id))
await loop.run_in_executor( await process_parquet_upload(payload.path, table_name)
None,
lambda: gdf.to_postgis(
table_name,
sync_engine,
if_exists="replace",
index=False
)
)
# =====================================================================
# 8. ADD PRIMARY KEY (wajib untuk QGIS API)
# =====================================================================
async with engine.begin() as conn:
await conn.execute(text(
f'ALTER TABLE "{table_name}" ADD COLUMN _ID SERIAL PRIMARY KEY;'
))
# ===================================================================== # =====================================================================
# 9. SIMPAN METADATA (geom_type, author metadata) # 9. SIMPAN METADATA (geom_type, author metadata)
@ -646,7 +623,6 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
"dataset_title": payload.title, "dataset_title": payload.title,
"dataset_abstract": author.get("abstract"), "dataset_abstract": author.get("abstract"),
"keywords": author.get("keywords"), "keywords": author.get("keywords"),
# "topic_category": author.get("topicCategory"),
"topic_category": ", ".join(author.get("topicCategory")), "topic_category": ", ".join(author.get("topicCategory")),
"date_created": str_to_date(author.get("dateCreated")), "date_created": str_to_date(author.get("dateCreated")),
"dataset_status": author.get("status"), "dataset_status": author.get("status"),
@ -660,7 +636,6 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
"geometry_count": row_count "geometry_count": row_count
}) })
# ===================================================================== # =====================================================================
# 10. LOGGING # 10. LOGGING
# ===================================================================== # =====================================================================
@ -671,7 +646,6 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
details={"table_name": table_name, "rows": len(gdf)} details={"table_name": table_name, "rows": len(gdf)}
) )
job_id = generate_job_id(str(user_id))
result = { result = {
"job_id": job_id, "job_id": job_id,
"job_status": "wait", "job_status": "wait",
@ -685,9 +659,6 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
} }
save_xml_to_sld(payload.style, job_id) save_xml_to_sld(payload.style, job_id)
# await report_progress(job_id, "upload", 20, "Upload selesai")
# cleansing_data(table_name, job_id)
cleansing = await query_cleansing_data(table_name) cleansing = await query_cleansing_data(table_name)
result['job_status'] = cleansing result['job_status'] = cleansing
@ -698,13 +669,13 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
"name": payload.title, "name": payload.title,
"description": author.get("abstract"), "description": author.get("abstract"),
"scale": "1:25000", "scale": "1:25000",
"projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7", 'projection_system_id': '0196c746-d1ba-7f1c-9706-5df738679cc7',
"category_id": author.get("mapsetCategory"), "category_id": author.get("mapsetCategory"),
"data_status": "sementara", "data_status": "sementara",
"classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4", 'classification_id': '01968b4b-d3f9-76c9-888c-ee887ac31ce4',
"producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187", 'producer_id': '01968b54-0000-7a67-bd10-975b8923b93e',
"layer_type": unified_geom_type[0], "layer_type": unified_geom_type[0],
"source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"], 'source_id': ['019c03ef-35e1-738b-858d-871dc7d1e4d6'],
"layer_url": publish['geos_link'], "layer_url": publish['geos_link'],
"metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish['uuid']}", "metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish['uuid']}",
"coverage_level": "provinsi", "coverage_level": "provinsi",
@ -713,12 +684,11 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
"data_version": "2026", "data_version": "2026",
"is_popular": False, "is_popular": False,
"is_active": True, "is_active": True,
"regional_id": "01968b53-a910-7a67-bd10-975b8923b92e", 'regional_id': '01968b53-a910-7a67-bd10-975b8923b92e',
"notes": "Mapset baru dibuat", "notes": "Mapset baru dibuat",
"status_validation": "on_verification", "status_validation": "on_verification",
} }
print("mapset data",mapset)
await upload_to_main(mapset) await upload_to_main(mapset)
return successRes(data=result) return successRes(data=result)
@ -731,265 +701,3 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
details={"error": str(e)} details={"error": str(e)}
) )
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
# try:
# job_id = generate_job_id(str(user_id))
# result = {
# "job_id": job_id,
# "job_status": "done",
# "table_name": "just for test",
# "status": "success",
# "message": f"Tabel test berhasil dibuat.",
# "total_rows": 10,
# "geometry_type": "Polygon",
# "crs": "EPSG 4326",
# "metadata_uuid": "-"
# }
# mapset = {
# "name": "Resiko Letusan Gunung Arjuno",
# "description": "Testing Automation Upload",
# "scale": "1:25000",
# "projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7",
# "category_id": "0196c80c-855f-77f9-abd0-0c8a30b8c2f5",
# "data_status": "sementara",
# "classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4",
# "producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187",
# "layer_type": "polygon",
# "source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"],
# "layer_url": "http://192.168.60.24:8888/geoserver/wms?service=WMS&version=1.1.0&request=GetMap&layers=labai:risiko_letusan_gunung_arjuno_bromo&bbox=110.89528623700005,-8.780412043999945,116.26994997700001,-5.042971664999925&width=768&height=534&srs=EPSG:4326&styles=&format=application/openlayers",
# "metadata_url": "http://192.168.60.24:7777/geonetwork/srv/eng/catalog.search#/metadata/9e5e2f09-13ef-49b5-bb49-1cb12136f63b",
# "coverage_level": "provinsi",
# "coverage_area": "kabupaten",
# "data_update_period": "Tahunan",
# "data_version": "2026",
# "is_popular": False,
# "is_active": True,
# "regional_id": "01968b53-a910-7a67-bd10-975b8923b92e",
# "notes": "Mapset baru dibuat",
# "status_validation": "on_verification",
# }
# await upload_to_main(mapset)
# return successRes(data=result)
# except Exception as e:
# print("errot", e)
# ===================================
# partition +VIEW
# ===================================
# Daftar prefix WKT yang valid
# VALID_WKT_PREFIXES = ("POINT", "LINESTRING", "POLYGON", "MULTIPOLYGON", "MULTILINESTRING")
def slugify(value: str) -> str:
    """Turn a dataset title into a safe snake_case name for a VIEW."""
    parts = re.split(r'[^a-zA-Z0-9]+', value.lower())
    return '_'.join(p for p in parts if p)
# Partition + VIEW
# async def create_dataset_view_from_metadata(conn, metadata_id: int, user_id: int, title: str):
# norm_title = slugify(title)
# view_name = f"v_user_{user_id}_{norm_title}"
# base_table = f"test_partition_user_{user_id}"
# # Ambil daftar field
# result = await conn.execute(text("SELECT fields FROM dataset_metadata WHERE id=:mid"), {"mid": metadata_id})
# fields_json = result.scalar_one_or_none()
# base_columns = {"id", "user_id", "metadata_id", "geom"}
# columns_sql = ""
# field_list = []
# if fields_json:
# fields = json.loads(fields_json) if isinstance(fields_json, str) else fields_json
# field_list = fields
# for f in field_list:
# safe_col = slugify(f)
# alias_name = safe_col if safe_col not in base_columns else f"attr_{safe_col}"
# # CAST otomatis
# if safe_col in ["longitude", "latitude", "lon", "lat"]:
# columns_sql += f", (p.attributes->>'{f}')::float AS {alias_name}"
# else:
# columns_sql += f", p.attributes->>'{f}' AS {alias_name}"
# # Drop view lama
# await conn.execute(text(f"DROP VIEW IF EXISTS {view_name} CASCADE;"))
# # 🔥 Buat VIEW baru yang punya FID unik
# create_view_query = f"""
# CREATE OR REPLACE VIEW {view_name} AS
# SELECT
# row_number() OVER() AS fid, -- FID unik untuk QGIS
# p.id,
# p.user_id,
# p.metadata_id,
# p.geom
# {columns_sql},
# m.title,
# m.year,
# m.description
# FROM {base_table} p
# JOIN dataset_metadata m ON m.id = p.metadata_id
# WHERE p.metadata_id = {metadata_id};
# """
# await conn.execute(text(create_view_query))
# # Register geometry untuk QGIS
# await conn.execute(text(f"DELETE FROM geometry_columns WHERE f_table_name = '{view_name}';"))
# await conn.execute(text(f"""
# INSERT INTO geometry_columns
# (f_table_schema, f_table_name, f_geometry_column, coord_dimension, srid, type)
# VALUES ('public', '{view_name}', 'geom', 2, 4326, 'GEOMETRY');
# """))
# print(f"[INFO] VIEW {view_name} dibuat dengan FID unik dan kompatibel dengan QGIS.")
# async def handle_to_postgis(payload, engine, user_id: int = 3):
# """
# Menangani upload data spasial ke PostGIS (dengan partition per user).
# - Jika partisi belum ada, akan dibuat otomatis
# - Metadata dataset disimpan di tabel dataset_metadata
# - Data spasial dimasukkan ke tabel partisi (test_partition_user_{id})
# - VIEW otomatis dibuat untuk QGIS
# """
# try:
# df = pd.DataFrame(payload.rows)
# print(f"[INFO] Diterima {len(df)} baris data dari frontend.")
# # --- Validasi kolom geometry ---
# if "geometry" not in df.columns:
# raise errorRes(status_code=400, message="Kolom 'geometry' tidak ditemukan dalam data.")
# # --- Parsing geometry ke objek shapely ---
# df["geometry"] = df["geometry"].apply(
# lambda g: wkt.loads(g)
# if isinstance(g, str) and g.strip().upper().startswith(VALID_WKT_PREFIXES)
# else None
# )
# # --- Buat GeoDataFrame ---
# gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")
# # --- Metadata info dari payload ---
# # dataset_title = getattr(payload, "dataset_title", None)
# # dataset_year = getattr(payload, "dataset_year", None)
# # dataset_desc = getattr(payload, "dataset_description", None)
# dataset_title = "hujan 2045"
# dataset_year = 2045
# dataset_desc = "test metadata"
# if not dataset_title:
# raise errorRes(status_code=400, detail="Field 'dataset_title' wajib ada untuk metadata.")
# async with engine.begin() as conn:
# fields = [col for col in df.columns if col != "geometry"]
# # 💾 1⃣ Simpan Metadata Dataset
# print("[INFO] Menyimpan metadata dataset...")
# result = await conn.execute(
# text("""
# INSERT INTO dataset_metadata (user_id, title, year, description, fields, created_at)
# VALUES (:user_id, :title, :year, :desc, :fields, :created_at)
# RETURNING id;
# """),
# {
# "user_id": user_id,
# "title": dataset_title,
# "year": dataset_year,
# "desc": dataset_desc,
# "fields": json.dumps(fields),
# "created_at": datetime.utcnow(),
# },
# )
# metadata_id = result.scalar_one()
# print(f"[INFO] Metadata disimpan dengan ID {metadata_id}")
# # ⚙️ 2⃣ Auto-create Partisi Jika Belum Ada
# print(f"[INFO] Memastikan partisi test_partition_user_{user_id} tersedia...")
# await conn.execute(
# text(f"""
# DO $$
# BEGIN
# IF NOT EXISTS (
# SELECT 1 FROM pg_tables WHERE tablename = 'test_partition_user_{user_id}'
# ) THEN
# EXECUTE format('
# CREATE TABLE test_partition_user_%s
# PARTITION OF test_partition
# FOR VALUES IN (%s);
# ', {user_id}, {user_id});
# EXECUTE format('CREATE INDEX IF NOT EXISTS idx_partition_user_%s_geom ON test_partition_user_%s USING GIST (geom);', {user_id}, {user_id});
# EXECUTE format('CREATE INDEX IF NOT EXISTS idx_partition_user_%s_metadata ON test_partition_user_%s (metadata_id);', {user_id}, {user_id});
# END IF;
# END
# $$;
# """)
# )
# # 🧩 3⃣ Insert Data Spasial ke Partisi
# print(f"[INFO] Memasukkan data ke test_partition_user_{user_id} ...")
# insert_count = 0
# for _, row in gdf.iterrows():
# geom_wkt = row["geometry"].wkt if row["geometry"] is not None else None
# attributes = row.drop(labels=["geometry"]).to_dict()
# await conn.execute(
# text("""
# INSERT INTO test_partition (user_id, metadata_id, geom, attributes, created_at)
# VALUES (:user_id, :metadata_id, ST_Force2D(ST_GeomFromText(:geom, 4326)),
# CAST(:attr AS jsonb), :created_at);
# """),
# {
# "user_id": user_id,
# "metadata_id": metadata_id,
# "geom": geom_wkt,
# "attr": json.dumps(attributes),
# "created_at": datetime.utcnow(),
# },
# )
# insert_count += 1
# # 🧩 4⃣ Membuat VIEW untuk dataset baru di QGIS
# await create_dataset_view_from_metadata(conn, metadata_id, user_id, dataset_title)
# print(f"[INFO] ✅ Berhasil memasukkan {insert_count} baris ke partisi user_id={user_id} (metadata_id={metadata_id}).")
# return {
# "status": "success",
# "user_id": user_id,
# "metadata_id": metadata_id,
# "dataset_title": dataset_title,
# "inserted_rows": insert_count,
# "geometry_type": list(gdf.geom_type.unique()),
# }
# except Exception as e:
# print(f"[ERROR] Gagal upload ke PostGIS partition: {e}")
# raise errorRes(status_code=500, message="Gagal upload ke PostGIS partition", details=str(e))

View File

@ -0,0 +1,843 @@
import json
import os
import random
import subprocess
import uuid
import asyncpg
import pandas as pd
import geopandas as gpd
import numpy as np
import re
import zipfile
import tempfile
import asyncio
from pyproj import CRS
from shapely.geometry.base import BaseGeometry
from shapely.geometry import base as shapely_base
from fastapi import Depends, File, Form, UploadFile, HTTPException
from api.routers.datasets_router import cleansing_data, publish_layer, query_cleansing_data, upload_to_main
from core.config import DB_DSN, DB_HOST, DB_NAME, DB_PASS, DB_PORT, DB_USER, UPLOAD_FOLDER, MAX_FILE_MB, GEONETWORK_URL
from services.upload_file.ai_generate import send_metadata
from services.upload_file.readers.reader_csv import read_csv
from services.upload_file.readers.reader_shp import read_shp
from services.upload_file.readers.reader_gdb import read_gdb
from services.upload_file.readers.reader_mpk import read_mpk
from services.upload_file.readers.reader_pdf import convert_df, read_pdf
from services.upload_file.utils.df_validation import process_dataframe_synchronous
from services.upload_file.utils.geometry_detector import detect_and_build_geometry, attach_polygon_geometry_auto
from database.connection import engine, sync_engine
from pydantic import BaseModel
from typing import Any, Dict, List, Optional
from shapely import MultiLineString, MultiPolygon, wkt
from sqlalchemy import text
from datetime import datetime
from response import successRes, errorRes
from utils.logger_config import log_activity
# Base.metadata.create_all(bind=engine)
def is_geom_empty(g):
    """Return True when *g* holds no usable geometry (None, NaN, or an empty shape)."""
    if g is None:
        return True
    if isinstance(g, float) and pd.isna(g):
        return True
    return g.is_empty if isinstance(g, BaseGeometry) else False
def safe_json(value):
    """Coerce numpy/pandas/shapely scalars into plain JSON-serializable values."""
    if isinstance(value, (np.int64, np.int32)):
        return int(value)
    if isinstance(value, (np.float64, np.float32)):
        return float(value)
    if isinstance(value, pd.Timestamp):
        return value.isoformat()
    if isinstance(value, shapely_base.BaseGeometry):
        return str(value)  # WKT string representation
    return None if pd.isna(value) else value
def detect_zip_type(zip_path: str) -> str:
    """Classify a ZIP archive as 'gdb', 'shp', or 'unknown' from its member names."""
    with zipfile.ZipFile(zip_path, "r") as archive:
        names = [member.lower() for member in archive.namelist()]
    # FileGDB: either a *.gdb/ folder or the characteristic table files.
    if any(".gdb/" in name for name in names):
        return "gdb"
    gdb_markers = (".gdbtable", ".gdbtablx", ".gdbindexes", ".spx")
    if any(name.endswith(gdb_markers) for name in names):
        return "gdb"
    if any(name.endswith(".shp") for name in names):
        return "shp"
    return "unknown"
async def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
    """Profile an uploaded table: build geometry, measure validity, request AI
    metadata suggestions, stash the processed frame as a temp parquet file,
    and return a summary dict for the preview UI.

    Returns the summary dict on success, or an errorRes (422) when no
    geometry could be matched at all.
    """
    result = detect_and_build_geometry(df, master_polygons=None)
    # Fallback: attach polygons automatically when no geometry column could
    # be derived directly from the attributes.
    if not hasattr(result, "geometry") or result.geometry.isna().all():
        result = attach_polygon_geometry_auto(result)

    def normalize_geom_type(geom_type):
        # Collapse Multi* variants to the base type (MultiPolygon -> Polygon).
        if geom_type.startswith("Multi"):
            return geom_type.replace("Multi", "")
        return geom_type

    if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
        geom_types = (
            result.geometry
            .dropna()
            .geom_type
            .apply(normalize_geom_type)
            .unique()
        )
        # NOTE(review): only the first detected type is reported, even when
        # the layer mixes geometry types.
        geom_type = geom_types[0] if len(geom_types) > 0 else "None"
        null_geom = result.geometry.isna().sum()
        print(f"[INFO] Tipe Geometry: {geom_type}")
        print(f"[INFO] Jumlah geometry kosong: {null_geom}")
    else:
        # Geometry detection failed entirely -> structured 422 response.
        res = {
            "message": "Tidak menemukan tabel yang relevan.",
            "file_type": ext,
            "rows": 0,
            "columns": 0,
            "geometry_valid": 0,
            "geometry_empty": 0,
            "geometry_valid_percent": 0,
            "warnings": [],
            "warning_examples": [],
            "preview": []
        }
        return errorRes(message="Tidak berhasil mencocokan geometry pada tabel." ,details=res, status_code=422)
    # Normalize NA/inf placeholders so JSON serialization cannot fail.
    result = result.replace([pd.NA, float('inf'), float('-inf')], None)
    if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
        # Serialize geometries to WKT strings (JSON/parquet friendly).
        result['geometry'] = result['geometry'].apply(
            lambda g: g.wkt if g is not None else None
        )
    empty_count = result['geometry'].apply(is_geom_empty).sum()
    valid_count = len(result) - empty_count
    # NOTE(review): divides by len(result) — assumes the frame is non-empty.
    match_percentage = (valid_count / len(result)) * 100
    warnings = []
    if empty_count > 0:
        warnings.append(
            f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
            f"({100 - match_percentage:.2f}% data gagal cocok)."
        )
    if empty_count > 0:
        # Cap the reported examples at 500 rows to bound the response size.
        examples = result[result['geometry'].apply(is_geom_empty)].head(500)
        warning_examples = examples.to_dict(orient="records")
    else:
        warning_examples = []
    preview_data = result.to_dict(orient="records")
    preview_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in preview_data
    ]
    warning_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
    ]
    # Context fed to the AI metadata-suggestion service.
    ai_context = {
        "nama_file_peta": filename,
        "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD) Provinsi Jatim",
        "tipe_data_spasial": geom_type,
        "deskripsi_singkat": fileDesc,
        "struktur_atribut_data": {},
    }
    ai_suggest = send_metadata(ai_context)
    # Persist the processed frame so the later commit step can pick it up.
    tmp_file = generate_unique_filename()
    await asyncio.to_thread(process_dataframe_synchronous, result, tmp_file)
    response = {
        "message": "File berhasil dibaca dan dianalisis.",
        "file_name": filename,
        "file_type": ext,
        "rows": int(len(result)),
        "columns": list(map(str, result.columns)),
        "geometry_valid": int(valid_count),
        "geometry_empty": int(empty_count),
        "geometry_valid_percent": float(round(match_percentage, 2)),
        "geometry_type": geom_type,
        "warnings": warnings,
        "warning_rows": warning_safe,
        "preview": preview_safe,
        "metadata_suggest": ai_suggest,
        "tmp_path": tmp_file
    }
    return response
async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form(""), fileDesc: Optional[str] = Form("")):
    """Receive an uploaded file, dispatch to the reader matching its extension
    (csv/xlsx/mpk/pdf/zip), and run the analysis pipeline on the result.

    Returns successRes with the analysis payload, or errorRes on failure.
    """
    fname = file.filename
    ext = os.path.splitext(fname)[1].lower()
    contents = await file.read()
    size_mb = len(contents) / (1024*1024)
    if size_mb > MAX_FILE_MB:
        raise errorRes(status_code=413, message="Ukuran File Terlalu Besar")
    # Spool the upload to disk so the readers can work from a file path.
    tmp_path = UPLOAD_FOLDER / fname
    with open(tmp_path, "wb") as f:
        f.write(contents)
    try:
        df = None
        print('ext', ext)
        if ext == ".csv":
            df = read_csv(str(tmp_path))
        elif ext == ".xlsx":
            # NOTE(review): xlsx is routed through read_csv with a sheet
            # argument — confirm read_csv also handles Excel input.
            df = read_csv(str(tmp_path), sheet)
        elif ext == ".mpk":
            df = read_mpk(str(tmp_path))
        elif ext == ".pdf":
            tbl = read_pdf(tmp_path, page)
            if len(tbl) == 0:
                res = {
                    "message": "Tidak ditemukan tabel valid pada halaman yang dipilih",
                    "tables": {},
                    "file_type": ext
                }
                return successRes(message="Tidak ditemukan tabel valid pada halaman yang dipilih", data=res)
            elif len(tbl) > 1:
                # Multiple candidate tables: return them so the client can pick one.
                res = {
                    "message": "File berhasil dibaca dan dianalisis.",
                    "tables": tbl,
                    "file_type": ext
                }
                return successRes(data=res, message="File berhasil dibaca dan dianalisis.")
            else:
                df = convert_df(tbl[0])
        elif ext == ".zip":
            # A ZIP may wrap either a Shapefile set or a FileGDB.
            zip_type = detect_zip_type(str(tmp_path))
            if zip_type == "shp":
                print("[INFO] ZIP terdeteksi sebagai Shapefile.")
                df = read_shp(str(tmp_path))
            elif zip_type == "gdb":
                print("[INFO] ZIP terdeteksi sebagai Geodatabase (GDB).")
                df = read_gdb(str(tmp_path))
            else:
                return successRes(message="ZIP file tidak mengandung SHP / GDB valid.")
        else:
            raise errorRes(status_code=400, message="Unsupported file type")
        if df is None or (hasattr(df, "empty") and df.empty):
            return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid")
        res = await process_data(df, ext, fname, fileDesc)
        # Remove the spooled upload only on success; kept on failure for debugging.
        tmp_path.unlink(missing_ok=True)
        return successRes(data=res)
    except Exception as e:
        print(f"[ERROR] {e}")
        return errorRes(
            message="Internal Server Error",
            details=str(e),
            status_code=500
        )
# finally:
# db_session.close()
class PdfRequest(BaseModel):
    """Payload for re-processing a single table extracted from a PDF."""
    # Title of the table as detected in the PDF.
    title: str
    # Column headers of the extracted table.
    columns: List[str]
    # Row values, one inner list per table row.
    rows: List[List]
    # Original uploaded file name (used for AI metadata context).
    fileName: str
    # User-supplied description of the file.
    fileDesc: str
async def handle_process_pdf(payload: PdfRequest):
    """Convert an already-extracted PDF table payload into a DataFrame and
    run the standard analysis pipeline on it."""
    try:
        frame = convert_df(payload.model_dump())
        if frame is None or (hasattr(frame, "empty") and frame.empty):
            return errorRes(message="Tidak ada tabel")
        analysis = await process_data(frame, '.pdf', payload.fileName, payload.fileDesc)
        return successRes(data=analysis)
    except Exception as e:
        print(f"[ERROR] {e}")
        return errorRes(message="Internal Server Error", details= str(e), status_code=500)
# finally:
# db_session.close()
class UploadRequest(BaseModel):
    """Payload for committing a previewed dataset into PostGIS."""
    # Dataset title; also the basis for the generated table name.
    title: str
    # Temp parquet file name (resolved relative to tmp/) produced at preview time.
    path: str
    # Preview rows; must include a GEOMETRY column holding WKT strings.
    rows: List[dict]
    # Column names of the preview rows.
    columns: List[str]
    # Author/metadata form fields (abstract, keywords, crs, contact, ...).
    author: Dict[str, Any]
    # SLD style XML to persist alongside the layer.
    style: str
# generate _2 if exist
async def generate_unique_table_name(base_name: str):
    """Slugify *base_name* and return the first ``_<n>``-suffixed variant
    (starting at ``_2``) that does not yet exist as a relation in the DB."""
    slug = base_name.lower().replace(" ", "_").replace("-", "_")
    async with engine.connect() as conn:
        suffix = 1
        candidate = slug
        while True:
            probe = await conn.execute(
                text("SELECT to_regclass(:tname)"),
                {"tname": candidate}
            )
            # to_regclass yields NULL (None) when the relation is absent.
            if probe.scalar() is None:
                return candidate
            suffix += 1
            candidate = f"{slug}_{suffix}"
def str_to_date(raw_date: str):
    """Parse an ISO ``YYYY-MM-DD`` string into a ``date``; None when empty
    or unparseable (a warning is printed on parse failure)."""
    if not raw_date:
        return None
    try:
        return datetime.strptime(raw_date, "%Y-%m-%d").date()
    except Exception as e:
        print("[WARNING] Tidak bisa parse dateCreated:", e)
        return None
def generate_unique_filename(folder="tmp", ext="parquet", digits=6):
    """Return a not-yet-existing path of the form ``<folder>/<uuid-int>.<ext>``.

    Creates *folder* if needed. ``digits`` is kept for backward compatibility
    but unused — names come from uuid4, not fixed-width digits.
    """
    os.makedirs(folder, exist_ok=True)
    while True:
        # Fix: original had a duplicated `file_id = file_id = ...` assignment.
        file_id = uuid.uuid4().int
        filename = f"{folder}/{file_id}.{ext}"
        if not os.path.exists(filename):
            return filename
def generate_job_id(user_id: str) -> str:
    """Compose a job identifier of the form ``<user_id>_<YYYYmmddHHMMSS>``."""
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    return "_".join((user_id, stamp))
def save_xml_to_sld(xml_string, filename):
    """Write *xml_string* to ``style_temp/<filename>.sld`` and return the path.

    Fix: the *filename* argument was previously ignored (the f-string carried
    no placeholder), so every call clobbered the same file.
    """
    folder_path = 'style_temp'
    os.makedirs(folder_path, exist_ok=True)
    file_path = os.path.join(folder_path, f"{filename}.sld")
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(xml_string)
    return file_path
async def process_parquet_upload(filename: str, table_name: str):
    """Load a temp parquet file, normalize its geometries, and bulk-load it
    into a new PostGIS table *table_name*.

    All attributes are stored as TEXT; geometries are promoted to Multi*,
    forced to 2D, stamped SRID 4326, and GIST-indexed. The source file is
    removed on success. Errors are printed, not raised (fire-and-forget).
    """
    from main import db_pool  # local import avoids a circular import with main
    file_path = os.path.join("tmp", filename)
    if not os.path.exists(file_path):
        print(f"File {file_path} tidak ditemukan")
        return
    try:
        loop = asyncio.get_running_loop()
        # Parquet reading is blocking; off-load it to the default executor.
        df = await loop.run_in_executor(None, pd.read_parquet, file_path)

        # --- 1. Normalize column names (strip + uppercase). ----------------
        # Fix: dropped the original no-op rename {"GEOM": "GEOM"}.
        df.columns = [str(col).strip().upper() for col in df.columns]
        if "GEOM" not in df.columns:
            raise Exception("Kolom GEOM tidak ditemukan")

        # --- 2. Row processing: parse WKT/WKB, promote to Multi*. ----------
        clean_rows = []
        geom_types = set()
        # Attribute columns drive both CREATE TABLE and COPY — must stay in sync.
        attr_columns = [col for col in df.columns if col != "GEOM"]
        for row in df.itertuples(index=False):
            raw_geom = getattr(row, "GEOM", None)
            if not raw_geom:
                continue
            try:
                geom = None
                if isinstance(raw_geom, str):
                    geom = wkt.loads(raw_geom)
                elif isinstance(raw_geom, bytes):
                    from shapely import wkb
                    geom = wkb.loads(raw_geom)
                # Explicit emptiness test instead of geometry truthiness.
                if geom is None or geom.is_empty:
                    continue
                if not geom.is_valid:
                    geom = geom.buffer(0)  # classic self-intersection repair
                gtype = geom.geom_type.upper()
                if gtype == "POLYGON":
                    geom = MultiPolygon([geom])
                elif gtype == "LINESTRING":
                    geom = MultiLineString([geom])
                geom_types.add(geom.geom_type)
                ewkt = f"SRID=4326;{geom.wkt}"
            except Exception:
                continue  # skip unparseable geometries
            # Attributes are forced to str so COPY never hits a type mismatch.
            row_data = []
            for col in attr_columns:
                val = getattr(row, col, None)
                row_data.append(str(val) if val is not None else None)
            row_data.append(ewkt)
            clean_rows.append(tuple(row_data))
        if not clean_rows:
            raise Exception("Data valid kosong")

        # --- 3. Database operations. ---------------------------------------
        # Fix: sorted() makes the type choice deterministic (was arbitrary
        # set ordering via list(geom_types)[0]).
        final_geom_type = sorted(geom_types)[0].upper() if geom_types else "GEOM"
        if "MULTI" not in final_geom_type and final_geom_type != "GEOM":
            final_geom_type = "MULTI" + final_geom_type
        # Quoted identifiers keep the UPPERCASE attribute names in the DB.
        col_defs = [f'"{col}" TEXT' for col in attr_columns]
        create_sql = f"""
            CREATE TABLE {table_name} (
                _id SERIAL PRIMARY KEY,
                {', '.join(col_defs)},
                geom TEXT
            );
        """
        async with db_pool.acquire() as conn:
            await conn.execute(create_sql)
            # COPY column order must match row_data: attributes, then geom.
            target_cols = attr_columns + ['geom']
            await conn.copy_records_to_table(
                table_name,
                records=clean_rows,
                columns=target_cols
            )
            # Convert the TEXT EWKT column into a typed, 2D, indexed geometry.
            alter_sql = f"""
                ALTER TABLE {table_name}
                ALTER COLUMN geom TYPE geometry({final_geom_type}, 4326)
                USING ST_Force2D(geom::geometry)::geometry({final_geom_type}, 4326);
                CREATE INDEX idx_{table_name}_geom ON {table_name} USING GIST (geom);
            """
            await conn.execute(alter_sql)
        print(f"Sukses upload {len(clean_rows)} baris ke {table_name}.")
        os.remove(file_path)
    except Exception as e:
        print(f"Error processing parquet: {e}")
async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
    """End-to-end commit of a previewed dataset: validate geometries, load the
    temp parquet into a new PostGIS table, store author metadata, publish the
    layer (GeoServer + GeoNetwork), and register the mapset in the main app.

    Raises HTTPException(500) wrapping any failure (after logging it).
    NOTE(review): HTTPExceptions raised inside the try (400s) are caught by
    the blanket handler below and re-raised as 500 — confirm intentional.
    """
    try:
        table_name = await generate_unique_table_name(payload.title)
        # Build a DataFrame from the preview rows; upper-case the columns so
        # the GEOMETRY column can be located case-insensitively.
        df = pd.DataFrame(payload.rows)
        df.columns = [col.upper() for col in df.columns]
        if "GEOMETRY" not in df.columns:
            raise HTTPException(400, "Kolom GEOMETRY tidak ditemukan")
        # 1. WKT string -> shapely geometry (missing/invalid become None).
        def safe_load_wkt(g):
            if not isinstance(g, str):
                return None
            try:
                geom = wkt.loads(g)
                return geom
            except:
                return None
        df["GEOMETRY"] = df["GEOMETRY"].apply(safe_load_wkt)
        df = df.rename(columns={"GEOMETRY": "geom"})
        # 2. Drop rows whose geometry failed to parse.
        df = df[df["geom"].notnull()]
        if df.empty:
            raise HTTPException(400, "Semua geometry invalid atau NULL")
        # 3. Repair invalid geometries (buffer(0) trick).
        df["geom"] = df["geom"].apply(lambda g: g if g.is_valid else g.buffer(0))
        # 4. Promote single-part types so the column is homogeneous.
        def unify_geometry_type(g):
            gtype = g.geom_type.upper()
            if gtype == "POLYGON":
                return MultiPolygon([g])
            if gtype == "LINESTRING":
                return MultiLineString([g])
            return g  # already MULTI* or POINT
        df["geom"] = df["geom"].apply(unify_geometry_type)
        # 5. CRS detection from the author metadata.
        detected_crs = payload.author.get("crs")
        detected = payload.author.get("crs")
        print('crs', detected)
        if not detected_crs:
            detected_crs = "EPSG:4326"
        # NOTE(review): this unconditionally forces EPSG:4326, making the
        # detection above dead code — confirm this override is intentional.
        detected_crs = 'EPSG:4326'
        gdf = gpd.GeoDataFrame(df, geometry="geom", crs=detected_crs)
        row_count = len(gdf)
        # 6. Sanity-check that the CRS is known to pyproj/PostGIS.
        try:
            _ = gdf.to_crs(gdf.crs)  # round-trip as a validity probe
        except:
            raise HTTPException(400, f"CRS {detected_crs} tidak valid")
        # 7. Bulk-load the temp parquet file into a fresh PostGIS table.
        # (A commented-out ogr2ogr-subprocess implementation was removed here.)
        job_id = generate_job_id(str(user_id))
        await process_parquet_upload(payload.path, table_name)
        # 8. Persist author metadata for the new table.
        unified_geom_type = list(gdf.geom_type.unique())
        author = payload.author
        async with engine.begin() as conn:
            await conn.execute(text("""
                INSERT INTO backend.author_metadata (
                    table_title,
                    dataset_title,
                    dataset_abstract,
                    keywords,
                    topic_category,
                    date_created,
                    dataset_status,
                    organization_name,
                    contact_person_name,
                    contact_email,
                    contact_phone,
                    geom_type,
                    user_id,
                    process,
                    geometry_count
                ) VALUES (
                    :table_title,
                    :dataset_title,
                    :dataset_abstract,
                    :keywords,
                    :topic_category,
                    :date_created,
                    :dataset_status,
                    :organization_name,
                    :contact_person_name,
                    :contact_email,
                    :contact_phone,
                    :geom_type,
                    :user_id,
                    :process,
                    :geometry_count
                )
            """), {
                "table_title": table_name,
                "dataset_title": payload.title,
                "dataset_abstract": author.get("abstract"),
                "keywords": author.get("keywords"),
                # NOTE(review): join raises if "topicCategory" is absent/None.
                "topic_category": ", ".join(author.get("topicCategory")),
                "date_created": str_to_date(author.get("dateCreated")),
                "dataset_status": author.get("status"),
                "organization_name": author.get("organization"),
                "contact_person_name": author.get("contactName"),
                "contact_email": author.get("contactEmail"),
                "contact_phone": author.get("contactPhone"),
                "geom_type": json.dumps(unified_geom_type),
                "user_id": user_id,
                "process": 'CLEANSING',
                "geometry_count": row_count
            })
        # 9. Activity log for the upload.
        await log_activity(
            user_id=user_id,
            action_type="UPLOAD",
            action_title=f"Upload dataset {table_name}",
            details={"table_name": table_name, "rows": len(gdf)}
        )
        result = {
            "job_id": job_id,
            "job_status": "wait",
            "table_name": table_name,
            "status": "success",
            "message": f"Tabel '{table_name}' berhasil dibuat.",
            "total_rows": len(gdf),
            "geometry_type": unified_geom_type,
            "crs": detected_crs,
            "metadata_uuid": ""
        }
        # Persist the SLD style under the job id, then run the pipeline:
        # cleansing -> publish (GeoServer + metadata) -> mapset registration.
        save_xml_to_sld(payload.style, job_id)
        cleansing = await query_cleansing_data(table_name)
        result['job_status'] = cleansing
        publish = await publish_layer(table_name, job_id)
        result['metadata_uuid'] = publish['uuid']
        # Mapset registration payload for the main application.
        # NOTE(review): several ids below are hard-coded environment values.
        mapset = {
            "name": payload.title,
            "description": author.get("abstract"),
            "scale": "1:25000",
            'projection_system_id': '0196c746-d1ba-7f1c-9706-5df738679cc7',
            "category_id": author.get("mapsetCategory"),
            "data_status": "sementara",
            'classification_id': '01968b4b-d3f9-76c9-888c-ee887ac31ce4',
            'producer_id': '01968b54-0000-7a67-bd10-975b8923b93e',
            "layer_type": unified_geom_type[0],
            'source_id': ['019c03ef-35e1-738b-858d-871dc7d1e4d6'],
            "layer_url": publish['geos_link'],
            "metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish['uuid']}",
            "coverage_level": "provinsi",
            "coverage_area": "kabupaten",
            "data_update_period": "Tahunan",
            "data_version": "2026",
            "is_popular": False,
            "is_active": True,
            'regional_id': '01968b53-a910-7a67-bd10-975b8923b92e',
            "notes": "Mapset baru dibuat",
            "status_validation": "on_verification",
        }
        print("mapset data",mapset)
        await upload_to_main(mapset)
        return successRes(data=result)
    except Exception as e:
        await log_activity(
            user_id=user_id,
            action_type="ERROR",
            action_title="Upload gagal",
            details={"error": str(e)}
        )
        raise HTTPException(status_code=500, detail=str(e))
# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
# try:
# os.remove(payload.path)
# job_id = generate_job_id(str(user_id))
# result = {
# "job_id": job_id,
# "job_status": "done",
# "table_name": "just for test",
# "status": "success",
# "message": f"Tabel test berhasil dibuat.",
# "total_rows": 10,
# "geometry_type": "Polygon",
# "crs": "EPSG 4326",
# "metadata_uuid": "-"
# }
# # mapset = {
# # 'name': 'TEST Risiko Letusan Gunung Arjuno',
# # 'description': 'Peta ini menampilkan area yang berpotensi mengalami letusan Gunung Arjuno dan Gunung Bromo di Provinsi Jawa Timur. Data disusun dalam bentuk poligon yang mengindikasikan tingkat risiko berdasarkan sejarah aktivitas dan karakteristik geologi di wilayah tersebut.',
# # 'scale': '1:25000',
# # 'projection_system_id': '0196c746-d1ba-7f1c-9706-5df738679cc7',
# # 'category_id': '0196c80c-855f-77f9-abd0-0c8a30b8c2f5',
# # 'data_status': 'sementara',
# # 'classification_id': '01968b4b-d3f9-76c9-888c-ee887ac31ce4',
# # 'producer_id': '01968b54-0000-7a67-bd10-975b8923b93e',
# # 'layer_type': 'MultiPolygon',
# # 'source_id': ['019c03ef-35e1-738b-858d-871dc7d1e4d6'],
# # 'layer_url': 'http://192.168.60.24:8888/geoserver/labai/wms?service=WMS&version=1.1.0&request=GetMap&layers=labai:test_risiko_letusan_gunung_arjuno&styles=&bbox=110.89528623700005%2C-8.780412043999945%2C116.26994997700001%2C-5.042971664999925&width=768&height=384&srs=EPSG:4326&format=application/openlayers',
# # 'metadata_url': 'http://192.168.60.24:7777/geonetwork/srv/eng/catalog.search#/metadata/123123',
# # 'coverage_level': 'provinsi',
# # 'coverage_area': 'kabupaten',
# # 'data_update_period': 'Tahunan',
# # 'data_version': '2026',
# # 'is_popular': False,
# # 'is_active': True,
# # 'regional_id': '01968b53-a910-7a67-bd10-975b8923b92e',
# # 'notes': 'Mapset baru dibuat',
# # 'status_validation': 'on_verification'
# # }
# # await upload_to_main(mapset)
# return successRes(data=result)
# except Exception as e:
# print("errot", e)

View File

@ -0,0 +1,992 @@
import json
import os
import pandas as pd
import geopandas as gpd
import numpy as np
import re
import zipfile
import tempfile
import asyncio
from pyproj import CRS
from shapely.geometry.base import BaseGeometry
from shapely.geometry import base as shapely_base
from fastapi import Depends, File, Form, UploadFile, HTTPException
from api.routers.datasets_router import cleansing_data, publish_layer, query_cleansing_data, upload_to_main
from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES, GEONETWORK_URL
from services.upload_file.ai_generate import send_metadata
from services.upload_file.readers.reader_csv import read_csv
from services.upload_file.readers.reader_shp import read_shp
from services.upload_file.readers.reader_gdb import read_gdb
from services.upload_file.readers.reader_mpk import read_mpk
from services.upload_file.readers.reader_pdf import convert_df, read_pdf
from services.upload_file.utils.geometry_detector import detect_and_build_geometry, attach_polygon_geometry_auto
from database.connection import engine, sync_engine
from pydantic import BaseModel
from typing import Any, Dict, List, Optional
from shapely import MultiLineString, MultiPolygon, wkt
from sqlalchemy import text
from datetime import datetime
from response import successRes, errorRes
from utils.logger_config import log_activity
# Base.metadata.create_all(bind=engine)
def is_geom_empty(g):
    """Return True when *g* holds no usable geometry (None, NaN, or an empty shape)."""
    if g is None:
        return True
    if isinstance(g, float) and pd.isna(g):
        return True
    return g.is_empty if isinstance(g, BaseGeometry) else False
def safe_json(value):
    """Coerce numpy/pandas/shapely scalars into plain JSON-serializable values."""
    if isinstance(value, (np.int64, np.int32)):
        return int(value)
    if isinstance(value, (np.float64, np.float32)):
        return float(value)
    if isinstance(value, pd.Timestamp):
        return value.isoformat()
    if isinstance(value, shapely_base.BaseGeometry):
        return str(value)  # WKT string representation
    return None if pd.isna(value) else value
def detect_zip_type(zip_path: str) -> str:
    """Classify a ZIP archive as 'gdb', 'shp', or 'unknown' from its member names."""
    with zipfile.ZipFile(zip_path, "r") as archive:
        names = [member.lower() for member in archive.namelist()]
    # FileGDB: either a *.gdb/ folder or the characteristic table files.
    if any(".gdb/" in name for name in names):
        return "gdb"
    gdb_markers = (".gdbtable", ".gdbtablx", ".gdbindexes", ".spx")
    if any(name.endswith(gdb_markers) for name in names):
        return "gdb"
    if any(name.endswith(".shp") for name in names):
        return "shp"
    return "unknown"
# def detect_zip_type(zip_path: str) -> str:
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# files = zip_ref.namelist()
# # -------------------------------------------------------------
# # 1) DETECT FileGDB
# # -------------------------------------------------------------
# is_gdb = (
# any(".gdb/" in f.lower() for f in files)
# or any(f.lower().endswith(ext) for ext in
# [".gdbtable", ".gdbtablx", ".gdbindexes", ".spx"] for f in files)
# )
# if is_gdb:
# print("\n[INFO] ZIP terdeteksi berisi FileGDB.")
# with tempfile.TemporaryDirectory() as temp_dir:
# # extract ZIP
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# zip_ref.extractall(temp_dir)
# # find folder *.gdb
# gdb_path = None
# for root, dirs, _ in os.walk(temp_dir):
# for d in dirs:
# if d.lower().endswith(".gdb"):
# gdb_path = os.path.join(root, d)
# break
# if not gdb_path:
# print("[ERROR] Folder .gdb tidak ditemukan.")
# return "gdb"
# print(f"[INFO] GDB Path: {gdb_path}")
# # Cari seluruh file .gdbtable
# table_files = [
# os.path.join(gdb_path, f)
# for f in os.listdir(gdb_path)
# if f.lower().endswith(".gdbtable")
# ]
# if not table_files:
# print("[ERROR] Tidak ada file .gdbtable ditemukan.")
# return "gdb"
# # Scan semua table file untuk mencari SpatialReference
# found_crs = False
# for table_file in table_files:
# try:
# with open(table_file, "rb") as f:
# raw = f.read(15000) # baca awal file, cukup untuk header JSON
# text = raw.decode("utf-8", errors="ignore")
# start = text.find("{")
# end = text.rfind("}") + 1
# if start == -1 or end == -1:
# continue
# json_str = text[start:end]
# meta = json.loads(json_str)
# spatial_ref = meta.get("SpatialReference")
# if not spatial_ref:
# continue
# wkt = spatial_ref.get("WKT")
# if not wkt:
# continue
# print(f"[FOUND] CRS metadata pada: {os.path.basename(table_file)}")
# print(f"[CRS WKT] {wkt[:200]}...")
# # Convert to EPSG
# try:
# epsg = CRS.from_wkt(wkt).to_epsg()
# print(f"[EPSG] {epsg}")
# except:
# print("[EPSG] Tidak ditemukan EPSG.")
# found_crs = True
# break
# except Exception:
# continue
# if not found_crs:
# print("[WARNING] Tidak ditemukan CRS di file .gdbtable manapun.")
# return "gdb"
# # -----------------------------------------------------
# # 2. DETEKSI SHP
# # -----------------------------------------------------
# if any(f.lower().endswith(".shp") for f in files):
# print("\n[INFO] ZIP terdeteksi berisi SHP.")
# # cari file .prj
# prj_files = [f for f in files if f.lower().endswith(".prj")]
# if not prj_files:
# print("[WARNING] Tidak ada file .prj → CRS tidak diketahui.")
# return "shp"
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# with tempfile.TemporaryDirectory() as temp_dir:
# prj_path = os.path.join(temp_dir, os.path.basename(prj_files[0]))
# zip_ref.extract(prj_files[0], temp_dir)
# # baca isi prj
# with open(prj_path, "r") as f:
# prj_text = f.read()
# try:
# crs = CRS.from_wkt(prj_text)
# print(f"[CRS WKT] {crs.to_wkt()[:200]}...")
# epsg = crs.to_epsg()
# if epsg:
# print(f"[EPSG] {epsg}")
# else:
# print("[EPSG] Tidak ditemukan dalam database EPSG.")
# except Exception as e:
# print("[ERROR] Gagal membaca CRS dari file PRJ:", e)
# return "shp"
# # -----------------------------------------------------
# # 3. UNKNOWN
# # -----------------------------------------------------
# return "unknown"
def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
    """
    Analyze an uploaded tabular dataset.

    Builds/attaches geometry, computes validity statistics, and requests
    AI metadata suggestions for the dataset.

    Args:
        df: Raw dataframe parsed from the uploaded file.
        ext: File extension (e.g. ".csv", ".zip").
        filename: Original upload file name.
        fileDesc: User-provided description, forwarded to the AI service.

    Returns:
        A summary dict on success, or an errorRes (HTTP 422) when no
        geometry could be matched to the table.
    """
    result = detect_and_build_geometry(df, master_polygons=None)
    # Fallback: when detection produced no usable geometry, try joining the
    # rows against the master polygon layers.
    if not hasattr(result, "geometry") or result.geometry.isna().all():
        result = attach_polygon_geometry_auto(result)

    def normalize_geom_type(geom_type):
        # Collapse Multi* variants into the base type (MultiPolygon -> Polygon).
        if geom_type.startswith("Multi"):
            return geom_type.replace("Multi", "")
        return geom_type

    if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
        geom_types = (
            result.geometry
            .dropna()
            .geom_type
            .apply(normalize_geom_type)
            .unique()
        )
        geom_type = geom_types[0] if len(geom_types) > 0 else "None"
        null_geom = result.geometry.isna().sum()
        print(f"[INFO] Tipe Geometry: {geom_type}")
        print(f"[INFO] Jumlah geometry kosong: {null_geom}")
    else:
        res = {
            "message": "Tidak menemukan tabel yang relevan.",
            "file_type": ext,
            "rows": 0,
            "columns": 0,
            "geometry_valid": 0,
            "geometry_empty": 0,
            "geometry_valid_percent": 0,
            "warnings": [],
            "warning_examples": [],
            "preview": []
        }
        return errorRes(message="Tidak berhasil mencocokan geometry pada tabel.", details=res, status_code=422)

    # Normalize non-JSON-serializable values and render geometry as WKT text.
    result = result.replace([pd.NA, float('inf'), float('-inf')], None)
    if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
        result['geometry'] = result['geometry'].apply(
            lambda g: g.wkt if g is not None else None
        )

    empty_count = result['geometry'].apply(is_geom_empty).sum()
    valid_count = len(result) - empty_count
    # BUG FIX: guard against ZeroDivisionError when the result set is empty.
    match_percentage = (valid_count / len(result)) * 100 if len(result) else 0.0

    warnings = []
    if empty_count > 0:
        warnings.append(
            f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
            f"({100 - match_percentage:.2f}% data gagal cocok)."
        )
        # Ship up to 500 failing rows back so the user can inspect them.
        examples = result[result['geometry'].apply(is_geom_empty)].head(500)
        warning_examples = examples.to_dict(orient="records")
    else:
        warning_examples = []

    preview_data = result.to_dict(orient="records")
    preview_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in preview_data
    ]
    warning_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
    ]

    # Context handed to the AI metadata-suggestion service.
    ai_context = {
        "nama_file_peta": filename,
        "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD) Provinsi Jatim",
        "tipe_data_spasial": geom_type,
        "deskripsi_singkat": fileDesc,
        "struktur_atribut_data": {},
    }
    ai_suggest = send_metadata(ai_context)

    response = {
        "message": "File berhasil dibaca dan dianalisis.",
        "file_name": filename,
        "file_type": ext,
        "rows": int(len(result)),
        "columns": list(map(str, result.columns)),
        "geometry_valid": int(valid_count),
        "geometry_empty": int(empty_count),
        "geometry_valid_percent": float(round(match_percentage, 2)),
        "geometry_type": geom_type,
        "warnings": warnings,
        "warning_rows": warning_safe,
        "preview": preview_safe,
        "metadata_suggest": ai_suggest
    }
    return response
async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form(""), fileDesc: Optional[str] = Form("")):
    """
    Receive an uploaded file, dispatch it to the reader matching its
    extension, then run the geometry analysis pipeline on the result.

    Supported: .csv, .xlsx, .mpk, .pdf (single/multi table), .zip (SHP/GDB).
    """
    fname = file.filename
    ext = os.path.splitext(fname)[1].lower()
    contents = await file.read()

    size_mb = len(contents) / (1024 * 1024)
    if size_mb > MAX_FILE_MB:
        # BUG FIX: errorRes builds a response object — it must be returned,
        # not raised (raising a non-exception is a TypeError).
        return errorRes(status_code=413, message="Ukuran File Terlalu Besar")

    # Persist the upload to a temp path so file-based readers can open it.
    tmp_path = UPLOAD_FOLDER / fname
    with open(tmp_path, "wb") as f:
        f.write(contents)

    try:
        df = None
        print('ext', ext)
        if ext == ".csv":
            df = read_csv(str(tmp_path))
        elif ext == ".xlsx":
            df = read_csv(str(tmp_path), sheet)
        elif ext == ".mpk":
            df = read_mpk(str(tmp_path))
        elif ext == ".pdf":
            tbl = read_pdf(tmp_path, page)
            if len(tbl) == 0:
                res = {
                    "message": "Tidak ditemukan tabel valid pada halaman yang dipilih",
                    "tables": {},
                    "file_type": ext
                }
                return successRes(message="Tidak ditemukan tabel valid pada halaman yang dipilih", data=res)
            elif len(tbl) > 1:
                # Multiple tables found: return them so the user can pick one;
                # processing resumes later via handle_process_pdf.
                res = {
                    "message": "File berhasil dibaca dan dianalisis.",
                    "tables": tbl,
                    "file_type": ext
                }
                return successRes(data=res, message="File berhasil dibaca dan dianalisis.")
            else:
                df = convert_df(tbl[0])
        elif ext == ".zip":
            zip_type = detect_zip_type(str(tmp_path))
            if zip_type == "shp":
                print("[INFO] ZIP terdeteksi sebagai Shapefile.")
                df = read_shp(str(tmp_path))
            elif zip_type == "gdb":
                print("[INFO] ZIP terdeteksi sebagai Geodatabase (GDB).")
                df = read_gdb(str(tmp_path))
            else:
                return successRes(message="ZIP file tidak mengandung SHP / GDB valid.")
        else:
            # BUG FIX: was `raise errorRes(...)`, which the generic handler
            # below turned into a 500 instead of the intended 400.
            return errorRes(status_code=400, message="Unsupported file type")

        if df is None or (hasattr(df, "empty") and df.empty):
            return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid")

        res = process_data(df, ext, fname, fileDesc)
        return successRes(data=res)
    except Exception as e:
        print(f"[ERROR] {e}")
        return errorRes(
            message="Internal Server Error",
            details=str(e),
            status_code=500
        )
    finally:
        # BUG FIX: remove the temp upload on every exit path; early returns
        # and exceptions previously leaked files into UPLOAD_FOLDER.
        tmp_path.unlink(missing_ok=True)
class PdfRequest(BaseModel):
    # Request body for re-processing one table extracted from a PDF upload
    # (see handle_process_pdf).
    title: str          # table title shown to the user
    columns: List[str]  # column headers of the selected table
    rows: List[List]    # table body, row-major
    fileName: str       # original PDF file name
    fileDesc: str       # user-supplied description, forwarded to AI metadata
async def handle_process_pdf(payload: PdfRequest):
    """Convert a user-selected PDF table into a dataframe and run the standard analysis."""
    try:
        table_df = convert_df(payload.model_dump())
        no_table = table_df is None or (hasattr(table_df, "empty") and table_df.empty)
        if no_table:
            return errorRes(message="Tidak ada tabel")
        analysis = process_data(table_df, '.pdf', payload.fileName, payload.fileDesc)
        return successRes(data=analysis)
    except Exception as exc:
        print(f"[ERROR] {exc}")
        return errorRes(message="Internal Server Error", details=str(exc), status_code=500)
# finally:
# db_session.close()
class UploadRequest(BaseModel):
    # Request body for publishing an analyzed dataset to PostGIS
    # (see handle_to_postgis).
    title: str              # dataset title; also the basis for the table name
    rows: List[dict]        # records including a WKT "geometry" key
    columns: List[str]      # column names of the dataset
    author: Dict[str, Any]  # author/metadata form fields (abstract, crs, ...)
    style: str              # SLD XML style document, saved for GeoServer
# generate _2 if exist
async def generate_unique_table_name(base_name: str):
    """Normalize a title to snake_case and append _2, _3, ... until the name is free in PostGIS."""
    normalized = base_name.lower().replace(" ", "_").replace("-", "_")
    candidate = normalized
    suffix = 1
    async with engine.connect() as conn:
        while True:
            lookup = await conn.execute(
                text("SELECT to_regclass(:tname)"),
                {"tname": candidate}
            )
            # to_regclass yields NULL when no relation with that name exists.
            if lookup.scalar() is None:
                return candidate
            suffix += 1
            candidate = f"{normalized}_{suffix}"
def str_to_date(raw_date: str):
    """Parse a 'YYYY-MM-DD' string into a date; return None for empty or unparseable input."""
    if not raw_date:
        return None
    try:
        parsed = datetime.strptime(raw_date, "%Y-%m-%d")
    except Exception as exc:
        print("[WARNING] Tidak bisa parse dateCreated:", exc)
        return None
    return parsed.date()
def generate_job_id(user_id: str) -> str:
    """Build a job id of the form '<user_id>_<YYYYMMDDHHMMSS>'."""
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    return "_".join((user_id, stamp))
def save_xml_to_sld(xml_string, filename):
    """
    Persist an SLD/XML style string to style_temp/<filename>.sld.

    Returns the path of the written file.
    """
    folder_path = 'style_temp'
    os.makedirs(folder_path, exist_ok=True)
    # BUG FIX: the filename argument was ignored (a hard-coded name was used),
    # so every job overwrote the same style file. Callers pass a unique job_id
    # and expect one .sld per job.
    file_path = os.path.join(folder_path, f"{filename}.sld")
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(xml_string)
    return file_path
async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
    """
    Full publication pipeline for an analyzed dataset.

    Steps: write rows as a PostGIS table, add a primary key, record author
    metadata, run cleansing, publish the layer to GeoServer/GeoNetwork and
    register the resulting mapset in the main application.

    Raises:
        HTTPException: 400 for invalid input (missing GEOMETRY column, all
        geometries invalid, unknown CRS), 500 for anything else.
    """
    try:
        table_name = await generate_unique_table_name(payload.title)

        df = pd.DataFrame(payload.rows)
        df.columns = [col.upper() for col in df.columns]
        if "GEOMETRY" not in df.columns:
            raise HTTPException(400, "Kolom GEOMETRY tidak ditemukan")

        # 1. WKT string -> shapely geometry; unparseable WKT becomes None.
        def safe_load_wkt(g):
            if not isinstance(g, str):
                return None
            try:
                return wkt.loads(g)
            except Exception:  # was a bare except; keep it narrow to Exception
                return None

        df["GEOMETRY"] = df["GEOMETRY"].apply(safe_load_wkt)
        df = df.rename(columns={"GEOMETRY": "geom"})

        # 2. Drop rows whose geometry failed to parse.
        df = df[df["geom"].notnull()]
        if df.empty:
            raise HTTPException(400, "Semua geometry invalid atau NULL")

        # 3. Repair invalid topology with the standard buffer(0) trick.
        df["geom"] = df["geom"].apply(lambda g: g if g.is_valid else g.buffer(0))

        # 4. Unify geometry types so the table is homogeneous
        #    (Polygon -> MultiPolygon, LineString -> MultiLineString).
        def unify_geometry_type(g):
            gtype = g.geom_type.upper()
            if gtype == "POLYGON":
                return MultiPolygon([g])
            if gtype == "LINESTRING":
                return MultiLineString([g])
            return g  # already MULTI* or POINT

        df["geom"] = df["geom"].apply(unify_geometry_type)

        # 5. CRS from the author form, falling back to WGS84.
        # BUG FIX: a leftover debug line unconditionally forced EPSG:4326,
        # mislabelling data uploaded in any other CRS.
        detected_crs = payload.author.get("crs")
        print('crs', detected_crs)
        if not detected_crs:
            detected_crs = "EPSG:4326"

        gdf = gpd.GeoDataFrame(df, geometry="geom", crs=detected_crs)
        row_count = len(gdf)

        # 6. Verify the CRS is known to PROJ before writing to PostGIS.
        try:
            _ = gdf.to_crs(gdf.crs)  # no-op reprojection just to validate
        except Exception:
            raise HTTPException(400, f"CRS {detected_crs} tidak valid")

        # 7. Write to PostGIS in a worker thread (to_postgis is blocking).
        await asyncio.to_thread(
            gdf.to_postgis,
            table_name,
            sync_engine,
            if_exists="replace",
            index=False
        )

        # 8. Add a primary key (required by the QGIS API).
        async with engine.begin() as conn:
            await conn.execute(text(
                f'ALTER TABLE "{table_name}" ADD COLUMN _ID SERIAL PRIMARY KEY;'
            ))

        # 9. Persist author metadata alongside the dataset.
        unified_geom_type = list(gdf.geom_type.unique())
        author = payload.author
        async with engine.begin() as conn:
            await conn.execute(text("""
                INSERT INTO backend.author_metadata (
                    table_title,
                    dataset_title,
                    dataset_abstract,
                    keywords,
                    topic_category,
                    date_created,
                    dataset_status,
                    organization_name,
                    contact_person_name,
                    contact_email,
                    contact_phone,
                    geom_type,
                    user_id,
                    process,
                    geometry_count
                ) VALUES (
                    :table_title,
                    :dataset_title,
                    :dataset_abstract,
                    :keywords,
                    :topic_category,
                    :date_created,
                    :dataset_status,
                    :organization_name,
                    :contact_person_name,
                    :contact_email,
                    :contact_phone,
                    :geom_type,
                    :user_id,
                    :process,
                    :geometry_count
                )
            """), {
                "table_title": table_name,
                "dataset_title": payload.title,
                "dataset_abstract": author.get("abstract"),
                "keywords": author.get("keywords"),
                # BUG FIX: join crashed with TypeError when topicCategory was absent.
                "topic_category": ", ".join(author.get("topicCategory") or []),
                "date_created": str_to_date(author.get("dateCreated")),
                "dataset_status": author.get("status"),
                "organization_name": author.get("organization"),
                "contact_person_name": author.get("contactName"),
                "contact_email": author.get("contactEmail"),
                "contact_phone": author.get("contactPhone"),
                "geom_type": json.dumps(unified_geom_type),
                "user_id": user_id,
                "process": 'CLEANSING',
                "geometry_count": row_count
            })

        # 10. Activity log.
        await log_activity(
            user_id=user_id,
            action_type="UPLOAD",
            action_title=f"Upload dataset {table_name}",
            details={"table_name": table_name, "rows": len(gdf)}
        )

        job_id = generate_job_id(str(user_id))
        result = {
            "job_id": job_id,
            "job_status": "wait",
            "table_name": table_name,
            "status": "success",
            "message": f"Tabel '{table_name}' berhasil dibuat.",
            "total_rows": len(gdf),
            "geometry_type": unified_geom_type,
            "crs": detected_crs,
            "metadata_uuid": ""
        }
        save_xml_to_sld(payload.style, job_id)

        cleansing = await query_cleansing_data(table_name)
        result['job_status'] = cleansing

        publish = await publish_layer(table_name, job_id)
        result['metadata_uuid'] = publish['uuid']

        mapset = {
            "name": payload.title,
            "description": author.get("abstract"),
            "scale": "1:25000",
            "projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7",
            "category_id": author.get("mapsetCategory"),
            "data_status": "sementara",
            "classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4",
            "producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187",
            "layer_type": unified_geom_type[0],
            "source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"],
            "layer_url": publish['geos_link'],
            "metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish['uuid']}",
            "coverage_level": "provinsi",
            "coverage_area": "kabupaten",
            "data_update_period": "Tahunan",
            "data_version": "2026",
            "is_popular": False,
            "is_active": True,
            "regional_id": "01968b53-a910-7a67-bd10-975b8923b92e",
            "notes": "Mapset baru dibuat",
            "status_validation": "on_verification",
        }
        print("mapset data", mapset)
        await upload_to_main(mapset)
        return successRes(data=result)
    except Exception as e:
        await log_activity(
            user_id=user_id,
            action_type="ERROR",
            action_title="Upload gagal",
            details={"error": str(e)}
        )
        # BUG FIX: re-raise HTTPExceptions as-is so deliberate 400s are not
        # rewrapped into generic 500s.
        if isinstance(e, HTTPException):
            raise
        raise HTTPException(status_code=500, detail=str(e))
# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
# try:
# job_id = generate_job_id(str(user_id))
# result = {
# "job_id": job_id,
# "job_status": "done",
# "table_name": "just for test",
# "status": "success",
# "message": f"Tabel test berhasil dibuat.",
# "total_rows": 10,
# "geometry_type": "Polygon",
# "crs": "EPSG 4326",
# "metadata_uuid": "-"
# }
# mapset = {
# "name": "Resiko Letusan Gunung Arjuno",
# "description": "Testing Automation Upload",
# "scale": "1:25000",
# "projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7",
# "category_id": "0196c80c-855f-77f9-abd0-0c8a30b8c2f5",
# "data_status": "sementara",
# "classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4",
# "producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187",
# "layer_type": "polygon",
# "source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"],
# "layer_url": "http://192.168.60.24:8888/geoserver/wms?service=WMS&version=1.1.0&request=GetMap&layers=labai:risiko_letusan_gunung_arjuno_bromo&bbox=110.89528623700005,-8.780412043999945,116.26994997700001,-5.042971664999925&width=768&height=534&srs=EPSG:4326&styles=&format=application/openlayers",
# "metadata_url": "http://192.168.60.24:7777/geonetwork/srv/eng/catalog.search#/metadata/9e5e2f09-13ef-49b5-bb49-1cb12136f63b",
# "coverage_level": "provinsi",
# "coverage_area": "kabupaten",
# "data_update_period": "Tahunan",
# "data_version": "2026",
# "is_popular": False,
# "is_active": True,
# "regional_id": "01968b53-a910-7a67-bd10-975b8923b92e",
# "notes": "Mapset baru dibuat",
# "status_validation": "on_verification",
# }
# await upload_to_main(mapset)
# return successRes(data=result)
# except Exception as e:
# print("errot", e)
# ===================================
# partition +VIEW
# ===================================
# Daftar prefix WKT yang valid
# VALID_WKT_PREFIXES = ("POINT", "LINESTRING", "POLYGON", "MULTIPOLYGON", "MULTILINESTRING")
def slugify(value: str) -> str:
    """Turn a dataset title into a name that is safe to use as a SQL VIEW identifier."""
    collapsed = re.sub(r'[^a-zA-Z0-9]+', '_', value.lower())
    return collapsed.strip('_')
# Partition + VIEW
# async def create_dataset_view_from_metadata(conn, metadata_id: int, user_id: int, title: str):
# norm_title = slugify(title)
# view_name = f"v_user_{user_id}_{norm_title}"
# base_table = f"test_partition_user_{user_id}"
# # Ambil daftar field
# result = await conn.execute(text("SELECT fields FROM dataset_metadata WHERE id=:mid"), {"mid": metadata_id})
# fields_json = result.scalar_one_or_none()
# base_columns = {"id", "user_id", "metadata_id", "geom"}
# columns_sql = ""
# field_list = []
# if fields_json:
# fields = json.loads(fields_json) if isinstance(fields_json, str) else fields_json
# field_list = fields
# for f in field_list:
# safe_col = slugify(f)
# alias_name = safe_col if safe_col not in base_columns else f"attr_{safe_col}"
# # CAST otomatis
# if safe_col in ["longitude", "latitude", "lon", "lat"]:
# columns_sql += f", (p.attributes->>'{f}')::float AS {alias_name}"
# else:
# columns_sql += f", p.attributes->>'{f}' AS {alias_name}"
# # Drop view lama
# await conn.execute(text(f"DROP VIEW IF EXISTS {view_name} CASCADE;"))
# # 🔥 Buat VIEW baru yang punya FID unik
# create_view_query = f"""
# CREATE OR REPLACE VIEW {view_name} AS
# SELECT
# row_number() OVER() AS fid, -- FID unik untuk QGIS
# p.id,
# p.user_id,
# p.metadata_id,
# p.geom
# {columns_sql},
# m.title,
# m.year,
# m.description
# FROM {base_table} p
# JOIN dataset_metadata m ON m.id = p.metadata_id
# WHERE p.metadata_id = {metadata_id};
# """
# await conn.execute(text(create_view_query))
# # Register geometry untuk QGIS
# await conn.execute(text(f"DELETE FROM geometry_columns WHERE f_table_name = '{view_name}';"))
# await conn.execute(text(f"""
# INSERT INTO geometry_columns
# (f_table_schema, f_table_name, f_geometry_column, coord_dimension, srid, type)
# VALUES ('public', '{view_name}', 'geom', 2, 4326, 'GEOMETRY');
# """))
# print(f"[INFO] VIEW {view_name} dibuat dengan FID unik dan kompatibel dengan QGIS.")
# async def handle_to_postgis(payload, engine, user_id: int = 3):
# """
# Menangani upload data spasial ke PostGIS (dengan partition per user).
# - Jika partisi belum ada, akan dibuat otomatis
# - Metadata dataset disimpan di tabel dataset_metadata
# - Data spasial dimasukkan ke tabel partisi (test_partition_user_{id})
# - VIEW otomatis dibuat untuk QGIS
# """
# try:
# df = pd.DataFrame(payload.rows)
# print(f"[INFO] Diterima {len(df)} baris data dari frontend.")
# # --- Validasi kolom geometry ---
# if "geometry" not in df.columns:
# raise errorRes(status_code=400, message="Kolom 'geometry' tidak ditemukan dalam data.")
# # --- Parsing geometry ke objek shapely ---
# df["geometry"] = df["geometry"].apply(
# lambda g: wkt.loads(g)
# if isinstance(g, str) and g.strip().upper().startswith(VALID_WKT_PREFIXES)
# else None
# )
# # --- Buat GeoDataFrame ---
# gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")
# # --- Metadata info dari payload ---
# # dataset_title = getattr(payload, "dataset_title", None)
# # dataset_year = getattr(payload, "dataset_year", None)
# # dataset_desc = getattr(payload, "dataset_description", None)
# dataset_title = "hujan 2045"
# dataset_year = 2045
# dataset_desc = "test metadata"
# if not dataset_title:
# raise errorRes(status_code=400, detail="Field 'dataset_title' wajib ada untuk metadata.")
# async with engine.begin() as conn:
# fields = [col for col in df.columns if col != "geometry"]
# # 💾 1⃣ Simpan Metadata Dataset
# print("[INFO] Menyimpan metadata dataset...")
# result = await conn.execute(
# text("""
# INSERT INTO dataset_metadata (user_id, title, year, description, fields, created_at)
# VALUES (:user_id, :title, :year, :desc, :fields, :created_at)
# RETURNING id;
# """),
# {
# "user_id": user_id,
# "title": dataset_title,
# "year": dataset_year,
# "desc": dataset_desc,
# "fields": json.dumps(fields),
# "created_at": datetime.utcnow(),
# },
# )
# metadata_id = result.scalar_one()
# print(f"[INFO] Metadata disimpan dengan ID {metadata_id}")
# # ⚙️ 2⃣ Auto-create Partisi Jika Belum Ada
# print(f"[INFO] Memastikan partisi test_partition_user_{user_id} tersedia...")
# await conn.execute(
# text(f"""
# DO $$
# BEGIN
# IF NOT EXISTS (
# SELECT 1 FROM pg_tables WHERE tablename = 'test_partition_user_{user_id}'
# ) THEN
# EXECUTE format('
# CREATE TABLE test_partition_user_%s
# PARTITION OF test_partition
# FOR VALUES IN (%s);
# ', {user_id}, {user_id});
# EXECUTE format('CREATE INDEX IF NOT EXISTS idx_partition_user_%s_geom ON test_partition_user_%s USING GIST (geom);', {user_id}, {user_id});
# EXECUTE format('CREATE INDEX IF NOT EXISTS idx_partition_user_%s_metadata ON test_partition_user_%s (metadata_id);', {user_id}, {user_id});
# END IF;
# END
# $$;
# """)
# )
# # 🧩 3⃣ Insert Data Spasial ke Partisi
# print(f"[INFO] Memasukkan data ke test_partition_user_{user_id} ...")
# insert_count = 0
# for _, row in gdf.iterrows():
# geom_wkt = row["geometry"].wkt if row["geometry"] is not None else None
# attributes = row.drop(labels=["geometry"]).to_dict()
# await conn.execute(
# text("""
# INSERT INTO test_partition (user_id, metadata_id, geom, attributes, created_at)
# VALUES (:user_id, :metadata_id, ST_Force2D(ST_GeomFromText(:geom, 4326)),
# CAST(:attr AS jsonb), :created_at);
# """),
# {
# "user_id": user_id,
# "metadata_id": metadata_id,
# "geom": geom_wkt,
# "attr": json.dumps(attributes),
# "created_at": datetime.utcnow(),
# },
# )
# insert_count += 1
# # 🧩 4⃣ Membuat VIEW untuk dataset baru di QGIS
# await create_dataset_view_from_metadata(conn, metadata_id, user_id, dataset_title)
# print(f"[INFO] ✅ Berhasil memasukkan {insert_count} baris ke partisi user_id={user_id} (metadata_id={metadata_id}).")
# return {
# "status": "success",
# "user_id": user_id,
# "metadata_id": metadata_id,
# "dataset_title": dataset_title,
# "inserted_rows": insert_count,
# "geometry_type": list(gdf.geom_type.unique()),
# }
# except Exception as e:
# print(f"[ERROR] Gagal upload ke PostGIS partition: {e}")
# raise errorRes(status_code=500, message="Gagal upload ke PostGIS partition", details=str(e))

View File

@ -0,0 +1,70 @@
import pandas as pd
import geopandas as gpd
from shapely import wkt
from shapely.errors import WKTReadingError
def process_dataframe_synchronous(df_input, tmp_file):
    """
    Validate, clean, and export a WKT dataframe to parquet.

    Runs in a worker thread (CPU bound). Expects df_input to hold WKT
    strings in a "geometry" column.

    Returns:
        The number of rows written to tmp_file.

    Raises:
        ValueError: when no valid spatial row survives cleaning.
    """
    # Copy so the caller's dataframe is never mutated.
    export_df = df_input.copy()

    # ---- 1. Safe WKT loading: corrupt/non-string WKT becomes None --------
    def safe_load_wkt(raw):
        if not isinstance(raw, str):
            return None
        try:
            return wkt.loads(raw)
        except Exception:
            # BUG FIX: the old clause caught (WKTReadingError, Exception) —
            # the tuple was redundant since Exception subsumes it, and
            # WKTReadingError is gone from shapely 2.x error hierarchy
            # (NOTE(review): the module-level import may need updating too).
            return None

    export_df["geom"] = export_df["geometry"].apply(safe_load_wkt)

    # ---- 2. Drop rows where WKT parsing failed ---------------------------
    export_df = export_df[export_df["geom"].notnull()]
    if export_df.empty:
        raise ValueError("Tidak ada data spasial valid yang ditemukan.")

    export_df = gpd.GeoDataFrame(export_df, geometry="geom")

    # ---- 3. Fix topology: buffer(0) repairs light self-intersections -----
    export_df["geom"] = export_df["geom"].apply(
        lambda g: g.buffer(0) if not g.is_valid else g
    )
    # Drop geometries that collapsed to empty during the fix.
    export_df = export_df[~export_df["geom"].is_empty]

    # ---- 4. Finalize: drop raw WKT column, set CRS, normalize headers ----
    export_df = export_df.drop(columns=["geometry"])
    export_df = export_df.set_crs("EPSG:4326", allow_override=True)
    # Attribute columns to UPPERCASE (stripped of stray spaces); keep 'geom'.
    export_df = export_df.rename(
        columns=lambda c: str(c).strip().upper() if c != "geom" else c
    )

    export_df.to_parquet(tmp_file)
    return len(export_df)
# --- Cara Pemanggilan di Async Function ---
# await asyncio.to_thread(process_dataframe_synchronous, result, tmp_file)