update latest

This commit is contained in:
DmsAnhr 2026-02-10 08:54:35 +07:00
parent cc4d897862
commit c9c641e8e9
22 changed files with 2350 additions and 537 deletions

BIN
.DS_Store vendored

Binary file not shown.

2
.gitignore vendored
View File

@ -10,6 +10,7 @@ venv/
pdf/ pdf/
data_cache/ data_cache/
service_tmp/ service_tmp/
tmp/
testing/ testing/
test-ai/ test-ai/
uploads/ uploads/
@ -17,6 +18,7 @@ scrapp/
logs/ logs/
style_temp/ style_temp/
services/styles/ services/styles/
services/pipeline/
cleansing_func.sql cleansing_func.sql

View File

@ -112,9 +112,9 @@ async def job_callback(payload: dict):
geos_link = publish_layer_to_geoserver(table, job_id) geos_link = publish_layer_to_geoserver(table, job_id)
uuid = publish_metadata(table_name=table, geoserver_links=geos_link) uuid = await publish_metadata(table_name=table, geoserver_links=geos_link)
update_job_status(table, "FINISHED", job_id) await update_job_status(table, "FINISHED", job_id)
return { return {
"ok": True, "ok": True,
"uuid": uuid "uuid": uuid
@ -225,14 +225,17 @@ async def query_cleansing_data(
table_name: str table_name: str
): ):
try: try:
print("clean")
async with engine.begin() as conn: async with engine.begin() as conn:
await conn.execute( await conn.execute(
text("CALL pr_cleansing_satupeta_polygon(:table_name, NULL);"), text("CALL pr_cleansing_satupeta_polygon(:table_name, NULL);"),
{"table_name": table_name} {"table_name": table_name}
) )
print("clean-done")
return "done" return "done"
except SQLAlchemyError as e: except SQLAlchemyError as e:
print("clean-error", e)
raise RuntimeError(f"Fix geometry failed: {str(e)}") raise RuntimeError(f"Fix geometry failed: {str(e)}")
@ -242,20 +245,22 @@ async def publish_layer(table_name: str, job_id: str):
geos_link = publish_layer_to_geoserver(table_name, job_id) geos_link = publish_layer_to_geoserver(table_name, job_id)
uuid = publish_metadata( # uuid = await publish_metadata(
table_name=table_name, # table_name=table_name,
geoserver_links=geos_link # geoserver_links=geos_link
) # )
# await update_job_status(table_name, "FINISHED", job_id)
update_job_status(table_name, "FINISHED", job_id)
# return uuid # return uuid
return { return {
"geos_link": geos_link["layer_url"], "geos_link": geos_link["layer_url"],
"uuid": uuid # "uuid": uuid
"uuid": "123123"
} }
except Exception as e: except Exception as e:
update_job_status(table_name, "FAILED", job_id) await update_job_status(table_name, "FAILED", job_id)
raise RuntimeError(f"Publish layer gagal: {e}") from e raise RuntimeError(f"Publish layer gagal: {e}") from e

View File

@ -29,6 +29,7 @@ async def upload_file(payload: PdfRequest):
class UploadRequest(BaseModel): class UploadRequest(BaseModel):
title: str title: str
path: str
rows: List[dict] rows: List[dict]
columns: List[str] columns: List[str]
author: Dict[str, Any] author: Dict[str, Any]

View File

@ -11,6 +11,13 @@ SERVICE_KEY = os.getenv("SERVICE_KEY")
POSTGIS_URL = os.getenv("POSTGIS_URL") POSTGIS_URL = os.getenv("POSTGIS_URL")
POSTGIS_SYNC_URL = os.getenv("SYNC_URL") POSTGIS_SYNC_URL = os.getenv("SYNC_URL")
DB_DSN = os.getenv("DB_DSN")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")
DB_USER = os.getenv("DB_USER")
DB_PASS = os.getenv("DB_PASS")
QGIS_URL = os.getenv("QGIS_API_URL") QGIS_URL = os.getenv("QGIS_API_URL")

View File

@ -1,7 +1,5 @@
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.ext.asyncio import create_async_engine from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from core.config import POSTGIS_URL, POSTGIS_SYNC_URL from core.config import POSTGIS_URL, POSTGIS_SYNC_URL
engine = create_async_engine(POSTGIS_URL, pool_pre_ping=True) engine = create_async_engine(POSTGIS_URL, pool_pre_ping=True)

25
main.py
View File

@ -1,16 +1,35 @@
from contextlib import asynccontextmanager
import asyncpg
from fastapi import FastAPI from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from core.config import API_VERSION, ALLOWED_ORIGINS from core.config import API_VERSION, ALLOWED_ORIGINS, DB_DSN
from api.routers.system_router import router as system_router from api.routers.system_router import router as system_router
from api.routers.upload_file_router import router as upload_router from api.routers.upload_file_router import router as upload_router
from api.routers.datasets_router import router as dataset_router from api.routers.datasets_router import router as dataset_router
from services.pipeline.api.router import router as pipeline_router
# from contextlib import asynccontextmanager # from contextlib import asynccontextmanager
# from utils.qgis_init import init_qgis # from utils.qgis_init import init_qgis
db_pool = None
@asynccontextmanager
async def lifespan(app: FastAPI):
# 1. Start: Buat Pool koneksi saat aplikasi nyala
global db_pool
print("Creating DB Pool...")
db_pool = await asyncpg.create_pool(DB_DSN, min_size=5, max_size=20)
yield
# 2. Shutdown: Tutup Pool saat aplikasi mati
print("Closing DB Pool...")
await db_pool.close()
app = FastAPI( app = FastAPI(
title="ETL Geo Upload Service", title="ETL Geo Upload Service",
version=API_VERSION, version=API_VERSION,
description="Upload Automation API" description="Upload Automation API",
lifespan=lifespan
) )
app.add_middleware( app.add_middleware(
@ -21,6 +40,7 @@ app.add_middleware(
allow_headers=["*"], allow_headers=["*"],
) )
# Base.metadata.create_all(bind=engine) # Base.metadata.create_all(bind=engine)
# qgis setup # qgis setup
@ -56,3 +76,4 @@ app.add_middleware(
app.include_router(system_router, tags=["System"]) app.include_router(system_router, tags=["System"])
app.include_router(upload_router, prefix="/upload", tags=["Upload"]) app.include_router(upload_router, prefix="/upload", tags=["Upload"])
app.include_router(dataset_router, prefix="/dataset", tags=["Upload"]) app.include_router(dataset_router, prefix="/dataset", tags=["Upload"])
app.include_router(pipeline_router)

View File

@ -0,0 +1,9 @@
\documentclass[preview]{standalone}
\usepackage[english]{babel}
\usepackage{amsmath}
\usepackage{amssymb}
\begin{document}
\begin{align*}
A = \pi r^2
\end{align*}
\end{document}

View File

@ -0,0 +1,54 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="854" height="480" viewBox="0 0 854 480">
<defs>
<g>
<g id="glyph-0-0">
<path d="M 0.214844 -0.242188 C 0.734375 -0.296875 1.0625 -0.394531 1.191406 -0.53125 C 1.320312 -0.667969 1.386719 -1.023438 1.386719 -1.601562 L 1.386719 -7.375 C 1.386719 -7.851562 1.3125 -8.164062 1.167969 -8.308594 C 1.023438 -8.449219 0.707031 -8.539062 0.214844 -8.574219 L 0.214844 -8.820312 L 3.917969 -8.820312 L 3.917969 -8.574219 C 3.429688 -8.539062 3.109375 -8.449219 2.960938 -8.308594 C 2.8125 -8.164062 2.742188 -7.855469 2.742188 -7.375 L 2.742188 -4.777344 L 6.785156 -4.777344 L 6.785156 -7.375 C 6.785156 -7.851562 6.710938 -8.164062 6.570312 -8.308594 C 6.425781 -8.449219 6.105469 -8.539062 5.613281 -8.574219 L 5.613281 -8.820312 L 9.316406 -8.820312 L 9.316406 -8.574219 C 8.824219 -8.539062 8.507812 -8.449219 8.363281 -8.308594 C 8.21875 -8.164062 8.144531 -7.855469 8.144531 -7.375 L 8.144531 -1.445312 C 8.144531 -0.960938 8.21875 -0.648438 8.363281 -0.511719 C 8.507812 -0.375 8.824219 -0.285156 9.316406 -0.242188 L 9.316406 0 L 5.613281 0 L 5.613281 -0.242188 C 6.136719 -0.292969 6.464844 -0.386719 6.589844 -0.527344 C 6.71875 -0.667969 6.785156 -1.023438 6.785156 -1.601562 L 6.785156 -4.191406 L 2.742188 -4.191406 L 2.742188 -1.445312 C 2.742188 -0.960938 2.816406 -0.648438 2.960938 -0.507812 C 3.109375 -0.367188 3.429688 -0.28125 3.917969 -0.242188 L 3.917969 0 L 0.214844 0 Z "/>
</g>
<g id="glyph-0-1">
<path d="M 3.820312 -3.605469 C 3.320312 -3.441406 2.910156 -3.257812 2.585938 -3.058594 C 1.960938 -2.671875 1.648438 -2.234375 1.648438 -1.746094 C 1.648438 -1.351562 1.777344 -1.058594 2.039062 -0.871094 C 2.207031 -0.75 2.394531 -0.691406 2.605469 -0.691406 C 2.890625 -0.691406 3.164062 -0.769531 3.425781 -0.929688 C 3.691406 -1.089844 3.820312 -1.296875 3.820312 -1.542969 Z M 0.488281 -1.296875 C 0.488281 -1.925781 0.804688 -2.449219 1.433594 -2.871094 C 1.832031 -3.132812 2.628906 -3.484375 3.820312 -3.933594 L 3.820312 -4.484375 C 3.820312 -4.929688 3.777344 -5.238281 3.691406 -5.410156 C 3.542969 -5.699219 3.238281 -5.847656 2.773438 -5.847656 C 2.550781 -5.847656 2.339844 -5.789062 2.140625 -5.675781 C 1.941406 -5.558594 1.84375 -5.398438 1.84375 -5.195312 C 1.84375 -5.144531 1.851562 -5.054688 1.875 -4.929688 C 1.898438 -4.808594 1.90625 -4.730469 1.90625 -4.695312 C 1.90625 -4.453125 1.828125 -4.28125 1.667969 -4.1875 C 1.574219 -4.128906 1.46875 -4.101562 1.339844 -4.101562 C 1.144531 -4.101562 0.996094 -4.164062 0.890625 -4.292969 C 0.789062 -4.421875 0.734375 -4.5625 0.734375 -4.71875 C 0.734375 -5.023438 0.921875 -5.339844 1.296875 -5.671875 C 1.671875 -6.003906 2.222656 -6.171875 2.949219 -6.171875 C 3.792969 -6.171875 4.363281 -5.898438 4.660156 -5.351562 C 4.820312 -5.050781 4.902344 -4.617188 4.902344 -4.042969 L 4.902344 -1.433594 C 4.902344 -1.179688 4.917969 -1.007812 4.953125 -0.910156 C 5.011719 -0.742188 5.128906 -0.65625 5.304688 -0.65625 C 5.40625 -0.65625 5.488281 -0.671875 5.554688 -0.703125 C 5.617188 -0.734375 5.730469 -0.808594 5.890625 -0.925781 L 5.890625 -0.585938 C 5.753906 -0.417969 5.601562 -0.277344 5.441406 -0.167969 C 5.199219 -0.00390625 4.953125 0.078125 4.699219 0.078125 C 4.40625 0.078125 4.191406 -0.015625 4.058594 -0.207031 C 3.925781 -0.398438 3.855469 -0.628906 3.839844 -0.890625 C 3.511719 -0.605469 3.230469 -0.394531 2.996094 -0.253906 C 2.601562 -0.0195312 2.222656 0.0976562 1.867188 0.0976562 C 1.496094 0.0976562 
1.171875 -0.0351562 0.898438 -0.296875 C 0.625 -0.558594 0.488281 -0.890625 0.488281 -1.296875 Z "/>
</g>
<g id="glyph-0-2">
<path d="M 0.273438 -0.183594 C 0.675781 -0.222656 0.949219 -0.300781 1.09375 -0.425781 C 1.238281 -0.550781 1.308594 -0.792969 1.308594 -1.152344 L 1.308594 -7.511719 C 1.308594 -7.800781 1.285156 -7.996094 1.238281 -8.105469 C 1.152344 -8.289062 0.972656 -8.378906 0.710938 -8.378906 C 0.648438 -8.378906 0.582031 -8.371094 0.511719 -8.359375 C 0.441406 -8.347656 0.347656 -8.328125 0.242188 -8.300781 L 0.242188 -8.515625 C 0.828125 -8.671875 1.53125 -8.878906 2.355469 -9.140625 C 2.386719 -9.140625 2.40625 -9.128906 2.410156 -9.101562 C 2.417969 -9.074219 2.421875 -9.019531 2.421875 -8.933594 L 2.421875 -1.125 C 2.421875 -0.75 2.488281 -0.503906 2.617188 -0.394531 C 2.746094 -0.285156 3.015625 -0.210938 3.425781 -0.183594 L 3.425781 0 L 0.273438 0 Z "/>
</g>
<g id="glyph-0-3">
<path d="M 0.339844 -3.019531 C 0.339844 -3.882812 0.613281 -4.613281 1.160156 -5.210938 C 1.710938 -5.808594 2.417969 -6.105469 3.28125 -6.105469 C 4.140625 -6.105469 4.851562 -5.824219 5.417969 -5.261719 C 5.980469 -4.695312 6.261719 -3.945312 6.261719 -3.007812 C 6.261719 -2.144531 5.988281 -1.394531 5.441406 -0.753906 C 4.894531 -0.117188 4.1875 0.203125 3.320312 0.203125 C 2.488281 0.203125 1.78125 -0.105469 1.203125 -0.714844 C 0.625 -1.328125 0.339844 -2.097656 0.339844 -3.019531 Z M 3.097656 -5.714844 C 2.753906 -5.714844 2.457031 -5.601562 2.207031 -5.378906 C 1.773438 -4.984375 1.554688 -4.300781 1.554688 -3.332031 C 1.554688 -2.558594 1.730469 -1.839844 2.078125 -1.171875 C 2.429688 -0.503906 2.914062 -0.167969 3.535156 -0.167969 C 4.019531 -0.167969 4.394531 -0.394531 4.65625 -0.839844 C 4.921875 -1.285156 5.050781 -1.871094 5.050781 -2.597656 C 5.050781 -3.347656 4.886719 -4.054688 4.550781 -4.71875 C 4.214844 -5.382812 3.734375 -5.714844 3.097656 -5.714844 Z "/>
</g>
<g id="glyph-0-4">
<rect x="0" y="0" width="0" height="0" mask="url(#mask-0)"/>
</g>
<g id="glyph-0-5">
<path d="M 0.148438 -0.242188 C 0.699219 -0.296875 1.054688 -0.421875 1.210938 -0.613281 C 1.367188 -0.808594 1.445312 -1.257812 1.445312 -1.960938 L 1.445312 -7.375 C 1.445312 -7.859375 1.371094 -8.171875 1.21875 -8.316406 C 1.066406 -8.460938 0.71875 -8.546875 0.183594 -8.574219 L 0.183594 -8.820312 L 2.820312 -8.820312 L 5.90625 -2.097656 L 8.855469 -8.820312 L 11.511719 -8.820312 L 11.511719 -8.574219 C 11.015625 -8.539062 10.695312 -8.449219 10.554688 -8.304688 C 10.410156 -8.160156 10.339844 -7.847656 10.339844 -7.375 L 10.339844 -1.445312 C 10.339844 -0.960938 10.410156 -0.648438 10.554688 -0.511719 C 10.695312 -0.375 11.015625 -0.285156 11.511719 -0.242188 L 11.511719 0 L 7.773438 0 L 7.773438 -0.242188 C 8.3125 -0.285156 8.648438 -0.378906 8.777344 -0.53125 C 8.910156 -0.679688 8.976562 -1.039062 8.976562 -1.601562 L 8.976562 -7.589844 L 5.566406 0 L 5.382812 0 L 2.03125 -7.277344 L 2.03125 -1.960938 C 2.03125 -1.230469 2.136719 -0.753906 2.351562 -0.535156 C 2.488281 -0.390625 2.800781 -0.292969 3.28125 -0.242188 L 3.28125 0 L 0.148438 0 Z "/>
</g>
<g id="glyph-0-6">
<path d="M 0.242188 -0.183594 C 0.550781 -0.222656 0.765625 -0.296875 0.886719 -0.414062 C 1.011719 -0.527344 1.074219 -0.785156 1.074219 -1.183594 L 1.074219 -4.484375 C 1.074219 -4.761719 1.046875 -4.957031 0.996094 -5.070312 C 0.914062 -5.234375 0.746094 -5.320312 0.488281 -5.320312 C 0.449219 -5.320312 0.410156 -5.316406 0.367188 -5.3125 C 0.328125 -5.308594 0.277344 -5.300781 0.214844 -5.292969 L 0.214844 -5.519531 C 0.394531 -5.574219 0.8125 -5.707031 1.476562 -5.925781 L 2.089844 -6.125 C 2.121094 -6.125 2.136719 -6.117188 2.144531 -6.09375 C 2.152344 -6.070312 2.15625 -6.042969 2.15625 -6.003906 L 2.15625 -5.046875 C 2.554688 -5.417969 2.867188 -5.675781 3.09375 -5.8125 C 3.429688 -6.027344 3.78125 -6.132812 4.148438 -6.132812 C 4.441406 -6.132812 4.710938 -6.046875 4.953125 -5.878906 C 5.421875 -5.550781 5.65625 -4.960938 5.65625 -4.113281 L 5.65625 -1.074219 C 5.65625 -0.761719 5.71875 -0.535156 5.847656 -0.398438 C 5.972656 -0.257812 6.183594 -0.1875 6.476562 -0.183594 L 6.476562 0 L 3.699219 0 L 3.699219 -0.183594 C 4.015625 -0.226562 4.234375 -0.3125 4.363281 -0.445312 C 4.488281 -0.578125 4.550781 -0.867188 4.550781 -1.308594 L 4.550781 -4.089844 C 4.550781 -4.460938 4.480469 -4.769531 4.34375 -5.015625 C 4.203125 -5.261719 3.949219 -5.382812 3.574219 -5.382812 C 3.316406 -5.382812 3.058594 -5.296875 2.792969 -5.125 C 2.644531 -5.023438 2.453125 -4.859375 2.21875 -4.628906 L 2.21875 -0.984375 C 2.21875 -0.671875 2.289062 -0.460938 2.429688 -0.355469 C 2.566406 -0.25 2.785156 -0.191406 3.085938 -0.183594 L 3.085938 0 L 0.242188 0 Z "/>
</g>
<g id="glyph-0-7">
<path d="M 1.09375 -8.40625 C 1.09375 -8.59375 1.160156 -8.753906 1.289062 -8.886719 C 1.417969 -9.019531 1.578125 -9.089844 1.769531 -9.089844 C 1.957031 -9.089844 2.117188 -9.023438 2.25 -8.890625 C 2.382812 -8.757812 2.449219 -8.597656 2.449219 -8.40625 C 2.449219 -8.21875 2.382812 -8.058594 2.25 -7.925781 C 2.117188 -7.792969 1.957031 -7.726562 1.769531 -7.726562 C 1.578125 -7.726562 1.417969 -7.792969 1.289062 -7.925781 C 1.160156 -8.058594 1.09375 -8.21875 1.09375 -8.40625 Z M 0.261719 -0.183594 C 0.726562 -0.226562 1.019531 -0.304688 1.140625 -0.417969 C 1.261719 -0.535156 1.320312 -0.847656 1.320312 -1.355469 L 1.320312 -4.460938 C 1.320312 -4.742188 1.300781 -4.9375 1.261719 -5.046875 C 1.199219 -5.222656 1.0625 -5.3125 0.851562 -5.3125 C 0.804688 -5.3125 0.757812 -5.308594 0.710938 -5.300781 C 0.667969 -5.292969 0.535156 -5.257812 0.320312 -5.195312 L 0.320312 -5.398438 L 0.597656 -5.488281 C 1.359375 -5.734375 1.886719 -5.921875 2.1875 -6.046875 C 2.308594 -6.101562 2.386719 -6.125 2.421875 -6.125 C 2.429688 -6.09375 2.433594 -6.0625 2.433594 -6.027344 L 2.433594 -1.355469 C 2.433594 -0.859375 2.496094 -0.550781 2.613281 -0.421875 C 2.734375 -0.296875 3.003906 -0.21875 3.425781 -0.183594 L 3.425781 0 L 0.261719 0 Z "/>
</g>
<g id="glyph-0-8">
<path d="M 0.214844 -0.167969 C 0.554688 -0.199219 0.777344 -0.257812 0.890625 -0.339844 C 1.066406 -0.464844 1.152344 -0.714844 1.152344 -1.09375 L 1.152344 -4.460938 C 1.152344 -4.78125 1.109375 -4.992188 1.023438 -5.089844 C 0.941406 -5.191406 0.800781 -5.242188 0.605469 -5.242188 C 0.515625 -5.242188 0.445312 -5.238281 0.398438 -5.226562 C 0.355469 -5.21875 0.300781 -5.203125 0.242188 -5.183594 L 0.242188 -5.410156 L 0.710938 -5.566406 C 0.878906 -5.621094 1.15625 -5.726562 1.542969 -5.871094 C 1.929688 -6.019531 2.132812 -6.09375 2.15625 -6.09375 C 2.175781 -6.09375 2.191406 -6.082031 2.195312 -6.0625 C 2.199219 -6.039062 2.199219 -6 2.199219 -5.9375 L 2.199219 -5.058594 C 2.628906 -5.449219 3 -5.71875 3.3125 -5.867188 C 3.625 -6.019531 3.949219 -6.09375 4.277344 -6.09375 C 4.722656 -6.09375 5.082031 -5.941406 5.34375 -5.636719 C 5.484375 -5.472656 5.597656 -5.25 5.691406 -4.96875 C 6.011719 -5.292969 6.292969 -5.535156 6.53125 -5.691406 C 6.941406 -5.960938 7.363281 -6.09375 7.792969 -6.09375 C 8.492188 -6.09375 8.957031 -5.808594 9.191406 -5.242188 C 9.328125 -4.921875 9.394531 -4.410156 9.394531 -3.71875 L 9.394531 -1.015625 C 9.394531 -0.707031 9.460938 -0.496094 9.597656 -0.386719 C 9.734375 -0.277344 9.980469 -0.203125 10.339844 -0.167969 L 10.339844 0 L 7.402344 0 L 7.402344 -0.183594 C 7.78125 -0.21875 8.027344 -0.292969 8.148438 -0.410156 C 8.265625 -0.527344 8.328125 -0.765625 8.328125 -1.125 L 8.328125 -3.933594 C 8.328125 -4.355469 8.28125 -4.664062 8.191406 -4.863281 C 8.03125 -5.21875 7.714844 -5.398438 7.246094 -5.398438 C 6.964844 -5.398438 6.683594 -5.304688 6.40625 -5.117188 C 6.246094 -5.007812 6.046875 -4.835938 5.8125 -4.597656 L 5.8125 -1.261719 C 5.8125 -0.910156 5.875 -0.644531 6 -0.460938 C 6.125 -0.28125 6.382812 -0.183594 6.785156 -0.167969 L 6.785156 0 L 3.796875 0 L 3.796875 -0.167969 C 4.207031 -0.222656 4.46875 -0.320312 4.582031 -0.46875 C 4.695312 -0.617188 4.753906 -0.980469 4.753906 -1.554688 L 4.753906 -3.378906 C 4.753906 
-4.046875 4.710938 -4.507812 4.621094 -4.757812 C 4.480469 -5.183594 4.175781 -5.398438 3.710938 -5.398438 C 3.445312 -5.398438 3.1875 -5.324219 2.929688 -5.179688 C 2.671875 -5.035156 2.449219 -4.84375 2.253906 -4.609375 L 2.253906 -1.046875 C 2.253906 -0.71875 2.308594 -0.492188 2.425781 -0.363281 C 2.539062 -0.238281 2.789062 -0.171875 3.175781 -0.167969 L 3.175781 0 L 0.214844 0 Z "/>
</g>
<g id="glyph-0-9">
<path d="M 2.433594 0.144531 C 2.246094 0.144531 2.082031 0.078125 1.941406 -0.0507812 C 1.796875 -0.183594 1.726562 -0.347656 1.726562 -0.554688 C 1.726562 -0.75 1.792969 -0.917969 1.933594 -1.0625 C 2.074219 -1.203125 2.242188 -1.277344 2.449219 -1.277344 C 2.65625 -1.277344 2.828125 -1.203125 2.96875 -1.058594 C 3.109375 -0.914062 3.175781 -0.746094 3.175781 -0.554688 C 3.175781 -0.363281 3.105469 -0.199219 2.960938 -0.0625 C 2.820312 0.0742188 2.644531 0.144531 2.433594 0.144531 Z M 3.144531 -7.949219 C 3.144531 -7.871094 3.140625 -7.792969 3.136719 -7.71875 C 3.132812 -7.640625 3.125 -7.550781 3.113281 -7.449219 L 2.820312 -4.980469 L 2.511719 -2.34375 L 2.34375 -2.34375 L 2.21875 -3.777344 C 2.183594 -4.183594 2.089844 -4.964844 1.933594 -6.113281 C 1.792969 -7.117188 1.726562 -7.722656 1.726562 -7.929688 C 1.726562 -8.199219 1.777344 -8.445312 1.882812 -8.671875 C 1.984375 -8.898438 2.179688 -9.011719 2.460938 -9.011719 C 2.75 -9.011719 2.953125 -8.855469 3.058594 -8.546875 C 3.117188 -8.386719 3.144531 -8.1875 3.144531 -7.949219 Z "/>
</g>
</g>
<image id="source-9" x="0" y="0" width="0" height="0"/>
<mask id="mask-0">
<use xlink:href="#source-9"/>
</mask>
</defs>
<g fill="rgb(100%, 100%, 100%)" fill-opacity="1">
<use xlink:href="#glyph-0-0" x="30" y="30"/>
<use xlink:href="#glyph-0-1" x="40" y="30"/>
<use xlink:href="#glyph-0-2" x="46" y="30"/>
<use xlink:href="#glyph-0-3" x="50" y="30"/>
<use xlink:href="#glyph-0-4" x="57" y="30"/>
<use xlink:href="#glyph-0-5" x="60" y="30"/>
<use xlink:href="#glyph-0-1" x="72" y="30"/>
<use xlink:href="#glyph-0-6" x="78" y="30"/>
<use xlink:href="#glyph-0-7" x="85" y="30"/>
<use xlink:href="#glyph-0-8" x="89" y="30"/>
<use xlink:href="#glyph-0-9" x="99" y="30"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 13 KiB

View File

@ -18,3 +18,5 @@ asyncpg
psycopg2 psycopg2
python-multipart==0.0.22 python-multipart==0.0.22
pyarrow==21.0.0 pyarrow==21.0.0
subprocess
openpyxl

BIN
services/.DS_Store vendored

Binary file not shown.

View File

@ -1,9 +1,9 @@
from datetime import datetime from datetime import datetime
from sqlalchemy import text from sqlalchemy import text
from database.connection import sync_engine from database.connection import engine
from utils.logger_config import log_activity from utils.logger_config import log_activity
def update_job_status(table_name: str, status: str, job_id: str = None): async def update_job_status(table_name: str, status: str, job_id: str = None):
query = text(""" query = text("""
UPDATE backend.author_metadata UPDATE backend.author_metadata
SET process = :status, SET process = :status,
@ -17,12 +17,7 @@ def update_job_status(table_name: str, status: str, job_id: str = None):
"table_name": table_name "table_name": table_name
} }
with sync_engine.begin() as conn: async with engine.begin() as conn:
conn.execute(query, params) await conn.execute(query, params)
print(f"[DB] Metadata '{table_name}' updated to status '{status}'") print(f"[DB] Metadata '{table_name}' updated to status '{status}'")

View File

@ -2,7 +2,7 @@ from fastapi import HTTPException
import requests import requests
from sqlalchemy import text from sqlalchemy import text
from core.config import GEONETWORK_PASS, GEONETWORK_URL, GEONETWORK_USER from core.config import GEONETWORK_PASS, GEONETWORK_URL, GEONETWORK_USER
from database.connection import sync_engine as engine from database.connection import engine
from datetime import datetime from datetime import datetime
from uuid import uuid4 from uuid import uuid4
import re import re
@ -54,7 +54,7 @@ def fix_xml_urls(xml: str) -> str:
def get_extent(table_name: str): async def get_extent(table_name: str):
sql = f""" sql = f"""
SELECT SELECT
@ -62,24 +62,22 @@ def get_extent(table_name: str):
ST_XMax(extent), ST_YMax(extent) ST_XMax(extent), ST_YMax(extent)
FROM ( FROM (
SELECT ST_Extent(geom) AS extent SELECT ST_Extent(geom) AS extent
FROM public.{table_name} FROM public."{table_name}"
) AS box; ) AS box;
""" """
conn = engine.connect() async with engine.connect() as conn:
try: result = await conn.execute(sql)
row = conn.execute(text(sql)).fetchone() row = result.fetchone()
finally:
conn.close()
if not row or row[0] is None: if not row or row[0] is None:
return None return None
# return { # return {
# "xmin": float(row[0]), # "xmin": row[0],
# "ymin": float(row[1]), # "ymin": row[1],
# "xmax": float(row[2]), # "xmax": row[2],
# "ymax": float(row[3]) # "ymax": row[3]
# } # }
return { return {
@ -89,7 +87,7 @@ def get_extent(table_name: str):
"ymax": -5.4819 # north "ymax": -5.4819 # north
} }
def get_author_metadata(table_name: str): async def get_author_metadata(table_name: str):
sql = """ sql = """
SELECT am.table_title, am.dataset_title, am.dataset_abstract, am.keywords, am.date_created, SELECT am.table_title, am.dataset_title, am.dataset_abstract, am.keywords, am.date_created,
@ -106,15 +104,14 @@ def get_author_metadata(table_name: str):
LIMIT 1 LIMIT 1
""" """
conn = engine.connect() async with engine.connect() as conn:
try: result = await conn.execute(sql, {"table": table_name})
row = conn.execute(text(sql), {"table": table_name}).fetchone() row = result.fetchone()
finally:
conn.close()
if not row: if not row:
raise Exception(f"Tidak ada metadata untuk tabel: {table_name}") raise Exception(f"Tidak ada metadata untuk tabel: {table_name}")
# SQLAlchemy Async row support ._mapping untuk convert ke dict
return dict(row._mapping) return dict(row._mapping)
@ -628,10 +625,10 @@ def upload_metadata_to_geonetwork(xml_metadata: str):
def publish_metadata(table_name: str, geoserver_links: dict): async def publish_metadata(table_name: str, geoserver_links: dict):
extent = get_extent(table_name) extent = await get_extent(table_name)
meta = get_author_metadata(table_name) meta = await get_author_metadata(table_name)
xml = generate_metadata_xml( xml = generate_metadata_xml(
table_name=meta["dataset_title"], table_name=meta["dataset_title"],
meta=meta, meta=meta,

View File

@ -2,7 +2,7 @@ from fastapi import HTTPException
import requests import requests
from sqlalchemy import text from sqlalchemy import text
from core.config import GEONETWORK_PASS, GEONETWORK_URL, GEONETWORK_USER from core.config import GEONETWORK_PASS, GEONETWORK_URL, GEONETWORK_USER
from database.connection import sync_engine as engine from database.connection import engine
from datetime import datetime from datetime import datetime
from uuid import uuid4 from uuid import uuid4
import re import re
@ -54,7 +54,7 @@ def fix_xml_urls(xml: str) -> str:
def get_extent(table_name: str): async def get_extent(table_name: str):
sql = f""" sql = f"""
SELECT SELECT
@ -62,24 +62,22 @@ def get_extent(table_name: str):
ST_XMax(extent), ST_YMax(extent) ST_XMax(extent), ST_YMax(extent)
FROM ( FROM (
SELECT ST_Extent(geom) AS extent SELECT ST_Extent(geom) AS extent
FROM public.{table_name} FROM public."{table_name}"
) AS box; ) AS box;
""" """
conn = engine.connect() async with engine.connect() as conn:
try: result = await conn.execute(sql)
row = conn.execute(text(sql)).fetchone() row = result.fetchone()
finally:
conn.close()
if not row or row[0] is None: if not row or row[0] is None:
return None return None
# return { # return {
# "xmin": float(row[0]), # "xmin": row[0],
# "ymin": float(row[1]), # "ymin": row[1],
# "xmax": float(row[2]), # "xmax": row[2],
# "ymax": float(row[3]) # "ymax": row[3]
# } # }
return { return {
@ -89,7 +87,7 @@ def get_extent(table_name: str):
"ymax": -5.4819 # north "ymax": -5.4819 # north
} }
def get_author_metadata(table_name: str): async def get_author_metadata(table_name: str):
sql = """ sql = """
SELECT am.table_title, am.dataset_title, am.dataset_abstract, am.keywords, am.date_created, SELECT am.table_title, am.dataset_title, am.dataset_abstract, am.keywords, am.date_created,
@ -106,15 +104,14 @@ def get_author_metadata(table_name: str):
LIMIT 1 LIMIT 1
""" """
conn = engine.connect() async with engine.connect() as conn:
try: result = await conn.execute(sql, {"table": table_name})
row = conn.execute(text(sql), {"table": table_name}).fetchone() row = result.fetchone()
finally:
conn.close()
if not row: if not row:
raise Exception(f"Tidak ada metadata untuk tabel: {table_name}") raise Exception(f"Tidak ada metadata untuk tabel: {table_name}")
# SQLAlchemy Async row support ._mapping untuk convert ke dict
return dict(row._mapping) return dict(row._mapping)
@ -626,10 +623,10 @@ def upload_metadata_to_geonetwork(xml_metadata: str):
def publish_metadata(table_name: str, geoserver_links: dict): async def publish_metadata(table_name: str, geoserver_links: dict):
extent = get_extent(table_name) extent = await get_extent(table_name)
meta = get_author_metadata(table_name) meta = await get_author_metadata(table_name)
xml = generate_metadata_xml( xml = generate_metadata_xml(
table_name=meta["dataset_title"], table_name=meta["dataset_title"],
meta=meta, meta=meta,

Binary file not shown.

View File

@ -32,10 +32,96 @@ def detect_delimiter(path, sample_size=2048):
return ',' return ','
# def read_csv(path: str, sheet: str = None):
# ext = os.path.splitext(path)[1].lower()
# try:
# if ext in ['.csv']:
# header_line = detect_header_line(path)
# delimiter = detect_delimiter(path)
# print(f"[INFO] Detected header line: {header_line + 1}, delimiter: '{delimiter}'")
# df = pd.read_csv(
# path,
# header=header_line,
# sep=delimiter,
# encoding='utf-8',
# low_memory=False,
# thousands=','
# )
# elif ext in ['.xlsx', '.xls']:
# print(f"[INFO] Membaca file Excel: {os.path.basename(path)}")
# xls = pd.ExcelFile(path)
# print(f"[INFO] Ditemukan {len(xls.sheet_names)} sheet: {xls.sheet_names}")
# if sheet:
# if sheet not in xls.sheet_names:
# raise ValueError(f"Sheet '{sheet}' tidak ditemukan dalam file {os.path.basename(path)}")
# print(f"[INFO] Membaca sheet yang ditentukan: '{sheet}'")
# df = pd.read_excel(xls, sheet_name=sheet, header=0, dtype=str)
# df = df.dropna(how='all').dropna(axis=1, how='all')
# else:
# print("[INFO] Tidak ada sheet yang ditentukan, mencari sheet paling relevan...")
# best_sheet = None
# best_score = -1
# best_df = None
# for sheet_name in xls.sheet_names:
# try:
# temp_df = pd.read_excel(xls, sheet_name=sheet_name, header=0, dtype=str)
# temp_df = temp_df.dropna(how='all').dropna(axis=1, how='all')
# if len(temp_df) == 0 or len(temp_df.columns) < 2:
# continue
# # hitung skor relevansi
# text_ratio = temp_df.applymap(lambda x: isinstance(x, str)).sum().sum() / (temp_df.size or 1)
# row_score = len(temp_df)
# score = (row_score * 0.7) + (text_ratio * 100)
# if score > best_score:
# best_score = score
# best_sheet = sheet_name
# best_df = temp_df
# except Exception as e:
# print(f"[WARN] Gagal membaca sheet {sheet_name}: {e}")
# continue
# if best_df is not None:
# print(f"[INFO] Sheet terpilih: '{best_sheet}' dengan skor {best_score:.2f}")
# df = best_df
# else:
# raise ValueError("Tidak ada sheet valid yang dapat dibaca.")
# for col in df.columns:
# if df[col].astype(str).str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
# df[col] = df[col].astype(str).str.replace(',', '', regex=False)
# df[col] = pd.to_numeric(df[col], errors='ignore')
# else:
# raise ValueError("Format file tidak dikenali (hanya .csv, .xlsx, .xls)")
# except Exception as e:
# print(f"[WARN] Gagal membaca file ({e}), fallback ke default reader.")
# df = pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',')
# df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]
# df.columns = [str(c).strip() for c in df.columns]
# df = df.dropna(how='all')
# return df
def read_csv(path: str, sheet: str = None): def read_csv(path: str, sheet: str = None):
ext = os.path.splitext(path)[1].lower() ext = os.path.splitext(path)[1].lower()
df = pd.DataFrame() # Inisialisasi default
try: try:
# --- BLOK PEMBACAAN FILE ---
if ext in ['.csv']: if ext in ['.csv']:
header_line = detect_header_line(path) header_line = detect_header_line(path)
delimiter = detect_delimiter(path) delimiter = detect_delimiter(path)
@ -52,17 +138,19 @@ def read_csv(path: str, sheet: str = None):
elif ext in ['.xlsx', '.xls']: elif ext in ['.xlsx', '.xls']:
print(f"[INFO] Membaca file Excel: {os.path.basename(path)}") print(f"[INFO] Membaca file Excel: {os.path.basename(path)}")
xls = pd.ExcelFile(path) xls = pd.ExcelFile(path, engine='openpyxl') # Pakai engine openpyxl
print(f"[INFO] Ditemukan {len(xls.sheet_names)} sheet: {xls.sheet_names}") print(f"[INFO] Ditemukan {len(xls.sheet_names)} sheet: {xls.sheet_names}")
if sheet: if sheet:
if sheet not in xls.sheet_names: if sheet not in xls.sheet_names:
raise ValueError(f"Sheet '{sheet}' tidak ditemukan dalam file {os.path.basename(path)}") raise ValueError(f"Sheet '{sheet}' tidak ditemukan.")
print(f"[INFO] Membaca sheet yang ditentukan: '{sheet}'") print(f"[INFO] Membaca sheet yang ditentukan: '{sheet}'")
df = pd.read_excel(xls, sheet_name=sheet, header=0, dtype=str) # Tambahkan engine='openpyxl'
df = pd.read_excel(xls, sheet_name=sheet, header=0, dtype=str, engine='openpyxl')
df = df.dropna(how='all').dropna(axis=1, how='all') df = df.dropna(how='all').dropna(axis=1, how='all')
else: else:
# Logika pencarian sheet terbaik (tidak berubah, hanya indentasi)
print("[INFO] Tidak ada sheet yang ditentukan, mencari sheet paling relevan...") print("[INFO] Tidak ada sheet yang ditentukan, mencari sheet paling relevan...")
best_sheet = None best_sheet = None
best_score = -1 best_score = -1
@ -70,13 +158,12 @@ def read_csv(path: str, sheet: str = None):
for sheet_name in xls.sheet_names: for sheet_name in xls.sheet_names:
try: try:
temp_df = pd.read_excel(xls, sheet_name=sheet_name, header=0, dtype=str) temp_df = pd.read_excel(xls, sheet_name=sheet_name, header=0, dtype=str, engine='openpyxl')
temp_df = temp_df.dropna(how='all').dropna(axis=1, how='all') temp_df = temp_df.dropna(how='all').dropna(axis=1, how='all')
if len(temp_df) == 0 or len(temp_df.columns) < 2: if len(temp_df) == 0 or len(temp_df.columns) < 2:
continue continue
# hitung skor relevansi
text_ratio = temp_df.applymap(lambda x: isinstance(x, str)).sum().sum() / (temp_df.size or 1) text_ratio = temp_df.applymap(lambda x: isinstance(x, str)).sum().sum() / (temp_df.size or 1)
row_score = len(temp_df) row_score = len(temp_df)
score = (row_score * 0.7) + (text_ratio * 100) score = (row_score * 0.7) + (text_ratio * 100)
@ -85,7 +172,6 @@ def read_csv(path: str, sheet: str = None):
best_score = score best_score = score
best_sheet = sheet_name best_sheet = sheet_name
best_df = temp_df best_df = temp_df
except Exception as e: except Exception as e:
print(f"[WARN] Gagal membaca sheet {sheet_name}: {e}") print(f"[WARN] Gagal membaca sheet {sheet_name}: {e}")
continue continue
@ -96,21 +182,47 @@ def read_csv(path: str, sheet: str = None):
else: else:
raise ValueError("Tidak ada sheet valid yang dapat dibaca.") raise ValueError("Tidak ada sheet valid yang dapat dibaca.")
for col in df.columns:
if df[col].astype(str).str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
df[col] = df[col].astype(str).str.replace(',', '', regex=False)
df[col] = pd.to_numeric(df[col], errors='ignore')
else: else:
raise ValueError("Format file tidak dikenali (hanya .csv, .xlsx, .xls)") raise ValueError("Format file tidak dikenali (hanya .csv, .xlsx, .xls)")
# --- BLOK PEMBERSIHAN (Dilakukan setelah file sukses terbaca) ---
# Kita bungkus ini agar error konversi angka TIDAK menggagalkan pembacaan file
if not df.empty:
df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]
df.columns = [str(c).strip() for c in df.columns]
df = df.dropna(how='all')
# Konversi Angka yang Lebih Aman
for col in df.columns:
try:
# Cek apakah kolom terlihat seperti angka
if df[col].astype(str).str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
# Bersihkan koma
clean_col = df[col].astype(str).str.replace(',', '', regex=False)
# Gunakan errors='coerce' agar jika ada error value (NaN/REF), dia jadi NaN, bukan crash
df[col] = pd.to_numeric(clean_col, errors='coerce')
except Exception as ex:
# Jika konversi gagal, biarkan sebagai string/object dan lanjut ke kolom berikutnya
print(f"[WARN] Gagal konversi numerik pada kolom '{col}': {ex}")
pass
return df
except Exception as e: except Exception as e:
print(f"[WARN] Gagal membaca file ({e}), fallback ke default reader.") # --- ERROR HANDLING YANG BENAR ---
df = pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',') print(f"[WARN] Gagal membaca file utama ({e}).")
df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')] # Hanya lakukan fallback CSV jika file aslinya MEMANG CSV (atau txt)
df.columns = [str(c).strip() for c in df.columns] # Jangan paksa baca .xlsx pakai read_csv
df = df.dropna(how='all') if ext in ['.csv', '.txt']:
print("[INFO] Mencoba fallback ke default CSV reader...")
try:
return pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',')
except Exception as e2:
print(f"[ERROR] Fallback CSV juga gagal: {e2}")
# Jika file Excel gagal dibaca, return DataFrame kosong atau raise error
print("[ERROR] Tidak dapat memulihkan pembacaan file Excel.")
return pd.DataFrame()
return df

View File

@ -1,7 +1,7 @@
import re import re
import pdfplumber import pdfplumber
import pandas as pd import pandas as pd
from services.upload_file.utils.pdf_cleaner import get_number_column_index, get_start_end_number, normalize_number_column, row_ratio, has_mixed_text_and_numbers, is_short_text_row, parse_page_selection, filter_geo_admin_column, cleaning_column from services.upload_file.readers.utils.pdf_cleaner import get_number_column_index, get_start_end_number, normalize_number_column, row_ratio, has_mixed_text_and_numbers, is_short_text_row, parse_page_selection, filter_geo_admin_column, cleaning_column
from services.upload_file.upload_exceptions import PDFReadError from services.upload_file.upload_exceptions import PDFReadError
from utils.logger_config import setup_logger from utils.logger_config import setup_logger

View File

@ -1,5 +1,9 @@
import json import json
import os import os
import random
import subprocess
import uuid
import asyncpg
import pandas as pd import pandas as pd
import geopandas as gpd import geopandas as gpd
import numpy as np import numpy as np
@ -12,13 +16,14 @@ from shapely.geometry.base import BaseGeometry
from shapely.geometry import base as shapely_base from shapely.geometry import base as shapely_base
from fastapi import Depends, File, Form, UploadFile, HTTPException from fastapi import Depends, File, Form, UploadFile, HTTPException
from api.routers.datasets_router import cleansing_data, publish_layer, query_cleansing_data, upload_to_main from api.routers.datasets_router import cleansing_data, publish_layer, query_cleansing_data, upload_to_main
from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES, GEONETWORK_URL from core.config import DB_DSN, DB_HOST, DB_NAME, DB_PASS, DB_PORT, DB_USER, UPLOAD_FOLDER, MAX_FILE_MB, GEONETWORK_URL
from services.upload_file.ai_generate import send_metadata from services.upload_file.ai_generate import send_metadata
from services.upload_file.readers.reader_csv import read_csv from services.upload_file.readers.reader_csv import read_csv
from services.upload_file.readers.reader_shp import read_shp from services.upload_file.readers.reader_shp import read_shp
from services.upload_file.readers.reader_gdb import read_gdb from services.upload_file.readers.reader_gdb import read_gdb
from services.upload_file.readers.reader_mpk import read_mpk from services.upload_file.readers.reader_mpk import read_mpk
from services.upload_file.readers.reader_pdf import convert_df, read_pdf from services.upload_file.readers.reader_pdf import convert_df, read_pdf
from services.upload_file.utils.df_validation import process_dataframe_synchronous
from services.upload_file.utils.geometry_detector import detect_and_build_geometry, attach_polygon_geometry_auto from services.upload_file.utils.geometry_detector import detect_and_build_geometry, attach_polygon_geometry_auto
from database.connection import engine, sync_engine from database.connection import engine, sync_engine
from pydantic import BaseModel from pydantic import BaseModel
@ -71,155 +76,13 @@ def detect_zip_type(zip_path: str) -> str:
return "unknown" return "unknown"
# def detect_zip_type(zip_path: str) -> str:
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# files = zip_ref.namelist()
# # ------------------------------------------------------------- async def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
# # 1) DETECT FileGDB
# # -------------------------------------------------------------
# is_gdb = (
# any(".gdb/" in f.lower() for f in files)
# or any(f.lower().endswith(ext) for ext in
# [".gdbtable", ".gdbtablx", ".gdbindexes", ".spx"] for f in files)
# )
# if is_gdb:
# print("\n[INFO] ZIP terdeteksi berisi FileGDB.")
# with tempfile.TemporaryDirectory() as temp_dir:
# # extract ZIP
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# zip_ref.extractall(temp_dir)
# # find folder *.gdb
# gdb_path = None
# for root, dirs, _ in os.walk(temp_dir):
# for d in dirs:
# if d.lower().endswith(".gdb"):
# gdb_path = os.path.join(root, d)
# break
# if not gdb_path:
# print("[ERROR] Folder .gdb tidak ditemukan.")
# return "gdb"
# print(f"[INFO] GDB Path: {gdb_path}")
# # Cari seluruh file .gdbtable
# table_files = [
# os.path.join(gdb_path, f)
# for f in os.listdir(gdb_path)
# if f.lower().endswith(".gdbtable")
# ]
# if not table_files:
# print("[ERROR] Tidak ada file .gdbtable ditemukan.")
# return "gdb"
# # Scan semua table file untuk mencari SpatialReference
# found_crs = False
# for table_file in table_files:
# try:
# with open(table_file, "rb") as f:
# raw = f.read(15000) # baca awal file, cukup untuk header JSON
# text = raw.decode("utf-8", errors="ignore")
# start = text.find("{")
# end = text.rfind("}") + 1
# if start == -1 or end == -1:
# continue
# json_str = text[start:end]
# meta = json.loads(json_str)
# spatial_ref = meta.get("SpatialReference")
# if not spatial_ref:
# continue
# wkt = spatial_ref.get("WKT")
# if not wkt:
# continue
# print(f"[FOUND] CRS metadata pada: {os.path.basename(table_file)}")
# print(f"[CRS WKT] {wkt[:200]}...")
# # Convert to EPSG
# try:
# epsg = CRS.from_wkt(wkt).to_epsg()
# print(f"[EPSG] {epsg}")
# except:
# print("[EPSG] Tidak ditemukan EPSG.")
# found_crs = True
# break
# except Exception:
# continue
# if not found_crs:
# print("[WARNING] Tidak ditemukan CRS di file .gdbtable manapun.")
# return "gdb"
# # -----------------------------------------------------
# # 2. DETEKSI SHP
# # -----------------------------------------------------
# if any(f.lower().endswith(".shp") for f in files):
# print("\n[INFO] ZIP terdeteksi berisi SHP.")
# # cari file .prj
# prj_files = [f for f in files if f.lower().endswith(".prj")]
# if not prj_files:
# print("[WARNING] Tidak ada file .prj → CRS tidak diketahui.")
# return "shp"
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# with tempfile.TemporaryDirectory() as temp_dir:
# prj_path = os.path.join(temp_dir, os.path.basename(prj_files[0]))
# zip_ref.extract(prj_files[0], temp_dir)
# # baca isi prj
# with open(prj_path, "r") as f:
# prj_text = f.read()
# try:
# crs = CRS.from_wkt(prj_text)
# print(f"[CRS WKT] {crs.to_wkt()[:200]}...")
# epsg = crs.to_epsg()
# if epsg:
# print(f"[EPSG] {epsg}")
# else:
# print("[EPSG] Tidak ditemukan dalam database EPSG.")
# except Exception as e:
# print("[ERROR] Gagal membaca CRS dari file PRJ:", e)
# return "shp"
# # -----------------------------------------------------
# # 3. UNKNOWN
# # -----------------------------------------------------
# return "unknown"
def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
result = detect_and_build_geometry(df, master_polygons=None) result = detect_and_build_geometry(df, master_polygons=None)
if not hasattr(result, "geometry") or result.geometry.isna().all(): if not hasattr(result, "geometry") or result.geometry.isna().all():
result = attach_polygon_geometry_auto(result) result = attach_polygon_geometry_auto(result)
# if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
# geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
# if not result.empty else "None"
# null_geom = result.geometry.isna().sum()
def normalize_geom_type(geom_type): def normalize_geom_type(geom_type):
if geom_type.startswith("Multi"): if geom_type.startswith("Multi"):
return geom_type.replace("Multi", "") return geom_type.replace("Multi", "")
@ -295,18 +158,12 @@ def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
"tipe_data_spasial": geom_type, "tipe_data_spasial": geom_type,
"deskripsi_singkat": fileDesc, "deskripsi_singkat": fileDesc,
"struktur_atribut_data": {}, "struktur_atribut_data": {},
# "metadata": {
# "judul": "",
# "abstrak": "",
# "tujuan": "",
# "keyword": [],
# "kategori": [],
# "kategori_mapset": ""
# }
} }
ai_suggest = send_metadata(ai_context) ai_suggest = send_metadata(ai_context)
# ai_suggest = {'judul': 'Peta Risiko Letusan Gunung Arjuna di Provinsi Jawa Timur', 'abstrak': 'Peta ini menggambarkan wilayah berisiko letusan Gunung Arjuna yang berada di Provinsi Jawa Timur. Data disajikan dalam bentuk poligon yang menunjukkan zona risiko berdasarkan analisis potensi aktivitas vulkanik.', 'tujuan': 'Data dapat digunakan untuk perencanaan mitigasi bencana dan pengambilan keputusan di wilayah Jawa Timur.', 'keyword': ['Risiko letusan', 'Gunung Arjuna', 'Bencana alam', 'Provinsi Jawa Timur', 'Geologi'], 'kategori': ['Geoscientific information', 'Environment'], 'kategori_mapset': 'Lingkungan Hidup'} # ai_suggest = {'judul': 'Peta Risiko Letusan Gunung Arjuna di Provinsi Jawa Timur', 'abstrak': 'Peta ini menggambarkan wilayah berisiko letusan Gunung Arjuna yang berada di Provinsi Jawa Timur. Data disajikan dalam bentuk poligon yang menunjukkan zona risiko berdasarkan analisis potensi aktivitas vulkanik.', 'tujuan': 'Data dapat digunakan untuk perencanaan mitigasi bencana dan pengambilan keputusan di wilayah Jawa Timur.', 'keyword': ['Risiko letusan', 'Gunung Arjuna', 'Bencana alam', 'Provinsi Jawa Timur', 'Geologi'], 'kategori': ['Geoscientific information', 'Environment'], 'kategori_mapset': 'Lingkungan Hidup'}
# print(ai_suggest)
tmp_file = generate_unique_filename()
await asyncio.to_thread(process_dataframe_synchronous, result, tmp_file)
response = { response = {
"message": "File berhasil dibaca dan dianalisis.", "message": "File berhasil dibaca dan dianalisis.",
@ -321,7 +178,8 @@ def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
"warnings": warnings, "warnings": warnings,
"warning_rows": warning_safe, "warning_rows": warning_safe,
"preview": preview_safe, "preview": preview_safe,
"metadata_suggest": ai_suggest "metadata_suggest": ai_suggest,
"tmp_path": tmp_file
} }
# return successRes(content=response) # return successRes(content=response)
@ -393,7 +251,7 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
if df is None or (hasattr(df, "empty") and df.empty): if df is None or (hasattr(df, "empty") and df.empty):
return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid") return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid")
res = process_data(df, ext, fname, fileDesc) res = await process_data(df, ext, fname, fileDesc)
tmp_path.unlink(missing_ok=True) tmp_path.unlink(missing_ok=True)
@ -427,7 +285,7 @@ async def handle_process_pdf(payload: PdfRequest):
if df is None or (hasattr(df, "empty") and df.empty): if df is None or (hasattr(df, "empty") and df.empty):
return errorRes(message="Tidak ada tabel") return errorRes(message="Tidak ada tabel")
res = process_data(df, '.pdf', payload.fileName, payload.fileDesc) res = await process_data(df, '.pdf', payload.fileName, payload.fileDesc)
return successRes(data=res) return successRes(data=res)
except Exception as e: except Exception as e:
@ -447,6 +305,7 @@ async def handle_process_pdf(payload: PdfRequest):
class UploadRequest(BaseModel): class UploadRequest(BaseModel):
title: str title: str
path: str
rows: List[dict] rows: List[dict]
columns: List[str] columns: List[str]
author: Dict[str, Any] author: Dict[str, Any]
@ -483,6 +342,14 @@ def str_to_date(raw_date: str):
def generate_unique_filename(folder="tmp", ext="parquet", digits=6):
    """Return a path to a not-yet-existing file inside *folder*.

    The folder is created if missing. The file name is derived from a random
    UUID4 integer, and the loop retries on the (practically impossible)
    collision with an existing file.

    Args:
        folder: Directory the file will live in (created on demand).
        ext:    File extension without the leading dot.
        digits: Unused; kept for backward compatibility with existing callers.

    Returns:
        A ``"<folder>/<id>.<ext>"`` path string that does not exist yet.
    """
    os.makedirs(folder, exist_ok=True)
    while True:
        # Fixed: original had a duplicated assignment (`file_id = file_id = ...`).
        file_id = uuid.uuid4().int
        filename = f"{folder}/{file_id}.{ext}"
        if not os.path.exists(filename):
            return filename
def generate_job_id(user_id: str) -> str: def generate_job_id(user_id: str) -> str:
timestamp = datetime.now().strftime("%Y%m%d%H%M%S") timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
@ -502,9 +369,135 @@ def save_xml_to_sld(xml_string, filename):
return file_path return file_path
async def process_parquet_upload(filename: str, table_name: str):
    """Load a temporary parquet file and bulk-insert it into a new PostGIS table.

    Steps:
      1. Read ``tmp/<filename>`` into a DataFrame (off the event loop).
      2. Upper-case/strip column names; require a ``GEOM`` column holding
         WKT (str) or WKB (bytes) geometries.
      3. Normalize each geometry to a Multi* EWKT string (SRID 4326) and
         stringify all attribute values.
      4. CREATE TABLE + COPY the rows, then ALTER the text column into a
         typed 2D geometry column with a GIST index.

    The source file is deleted only after a successful import. Errors are
    printed and swallowed (the caller tracks job status separately).

    NOTE(review): ``table_name`` is interpolated directly into DDL/COPY —
    it must be trusted, server-generated input (SQL-injection risk otherwise).
    """
    from main import db_pool  # lazy import avoids a circular dependency with main

    file_path = os.path.join("tmp", filename)
    if not os.path.exists(file_path):
        print(f"File {file_path} tidak ditemukan")
        return

    try:
        loop = asyncio.get_running_loop()
        # pd.read_parquet is blocking; run it in a worker thread.
        df = await loop.run_in_executor(None, pd.read_parquet, file_path)

        # =====================================================================
        # 1. CLEANING NAMA KOLOM
        # =====================================================================
        df.columns = [str(col).strip().upper() for col in df.columns]
        # (Removed a no-op `df.rename({"GEOM": "GEOM"})` from the original.)
        if "GEOM" not in df.columns:
            raise Exception("Kolom GEOM tidak ditemukan")

        # =====================================================================
        # 2. PERSIAPAN DATA (Row Processing)
        # =====================================================================
        clean_rows = []
        geom_types = set()

        # This exact list drives both CREATE TABLE and COPY, so they stay in sync.
        attr_columns = [col for col in df.columns if col != "GEOM"]

        # Use POSITIONAL access: itertuples() renames columns that are not
        # valid Python identifiers, so getattr(row, col) could silently
        # return None for such columns and drop their data.
        geom_pos = df.columns.get_loc("GEOM")
        attr_pos = [df.columns.get_loc(col) for col in attr_columns]

        from shapely import wkb  # hoisted out of the per-row loop

        for row in df.itertuples(index=False, name=None):
            # --- Handle GEOM ---
            raw_geom = row[geom_pos]
            # Only WKT strings / WKB bytes are usable; NaN/None/other -> skip.
            if not isinstance(raw_geom, (str, bytes)) or not raw_geom:
                continue
            try:
                if isinstance(raw_geom, str):
                    geom = wkt.loads(raw_geom)
                else:
                    geom = wkb.loads(raw_geom)
                if not geom:
                    continue
                if not geom.is_valid:
                    geom = geom.buffer(0)  # standard shapely fix for self-intersections

                # Promote singles so the whole table holds one uniform Multi* type.
                gtype = geom.geom_type.upper()
                if gtype == "POLYGON":
                    geom = MultiPolygon([geom])
                elif gtype == "LINESTRING":
                    geom = MultiLineString([geom])

                geom_types.add(geom.geom_type)
                ewkt = f"SRID=4326;{geom.wkt}"
            except Exception:
                continue  # unparseable geometry -> skip row

            # --- Handle Attributes (FORCE STRING) ---
            row_data = []
            for pos in attr_pos:
                val = row[pos]
                # Keep real missing values as NULL instead of the literal "nan".
                if val is None or (isinstance(val, float) and pd.isna(val)):
                    row_data.append(None)
                else:
                    row_data.append(str(val))
            row_data.append(ewkt)
            clean_rows.append(tuple(row_data))

        if not clean_rows:
            raise Exception("Data valid kosong")

        # =====================================================================
        # 3. DATABASE OPERATIONS
        # =====================================================================
        # All rows were promoted to Multi*, so normally a single type remains;
        # the "GEOM" fallback is unreachable while clean_rows is non-empty.
        final_geom_type = list(geom_types)[0].upper() if geom_types else "GEOM"
        if "MULTI" not in final_geom_type and final_geom_type != "GEOM":
            final_geom_type = "MULTI" + final_geom_type

        # A. BUILD DDL — quoted identifiers keep the UPPERCASE names in Postgres.
        col_defs = [f'"{col}" TEXT' for col in attr_columns]
        create_sql = f"""
            CREATE TABLE {table_name} (
                _id SERIAL PRIMARY KEY, -- lowercase default
                {', '.join(col_defs)}, -- UPPERCASE (Hasil loop attr_columns)
                geom TEXT -- lowercase
            );
        """

        async with db_pool.acquire() as conn:
            # 1. Create Table
            await conn.execute(create_sql)

            # 2. COPY Data — column order must match the tuples in clean_rows.
            target_cols = attr_columns + ['geom']
            await conn.copy_records_to_table(
                table_name,
                records=clean_rows,
                columns=target_cols
            )

            # 3. Convert the TEXT column into a real 2D geometry + spatial index.
            alter_sql = f"""
                ALTER TABLE {table_name}
                ALTER COLUMN geom TYPE geometry({final_geom_type}, 4326)
                USING ST_Force2D(geom::geometry)::geometry({final_geom_type}, 4326);

                CREATE INDEX idx_{table_name}_geom ON {table_name} USING GIST (geom);
            """
            await conn.execute(alter_sql)

        print(f"Sukses upload {len(clean_rows)} baris ke {table_name}.")
        os.remove(file_path)  # delete the tmp file only after success

    except Exception as e:
        # Best-effort job step: log and swallow so the caller can continue.
        print(f"Error processing parquet: {e}")
async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
@ -580,26 +573,10 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
raise HTTPException(400, f"CRS {detected_crs} tidak valid") raise HTTPException(400, f"CRS {detected_crs} tidak valid")
# ===================================================================== # =====================================================================
# 7. SIMPAN KE POSTGIS (synchronous) # 7. SIMPAN KE POSTGIS
# ===================================================================== # =====================================================================
loop = asyncio.get_running_loop() job_id = generate_job_id(str(user_id))
await loop.run_in_executor( await process_parquet_upload(payload.path, table_name)
None,
lambda: gdf.to_postgis(
table_name,
sync_engine,
if_exists="replace",
index=False
)
)
# =====================================================================
# 8. ADD PRIMARY KEY (wajib untuk QGIS API)
# =====================================================================
async with engine.begin() as conn:
await conn.execute(text(
f'ALTER TABLE "{table_name}" ADD COLUMN _ID SERIAL PRIMARY KEY;'
))
# ===================================================================== # =====================================================================
# 9. SIMPAN METADATA (geom_type, author metadata) # 9. SIMPAN METADATA (geom_type, author metadata)
@ -646,7 +623,6 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
"dataset_title": payload.title, "dataset_title": payload.title,
"dataset_abstract": author.get("abstract"), "dataset_abstract": author.get("abstract"),
"keywords": author.get("keywords"), "keywords": author.get("keywords"),
# "topic_category": author.get("topicCategory"),
"topic_category": ", ".join(author.get("topicCategory")), "topic_category": ", ".join(author.get("topicCategory")),
"date_created": str_to_date(author.get("dateCreated")), "date_created": str_to_date(author.get("dateCreated")),
"dataset_status": author.get("status"), "dataset_status": author.get("status"),
@ -660,7 +636,6 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
"geometry_count": row_count "geometry_count": row_count
}) })
# ===================================================================== # =====================================================================
# 10. LOGGING # 10. LOGGING
# ===================================================================== # =====================================================================
@ -671,7 +646,6 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
details={"table_name": table_name, "rows": len(gdf)} details={"table_name": table_name, "rows": len(gdf)}
) )
job_id = generate_job_id(str(user_id))
result = { result = {
"job_id": job_id, "job_id": job_id,
"job_status": "wait", "job_status": "wait",
@ -685,9 +659,6 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
} }
save_xml_to_sld(payload.style, job_id) save_xml_to_sld(payload.style, job_id)
# await report_progress(job_id, "upload", 20, "Upload selesai")
# cleansing_data(table_name, job_id)
cleansing = await query_cleansing_data(table_name) cleansing = await query_cleansing_data(table_name)
result['job_status'] = cleansing result['job_status'] = cleansing
@ -698,13 +669,13 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
"name": payload.title, "name": payload.title,
"description": author.get("abstract"), "description": author.get("abstract"),
"scale": "1:25000", "scale": "1:25000",
"projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7", 'projection_system_id': '0196c746-d1ba-7f1c-9706-5df738679cc7',
"category_id": author.get("mapsetCategory"), "category_id": author.get("mapsetCategory"),
"data_status": "sementara", "data_status": "sementara",
"classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4", 'classification_id': '01968b4b-d3f9-76c9-888c-ee887ac31ce4',
"producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187", 'producer_id': '01968b54-0000-7a67-bd10-975b8923b93e',
"layer_type": unified_geom_type[0], "layer_type": unified_geom_type[0],
"source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"], 'source_id': ['019c03ef-35e1-738b-858d-871dc7d1e4d6'],
"layer_url": publish['geos_link'], "layer_url": publish['geos_link'],
"metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish['uuid']}", "metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish['uuid']}",
"coverage_level": "provinsi", "coverage_level": "provinsi",
@ -713,12 +684,11 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
"data_version": "2026", "data_version": "2026",
"is_popular": False, "is_popular": False,
"is_active": True, "is_active": True,
"regional_id": "01968b53-a910-7a67-bd10-975b8923b92e", 'regional_id': '01968b53-a910-7a67-bd10-975b8923b92e',
"notes": "Mapset baru dibuat", "notes": "Mapset baru dibuat",
"status_validation": "on_verification", "status_validation": "on_verification",
} }
print("mapset data",mapset)
await upload_to_main(mapset) await upload_to_main(mapset)
return successRes(data=result) return successRes(data=result)
@ -731,265 +701,3 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
details={"error": str(e)} details={"error": str(e)}
) )
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
# try:
# job_id = generate_job_id(str(user_id))
# result = {
# "job_id": job_id,
# "job_status": "done",
# "table_name": "just for test",
# "status": "success",
# "message": f"Tabel test berhasil dibuat.",
# "total_rows": 10,
# "geometry_type": "Polygon",
# "crs": "EPSG 4326",
# "metadata_uuid": "-"
# }
# mapset = {
# "name": "Resiko Letusan Gunung Arjuno",
# "description": "Testing Automation Upload",
# "scale": "1:25000",
# "projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7",
# "category_id": "0196c80c-855f-77f9-abd0-0c8a30b8c2f5",
# "data_status": "sementara",
# "classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4",
# "producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187",
# "layer_type": "polygon",
# "source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"],
# "layer_url": "http://192.168.60.24:8888/geoserver/wms?service=WMS&version=1.1.0&request=GetMap&layers=labai:risiko_letusan_gunung_arjuno_bromo&bbox=110.89528623700005,-8.780412043999945,116.26994997700001,-5.042971664999925&width=768&height=534&srs=EPSG:4326&styles=&format=application/openlayers",
# "metadata_url": "http://192.168.60.24:7777/geonetwork/srv/eng/catalog.search#/metadata/9e5e2f09-13ef-49b5-bb49-1cb12136f63b",
# "coverage_level": "provinsi",
# "coverage_area": "kabupaten",
# "data_update_period": "Tahunan",
# "data_version": "2026",
# "is_popular": False,
# "is_active": True,
# "regional_id": "01968b53-a910-7a67-bd10-975b8923b92e",
# "notes": "Mapset baru dibuat",
# "status_validation": "on_verification",
# }
# await upload_to_main(mapset)
# return successRes(data=result)
# except Exception as e:
# print("errot", e)
# ===================================
# partition +VIEW
# ===================================
# Daftar prefix WKT yang valid
# VALID_WKT_PREFIXES = ("POINT", "LINESTRING", "POLYGON", "MULTIPOLYGON", "MULTILINESTRING")
def slugify(value: str) -> str:
    """Turn a dataset title into a safe snake_case name for a VIEW."""
    parts = re.split(r'[^a-zA-Z0-9]+', value.lower())
    return '_'.join(p for p in parts if p)
# Partition + VIEW
# async def create_dataset_view_from_metadata(conn, metadata_id: int, user_id: int, title: str):
# norm_title = slugify(title)
# view_name = f"v_user_{user_id}_{norm_title}"
# base_table = f"test_partition_user_{user_id}"
# # Ambil daftar field
# result = await conn.execute(text("SELECT fields FROM dataset_metadata WHERE id=:mid"), {"mid": metadata_id})
# fields_json = result.scalar_one_or_none()
# base_columns = {"id", "user_id", "metadata_id", "geom"}
# columns_sql = ""
# field_list = []
# if fields_json:
# fields = json.loads(fields_json) if isinstance(fields_json, str) else fields_json
# field_list = fields
# for f in field_list:
# safe_col = slugify(f)
# alias_name = safe_col if safe_col not in base_columns else f"attr_{safe_col}"
# # CAST otomatis
# if safe_col in ["longitude", "latitude", "lon", "lat"]:
# columns_sql += f", (p.attributes->>'{f}')::float AS {alias_name}"
# else:
# columns_sql += f", p.attributes->>'{f}' AS {alias_name}"
# # Drop view lama
# await conn.execute(text(f"DROP VIEW IF EXISTS {view_name} CASCADE;"))
# # 🔥 Buat VIEW baru yang punya FID unik
# create_view_query = f"""
# CREATE OR REPLACE VIEW {view_name} AS
# SELECT
# row_number() OVER() AS fid, -- FID unik untuk QGIS
# p.id,
# p.user_id,
# p.metadata_id,
# p.geom
# {columns_sql},
# m.title,
# m.year,
# m.description
# FROM {base_table} p
# JOIN dataset_metadata m ON m.id = p.metadata_id
# WHERE p.metadata_id = {metadata_id};
# """
# await conn.execute(text(create_view_query))
# # Register geometry untuk QGIS
# await conn.execute(text(f"DELETE FROM geometry_columns WHERE f_table_name = '{view_name}';"))
# await conn.execute(text(f"""
# INSERT INTO geometry_columns
# (f_table_schema, f_table_name, f_geometry_column, coord_dimension, srid, type)
# VALUES ('public', '{view_name}', 'geom', 2, 4326, 'GEOMETRY');
# """))
# print(f"[INFO] VIEW {view_name} dibuat dengan FID unik dan kompatibel dengan QGIS.")
# async def handle_to_postgis(payload, engine, user_id: int = 3):
# """
# Menangani upload data spasial ke PostGIS (dengan partition per user).
# - Jika partisi belum ada, akan dibuat otomatis
# - Metadata dataset disimpan di tabel dataset_metadata
# - Data spasial dimasukkan ke tabel partisi (test_partition_user_{id})
# - VIEW otomatis dibuat untuk QGIS
# """
# try:
# df = pd.DataFrame(payload.rows)
# print(f"[INFO] Diterima {len(df)} baris data dari frontend.")
# # --- Validasi kolom geometry ---
# if "geometry" not in df.columns:
# raise errorRes(status_code=400, message="Kolom 'geometry' tidak ditemukan dalam data.")
# # --- Parsing geometry ke objek shapely ---
# df["geometry"] = df["geometry"].apply(
# lambda g: wkt.loads(g)
# if isinstance(g, str) and g.strip().upper().startswith(VALID_WKT_PREFIXES)
# else None
# )
# # --- Buat GeoDataFrame ---
# gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")
# # --- Metadata info dari payload ---
# # dataset_title = getattr(payload, "dataset_title", None)
# # dataset_year = getattr(payload, "dataset_year", None)
# # dataset_desc = getattr(payload, "dataset_description", None)
# dataset_title = "hujan 2045"
# dataset_year = 2045
# dataset_desc = "test metadata"
# if not dataset_title:
# raise errorRes(status_code=400, detail="Field 'dataset_title' wajib ada untuk metadata.")
# async with engine.begin() as conn:
# fields = [col for col in df.columns if col != "geometry"]
# # 💾 1⃣ Simpan Metadata Dataset
# print("[INFO] Menyimpan metadata dataset...")
# result = await conn.execute(
# text("""
# INSERT INTO dataset_metadata (user_id, title, year, description, fields, created_at)
# VALUES (:user_id, :title, :year, :desc, :fields, :created_at)
# RETURNING id;
# """),
# {
# "user_id": user_id,
# "title": dataset_title,
# "year": dataset_year,
# "desc": dataset_desc,
# "fields": json.dumps(fields),
# "created_at": datetime.utcnow(),
# },
# )
# metadata_id = result.scalar_one()
# print(f"[INFO] Metadata disimpan dengan ID {metadata_id}")
# # ⚙️ 2⃣ Auto-create Partisi Jika Belum Ada
# print(f"[INFO] Memastikan partisi test_partition_user_{user_id} tersedia...")
# await conn.execute(
# text(f"""
# DO $$
# BEGIN
# IF NOT EXISTS (
# SELECT 1 FROM pg_tables WHERE tablename = 'test_partition_user_{user_id}'
# ) THEN
# EXECUTE format('
# CREATE TABLE test_partition_user_%s
# PARTITION OF test_partition
# FOR VALUES IN (%s);
# ', {user_id}, {user_id});
# EXECUTE format('CREATE INDEX IF NOT EXISTS idx_partition_user_%s_geom ON test_partition_user_%s USING GIST (geom);', {user_id}, {user_id});
# EXECUTE format('CREATE INDEX IF NOT EXISTS idx_partition_user_%s_metadata ON test_partition_user_%s (metadata_id);', {user_id}, {user_id});
# END IF;
# END
# $$;
# """)
# )
# # 🧩 3⃣ Insert Data Spasial ke Partisi
# print(f"[INFO] Memasukkan data ke test_partition_user_{user_id} ...")
# insert_count = 0
# for _, row in gdf.iterrows():
# geom_wkt = row["geometry"].wkt if row["geometry"] is not None else None
# attributes = row.drop(labels=["geometry"]).to_dict()
# await conn.execute(
# text("""
# INSERT INTO test_partition (user_id, metadata_id, geom, attributes, created_at)
# VALUES (:user_id, :metadata_id, ST_Force2D(ST_GeomFromText(:geom, 4326)),
# CAST(:attr AS jsonb), :created_at);
# """),
# {
# "user_id": user_id,
# "metadata_id": metadata_id,
# "geom": geom_wkt,
# "attr": json.dumps(attributes),
# "created_at": datetime.utcnow(),
# },
# )
# insert_count += 1
# # 🧩 4⃣ Membuat VIEW untuk dataset baru di QGIS
# await create_dataset_view_from_metadata(conn, metadata_id, user_id, dataset_title)
# print(f"[INFO] ✅ Berhasil memasukkan {insert_count} baris ke partisi user_id={user_id} (metadata_id={metadata_id}).")
# return {
# "status": "success",
# "user_id": user_id,
# "metadata_id": metadata_id,
# "dataset_title": dataset_title,
# "inserted_rows": insert_count,
# "geometry_type": list(gdf.geom_type.unique()),
# }
# except Exception as e:
# print(f"[ERROR] Gagal upload ke PostGIS partition: {e}")
# raise errorRes(status_code=500, message="Gagal upload ke PostGIS partition", details=str(e))

View File

@ -0,0 +1,843 @@
import json
import os
import random
import subprocess
import uuid
import asyncpg
import pandas as pd
import geopandas as gpd
import numpy as np
import re
import zipfile
import tempfile
import asyncio
from pyproj import CRS
from shapely.geometry.base import BaseGeometry
from shapely.geometry import base as shapely_base
from fastapi import Depends, File, Form, UploadFile, HTTPException
from api.routers.datasets_router import cleansing_data, publish_layer, query_cleansing_data, upload_to_main
from core.config import DB_DSN, DB_HOST, DB_NAME, DB_PASS, DB_PORT, DB_USER, UPLOAD_FOLDER, MAX_FILE_MB, GEONETWORK_URL
from services.upload_file.ai_generate import send_metadata
from services.upload_file.readers.reader_csv import read_csv
from services.upload_file.readers.reader_shp import read_shp
from services.upload_file.readers.reader_gdb import read_gdb
from services.upload_file.readers.reader_mpk import read_mpk
from services.upload_file.readers.reader_pdf import convert_df, read_pdf
from services.upload_file.utils.df_validation import process_dataframe_synchronous
from services.upload_file.utils.geometry_detector import detect_and_build_geometry, attach_polygon_geometry_auto
from database.connection import engine, sync_engine
from pydantic import BaseModel
from typing import Any, Dict, List, Optional
from shapely import MultiLineString, MultiPolygon, wkt
from sqlalchemy import text
from datetime import datetime
from response import successRes, errorRes
from utils.logger_config import log_activity
# Base.metadata.create_all(bind=engine)
def is_geom_empty(g):
    """Return True when *g* holds no usable geometry (None, NaN, or an empty shape)."""
    if g is None:
        return True
    if isinstance(g, float) and pd.isna(g):
        return True
    return g.is_empty if isinstance(g, BaseGeometry) else False
def safe_json(value):
    """Coerce numpy/pandas/shapely scalars into plain JSON-serializable values."""
    if isinstance(value, (np.int64, np.int32)):
        return int(value)
    if isinstance(value, (np.float64, np.float32)):
        return float(value)
    if isinstance(value, pd.Timestamp):
        return value.isoformat()
    if isinstance(value, shapely_base.BaseGeometry):
        return str(value)  # WKT string representation
    return None if pd.isna(value) else value
def detect_zip_type(zip_path: str) -> str:
    """Classify a ZIP archive as 'gdb', 'shp', or 'unknown' from its member names."""
    with zipfile.ZipFile(zip_path, "r") as archive:
        names = [member.lower() for member in archive.namelist()]
    # FileGDB: either a *.gdb/ folder or the characteristic table files.
    if any(".gdb/" in name for name in names):
        return "gdb"
    gdb_markers = (".gdbtable", ".gdbtablx", ".gdbindexes", ".spx")
    if any(name.endswith(gdb_markers) for name in names):
        return "gdb"
    if any(name.endswith(".shp") for name in names):
        return "shp"
    return "unknown"
async def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
    """Profile an uploaded table: build geometry, measure validity, request AI
    metadata suggestions, stash the processed frame as a temp parquet file,
    and return a summary dict for the preview UI.

    Returns the summary dict on success, or an errorRes (422) when no
    geometry could be matched at all.
    """
    result = detect_and_build_geometry(df, master_polygons=None)
    # Fallback: attach polygons automatically when no geometry column could
    # be derived directly from the attributes.
    if not hasattr(result, "geometry") or result.geometry.isna().all():
        result = attach_polygon_geometry_auto(result)

    def normalize_geom_type(geom_type):
        # Collapse Multi* variants to the base type (MultiPolygon -> Polygon).
        if geom_type.startswith("Multi"):
            return geom_type.replace("Multi", "")
        return geom_type

    if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
        geom_types = (
            result.geometry
            .dropna()
            .geom_type
            .apply(normalize_geom_type)
            .unique()
        )
        # NOTE(review): only the first detected type is reported, even when
        # the layer mixes geometry types.
        geom_type = geom_types[0] if len(geom_types) > 0 else "None"
        null_geom = result.geometry.isna().sum()
        print(f"[INFO] Tipe Geometry: {geom_type}")
        print(f"[INFO] Jumlah geometry kosong: {null_geom}")
    else:
        # Geometry detection failed entirely -> structured 422 response.
        res = {
            "message": "Tidak menemukan tabel yang relevan.",
            "file_type": ext,
            "rows": 0,
            "columns": 0,
            "geometry_valid": 0,
            "geometry_empty": 0,
            "geometry_valid_percent": 0,
            "warnings": [],
            "warning_examples": [],
            "preview": []
        }
        return errorRes(message="Tidak berhasil mencocokan geometry pada tabel." ,details=res, status_code=422)
    # Normalize NA/inf placeholders so JSON serialization cannot fail.
    result = result.replace([pd.NA, float('inf'), float('-inf')], None)
    if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
        # Serialize geometries to WKT strings (JSON/parquet friendly).
        result['geometry'] = result['geometry'].apply(
            lambda g: g.wkt if g is not None else None
        )
    empty_count = result['geometry'].apply(is_geom_empty).sum()
    valid_count = len(result) - empty_count
    # NOTE(review): divides by len(result) — assumes the frame is non-empty.
    match_percentage = (valid_count / len(result)) * 100
    warnings = []
    if empty_count > 0:
        warnings.append(
            f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
            f"({100 - match_percentage:.2f}% data gagal cocok)."
        )
    if empty_count > 0:
        # Cap the reported examples at 500 rows to bound the response size.
        examples = result[result['geometry'].apply(is_geom_empty)].head(500)
        warning_examples = examples.to_dict(orient="records")
    else:
        warning_examples = []
    preview_data = result.to_dict(orient="records")
    preview_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in preview_data
    ]
    warning_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
    ]
    # Context fed to the AI metadata-suggestion service.
    ai_context = {
        "nama_file_peta": filename,
        "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD) Provinsi Jatim",
        "tipe_data_spasial": geom_type,
        "deskripsi_singkat": fileDesc,
        "struktur_atribut_data": {},
    }
    ai_suggest = send_metadata(ai_context)
    # Persist the processed frame so the later commit step can pick it up.
    tmp_file = generate_unique_filename()
    await asyncio.to_thread(process_dataframe_synchronous, result, tmp_file)
    response = {
        "message": "File berhasil dibaca dan dianalisis.",
        "file_name": filename,
        "file_type": ext,
        "rows": int(len(result)),
        "columns": list(map(str, result.columns)),
        "geometry_valid": int(valid_count),
        "geometry_empty": int(empty_count),
        "geometry_valid_percent": float(round(match_percentage, 2)),
        "geometry_type": geom_type,
        "warnings": warnings,
        "warning_rows": warning_safe,
        "preview": preview_safe,
        "metadata_suggest": ai_suggest,
        "tmp_path": tmp_file
    }
    return response
async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form(""), fileDesc: Optional[str] = Form("")):
    """Receive an uploaded file, dispatch to the reader matching its extension
    (csv/xlsx/mpk/pdf/zip), and run the analysis pipeline on the result.

    Returns successRes with the analysis payload, or errorRes on failure.
    """
    fname = file.filename
    ext = os.path.splitext(fname)[1].lower()
    contents = await file.read()
    size_mb = len(contents) / (1024*1024)
    if size_mb > MAX_FILE_MB:
        raise errorRes(status_code=413, message="Ukuran File Terlalu Besar")
    # Spool the upload to disk so the readers can work from a file path.
    tmp_path = UPLOAD_FOLDER / fname
    with open(tmp_path, "wb") as f:
        f.write(contents)
    try:
        df = None
        print('ext', ext)
        if ext == ".csv":
            df = read_csv(str(tmp_path))
        elif ext == ".xlsx":
            # NOTE(review): xlsx is routed through read_csv with a sheet
            # argument — confirm read_csv also handles Excel input.
            df = read_csv(str(tmp_path), sheet)
        elif ext == ".mpk":
            df = read_mpk(str(tmp_path))
        elif ext == ".pdf":
            tbl = read_pdf(tmp_path, page)
            if len(tbl) == 0:
                res = {
                    "message": "Tidak ditemukan tabel valid pada halaman yang dipilih",
                    "tables": {},
                    "file_type": ext
                }
                return successRes(message="Tidak ditemukan tabel valid pada halaman yang dipilih", data=res)
            elif len(tbl) > 1:
                # Multiple candidate tables: return them so the client can pick one.
                res = {
                    "message": "File berhasil dibaca dan dianalisis.",
                    "tables": tbl,
                    "file_type": ext
                }
                return successRes(data=res, message="File berhasil dibaca dan dianalisis.")
            else:
                df = convert_df(tbl[0])
        elif ext == ".zip":
            # A ZIP may wrap either a Shapefile set or a FileGDB.
            zip_type = detect_zip_type(str(tmp_path))
            if zip_type == "shp":
                print("[INFO] ZIP terdeteksi sebagai Shapefile.")
                df = read_shp(str(tmp_path))
            elif zip_type == "gdb":
                print("[INFO] ZIP terdeteksi sebagai Geodatabase (GDB).")
                df = read_gdb(str(tmp_path))
            else:
                return successRes(message="ZIP file tidak mengandung SHP / GDB valid.")
        else:
            raise errorRes(status_code=400, message="Unsupported file type")
        if df is None or (hasattr(df, "empty") and df.empty):
            return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid")
        res = await process_data(df, ext, fname, fileDesc)
        # Remove the spooled upload only on success; kept on failure for debugging.
        tmp_path.unlink(missing_ok=True)
        return successRes(data=res)
    except Exception as e:
        print(f"[ERROR] {e}")
        return errorRes(
            message="Internal Server Error",
            details=str(e),
            status_code=500
        )
# finally:
# db_session.close()
class PdfRequest(BaseModel):
    """Payload for re-processing a single table extracted from a PDF."""
    # Title of the table as detected in the PDF.
    title: str
    # Column headers of the extracted table.
    columns: List[str]
    # Row values, one inner list per table row.
    rows: List[List]
    # Original uploaded file name (used for AI metadata context).
    fileName: str
    # User-supplied description of the file.
    fileDesc: str
async def handle_process_pdf(payload: PdfRequest):
    """Convert an already-extracted PDF table payload into a DataFrame and
    run the standard analysis pipeline on it."""
    try:
        frame = convert_df(payload.model_dump())
        if frame is None or (hasattr(frame, "empty") and frame.empty):
            return errorRes(message="Tidak ada tabel")
        analysis = await process_data(frame, '.pdf', payload.fileName, payload.fileDesc)
        return successRes(data=analysis)
    except Exception as e:
        print(f"[ERROR] {e}")
        return errorRes(message="Internal Server Error", details= str(e), status_code=500)
# finally:
# db_session.close()
class UploadRequest(BaseModel):
    """Payload for committing a previewed dataset into PostGIS."""
    # Dataset title; also the basis for the generated table name.
    title: str
    # Temp parquet file name (resolved relative to tmp/) produced at preview time.
    path: str
    # Preview rows; must include a GEOMETRY column holding WKT strings.
    rows: List[dict]
    # Column names of the preview rows.
    columns: List[str]
    # Author/metadata form fields (abstract, keywords, crs, contact, ...).
    author: Dict[str, Any]
    # SLD style XML to persist alongside the layer.
    style: str
# generate _2 if exist
async def generate_unique_table_name(base_name: str):
    """Slugify *base_name* and return the first ``_<n>``-suffixed variant
    (starting at ``_2``) that does not yet exist as a relation in the DB."""
    slug = base_name.lower().replace(" ", "_").replace("-", "_")
    async with engine.connect() as conn:
        suffix = 1
        candidate = slug
        while True:
            probe = await conn.execute(
                text("SELECT to_regclass(:tname)"),
                {"tname": candidate}
            )
            # to_regclass yields NULL (None) when the relation is absent.
            if probe.scalar() is None:
                return candidate
            suffix += 1
            candidate = f"{slug}_{suffix}"
def str_to_date(raw_date: str):
    """Parse an ISO ``YYYY-MM-DD`` string into a ``date``; None when empty
    or unparseable (a warning is printed on parse failure)."""
    if not raw_date:
        return None
    try:
        return datetime.strptime(raw_date, "%Y-%m-%d").date()
    except Exception as e:
        print("[WARNING] Tidak bisa parse dateCreated:", e)
        return None
def generate_unique_filename(folder="tmp", ext="parquet", digits=6):
    """Return a not-yet-existing path of the form ``<folder>/<uuid-int>.<ext>``.

    Creates *folder* if needed. ``digits`` is kept for backward compatibility
    but unused — names come from uuid4, not fixed-width digits.
    """
    os.makedirs(folder, exist_ok=True)
    while True:
        # Fix: original had a duplicated `file_id = file_id = ...` assignment.
        file_id = uuid.uuid4().int
        filename = f"{folder}/{file_id}.{ext}"
        if not os.path.exists(filename):
            return filename
def generate_job_id(user_id: str) -> str:
    """Compose a job identifier of the form ``<user_id>_<YYYYmmddHHMMSS>``."""
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    return "_".join((user_id, stamp))
def save_xml_to_sld(xml_string, filename):
    """Write *xml_string* to ``style_temp/<filename>.sld`` and return the path.

    Fix: the *filename* argument was previously ignored (the f-string carried
    no placeholder), so every call clobbered the same file.
    """
    folder_path = 'style_temp'
    os.makedirs(folder_path, exist_ok=True)
    file_path = os.path.join(folder_path, f"{filename}.sld")
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(xml_string)
    return file_path
async def process_parquet_upload(filename: str, table_name: str):
    """Load a temp parquet file, normalize its geometries, and bulk-load it
    into a new PostGIS table *table_name*.

    All attributes are stored as TEXT; geometries are promoted to Multi*,
    forced to 2D, stamped SRID 4326, and GIST-indexed. The source file is
    removed on success. Errors are printed, not raised (fire-and-forget).
    """
    from main import db_pool  # local import avoids a circular import with main
    file_path = os.path.join("tmp", filename)
    if not os.path.exists(file_path):
        print(f"File {file_path} tidak ditemukan")
        return
    try:
        loop = asyncio.get_running_loop()
        # Parquet reading is blocking; off-load it to the default executor.
        df = await loop.run_in_executor(None, pd.read_parquet, file_path)

        # --- 1. Normalize column names (strip + uppercase). ----------------
        # Fix: dropped the original no-op rename {"GEOM": "GEOM"}.
        df.columns = [str(col).strip().upper() for col in df.columns]
        if "GEOM" not in df.columns:
            raise Exception("Kolom GEOM tidak ditemukan")

        # --- 2. Row processing: parse WKT/WKB, promote to Multi*. ----------
        clean_rows = []
        geom_types = set()
        # Attribute columns drive both CREATE TABLE and COPY — must stay in sync.
        attr_columns = [col for col in df.columns if col != "GEOM"]
        for row in df.itertuples(index=False):
            raw_geom = getattr(row, "GEOM", None)
            if not raw_geom:
                continue
            try:
                geom = None
                if isinstance(raw_geom, str):
                    geom = wkt.loads(raw_geom)
                elif isinstance(raw_geom, bytes):
                    from shapely import wkb
                    geom = wkb.loads(raw_geom)
                # Explicit emptiness test instead of geometry truthiness.
                if geom is None or geom.is_empty:
                    continue
                if not geom.is_valid:
                    geom = geom.buffer(0)  # classic self-intersection repair
                gtype = geom.geom_type.upper()
                if gtype == "POLYGON":
                    geom = MultiPolygon([geom])
                elif gtype == "LINESTRING":
                    geom = MultiLineString([geom])
                geom_types.add(geom.geom_type)
                ewkt = f"SRID=4326;{geom.wkt}"
            except Exception:
                continue  # skip unparseable geometries
            # Attributes are forced to str so COPY never hits a type mismatch.
            row_data = []
            for col in attr_columns:
                val = getattr(row, col, None)
                row_data.append(str(val) if val is not None else None)
            row_data.append(ewkt)
            clean_rows.append(tuple(row_data))
        if not clean_rows:
            raise Exception("Data valid kosong")

        # --- 3. Database operations. ---------------------------------------
        # Fix: sorted() makes the type choice deterministic (was arbitrary
        # set ordering via list(geom_types)[0]).
        final_geom_type = sorted(geom_types)[0].upper() if geom_types else "GEOM"
        if "MULTI" not in final_geom_type and final_geom_type != "GEOM":
            final_geom_type = "MULTI" + final_geom_type
        # Quoted identifiers keep the UPPERCASE attribute names in the DB.
        col_defs = [f'"{col}" TEXT' for col in attr_columns]
        create_sql = f"""
            CREATE TABLE {table_name} (
                _id SERIAL PRIMARY KEY,
                {', '.join(col_defs)},
                geom TEXT
            );
        """
        async with db_pool.acquire() as conn:
            await conn.execute(create_sql)
            # COPY column order must match row_data: attributes, then geom.
            target_cols = attr_columns + ['geom']
            await conn.copy_records_to_table(
                table_name,
                records=clean_rows,
                columns=target_cols
            )
            # Convert the TEXT EWKT column into a typed, 2D, indexed geometry.
            alter_sql = f"""
                ALTER TABLE {table_name}
                ALTER COLUMN geom TYPE geometry({final_geom_type}, 4326)
                USING ST_Force2D(geom::geometry)::geometry({final_geom_type}, 4326);
                CREATE INDEX idx_{table_name}_geom ON {table_name} USING GIST (geom);
            """
            await conn.execute(alter_sql)
        print(f"Sukses upload {len(clean_rows)} baris ke {table_name}.")
        os.remove(file_path)
    except Exception as e:
        print(f"Error processing parquet: {e}")
async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
    """End-to-end commit of a previewed dataset: validate geometries, load the
    temp parquet into a new PostGIS table, store author metadata, publish the
    layer (GeoServer + GeoNetwork), and register the mapset in the main app.

    Raises HTTPException(500) wrapping any failure (after logging it).
    NOTE(review): HTTPExceptions raised inside the try (400s) are caught by
    the blanket handler below and re-raised as 500 — confirm intentional.
    """
    try:
        table_name = await generate_unique_table_name(payload.title)
        # Build a DataFrame from the preview rows; upper-case the columns so
        # the GEOMETRY column can be located case-insensitively.
        df = pd.DataFrame(payload.rows)
        df.columns = [col.upper() for col in df.columns]
        if "GEOMETRY" not in df.columns:
            raise HTTPException(400, "Kolom GEOMETRY tidak ditemukan")
        # 1. WKT string -> shapely geometry (missing/invalid become None).
        def safe_load_wkt(g):
            if not isinstance(g, str):
                return None
            try:
                geom = wkt.loads(g)
                return geom
            except:
                return None
        df["GEOMETRY"] = df["GEOMETRY"].apply(safe_load_wkt)
        df = df.rename(columns={"GEOMETRY": "geom"})
        # 2. Drop rows whose geometry failed to parse.
        df = df[df["geom"].notnull()]
        if df.empty:
            raise HTTPException(400, "Semua geometry invalid atau NULL")
        # 3. Repair invalid geometries (buffer(0) trick).
        df["geom"] = df["geom"].apply(lambda g: g if g.is_valid else g.buffer(0))
        # 4. Promote single-part types so the column is homogeneous.
        def unify_geometry_type(g):
            gtype = g.geom_type.upper()
            if gtype == "POLYGON":
                return MultiPolygon([g])
            if gtype == "LINESTRING":
                return MultiLineString([g])
            return g  # already MULTI* or POINT
        df["geom"] = df["geom"].apply(unify_geometry_type)
        # 5. CRS detection from the author metadata.
        detected_crs = payload.author.get("crs")
        detected = payload.author.get("crs")
        print('crs', detected)
        if not detected_crs:
            detected_crs = "EPSG:4326"
        # NOTE(review): this unconditionally forces EPSG:4326, making the
        # detection above dead code — confirm this override is intentional.
        detected_crs = 'EPSG:4326'
        gdf = gpd.GeoDataFrame(df, geometry="geom", crs=detected_crs)
        row_count = len(gdf)
        # 6. Sanity-check that the CRS is known to pyproj/PostGIS.
        try:
            _ = gdf.to_crs(gdf.crs)  # round-trip as a validity probe
        except:
            raise HTTPException(400, f"CRS {detected_crs} tidak valid")
        # 7. Bulk-load the temp parquet file into a fresh PostGIS table.
        # (A commented-out ogr2ogr-subprocess implementation was removed here.)
        job_id = generate_job_id(str(user_id))
        await process_parquet_upload(payload.path, table_name)
        # 8. Persist author metadata for the new table.
        unified_geom_type = list(gdf.geom_type.unique())
        author = payload.author
        async with engine.begin() as conn:
            await conn.execute(text("""
                INSERT INTO backend.author_metadata (
                    table_title,
                    dataset_title,
                    dataset_abstract,
                    keywords,
                    topic_category,
                    date_created,
                    dataset_status,
                    organization_name,
                    contact_person_name,
                    contact_email,
                    contact_phone,
                    geom_type,
                    user_id,
                    process,
                    geometry_count
                ) VALUES (
                    :table_title,
                    :dataset_title,
                    :dataset_abstract,
                    :keywords,
                    :topic_category,
                    :date_created,
                    :dataset_status,
                    :organization_name,
                    :contact_person_name,
                    :contact_email,
                    :contact_phone,
                    :geom_type,
                    :user_id,
                    :process,
                    :geometry_count
                )
            """), {
                "table_title": table_name,
                "dataset_title": payload.title,
                "dataset_abstract": author.get("abstract"),
                "keywords": author.get("keywords"),
                # NOTE(review): join raises if "topicCategory" is absent/None.
                "topic_category": ", ".join(author.get("topicCategory")),
                "date_created": str_to_date(author.get("dateCreated")),
                "dataset_status": author.get("status"),
                "organization_name": author.get("organization"),
                "contact_person_name": author.get("contactName"),
                "contact_email": author.get("contactEmail"),
                "contact_phone": author.get("contactPhone"),
                "geom_type": json.dumps(unified_geom_type),
                "user_id": user_id,
                "process": 'CLEANSING',
                "geometry_count": row_count
            })
        # 9. Activity log for the upload.
        await log_activity(
            user_id=user_id,
            action_type="UPLOAD",
            action_title=f"Upload dataset {table_name}",
            details={"table_name": table_name, "rows": len(gdf)}
        )
        result = {
            "job_id": job_id,
            "job_status": "wait",
            "table_name": table_name,
            "status": "success",
            "message": f"Tabel '{table_name}' berhasil dibuat.",
            "total_rows": len(gdf),
            "geometry_type": unified_geom_type,
            "crs": detected_crs,
            "metadata_uuid": ""
        }
        # Persist the SLD style under the job id, then run the pipeline:
        # cleansing -> publish (GeoServer + metadata) -> mapset registration.
        save_xml_to_sld(payload.style, job_id)
        cleansing = await query_cleansing_data(table_name)
        result['job_status'] = cleansing
        publish = await publish_layer(table_name, job_id)
        result['metadata_uuid'] = publish['uuid']
        # Mapset registration payload for the main application.
        # NOTE(review): several ids below are hard-coded environment values.
        mapset = {
            "name": payload.title,
            "description": author.get("abstract"),
            "scale": "1:25000",
            'projection_system_id': '0196c746-d1ba-7f1c-9706-5df738679cc7',
            "category_id": author.get("mapsetCategory"),
            "data_status": "sementara",
            'classification_id': '01968b4b-d3f9-76c9-888c-ee887ac31ce4',
            'producer_id': '01968b54-0000-7a67-bd10-975b8923b93e',
            "layer_type": unified_geom_type[0],
            'source_id': ['019c03ef-35e1-738b-858d-871dc7d1e4d6'],
            "layer_url": publish['geos_link'],
            "metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish['uuid']}",
            "coverage_level": "provinsi",
            "coverage_area": "kabupaten",
            "data_update_period": "Tahunan",
            "data_version": "2026",
            "is_popular": False,
            "is_active": True,
            'regional_id': '01968b53-a910-7a67-bd10-975b8923b92e',
            "notes": "Mapset baru dibuat",
            "status_validation": "on_verification",
        }
        print("mapset data",mapset)
        await upload_to_main(mapset)
        return successRes(data=result)
    except Exception as e:
        await log_activity(
            user_id=user_id,
            action_type="ERROR",
            action_title="Upload gagal",
            details={"error": str(e)}
        )
        raise HTTPException(status_code=500, detail=str(e))
# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
# try:
# os.remove(payload.path)
# job_id = generate_job_id(str(user_id))
# result = {
# "job_id": job_id,
# "job_status": "done",
# "table_name": "just for test",
# "status": "success",
# "message": f"Tabel test berhasil dibuat.",
# "total_rows": 10,
# "geometry_type": "Polygon",
# "crs": "EPSG 4326",
# "metadata_uuid": "-"
# }
# # mapset = {
# # 'name': 'TEST Risiko Letusan Gunung Arjuno',
# # 'description': 'Peta ini menampilkan area yang berpotensi mengalami letusan Gunung Arjuno dan Gunung Bromo di Provinsi Jawa Timur. Data disusun dalam bentuk poligon yang mengindikasikan tingkat risiko berdasarkan sejarah aktivitas dan karakteristik geologi di wilayah tersebut.',
# # 'scale': '1:25000',
# # 'projection_system_id': '0196c746-d1ba-7f1c-9706-5df738679cc7',
# # 'category_id': '0196c80c-855f-77f9-abd0-0c8a30b8c2f5',
# # 'data_status': 'sementara',
# # 'classification_id': '01968b4b-d3f9-76c9-888c-ee887ac31ce4',
# # 'producer_id': '01968b54-0000-7a67-bd10-975b8923b93e',
# # 'layer_type': 'MultiPolygon',
# # 'source_id': ['019c03ef-35e1-738b-858d-871dc7d1e4d6'],
# # 'layer_url': 'http://192.168.60.24:8888/geoserver/labai/wms?service=WMS&version=1.1.0&request=GetMap&layers=labai:test_risiko_letusan_gunung_arjuno&styles=&bbox=110.89528623700005%2C-8.780412043999945%2C116.26994997700001%2C-5.042971664999925&width=768&height=384&srs=EPSG:4326&format=application/openlayers',
# # 'metadata_url': 'http://192.168.60.24:7777/geonetwork/srv/eng/catalog.search#/metadata/123123',
# # 'coverage_level': 'provinsi',
# # 'coverage_area': 'kabupaten',
# # 'data_update_period': 'Tahunan',
# # 'data_version': '2026',
# # 'is_popular': False,
# # 'is_active': True,
# # 'regional_id': '01968b53-a910-7a67-bd10-975b8923b92e',
# # 'notes': 'Mapset baru dibuat',
# # 'status_validation': 'on_verification'
# # }
# # await upload_to_main(mapset)
# return successRes(data=result)
# except Exception as e:
# print("errot", e)

View File

@ -0,0 +1,992 @@
import json
import os
import pandas as pd
import geopandas as gpd
import numpy as np
import re
import zipfile
import tempfile
import asyncio
from pyproj import CRS
from shapely.geometry.base import BaseGeometry
from shapely.geometry import base as shapely_base
from fastapi import Depends, File, Form, UploadFile, HTTPException
from api.routers.datasets_router import cleansing_data, publish_layer, query_cleansing_data, upload_to_main
from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES, GEONETWORK_URL
from services.upload_file.ai_generate import send_metadata
from services.upload_file.readers.reader_csv import read_csv
from services.upload_file.readers.reader_shp import read_shp
from services.upload_file.readers.reader_gdb import read_gdb
from services.upload_file.readers.reader_mpk import read_mpk
from services.upload_file.readers.reader_pdf import convert_df, read_pdf
from services.upload_file.utils.geometry_detector import detect_and_build_geometry, attach_polygon_geometry_auto
from database.connection import engine, sync_engine
from pydantic import BaseModel
from typing import Any, Dict, List, Optional
from shapely import MultiLineString, MultiPolygon, wkt
from sqlalchemy import text
from datetime import datetime
from response import successRes, errorRes
from utils.logger_config import log_activity
# Base.metadata.create_all(bind=engine)
def is_geom_empty(g):
    """Return True when *g* holds no usable geometry (None, NaN, or an empty shape)."""
    if g is None:
        return True
    if isinstance(g, float) and pd.isna(g):
        return True
    return g.is_empty if isinstance(g, BaseGeometry) else False
def safe_json(value):
    """Coerce numpy/pandas/shapely scalars into plain JSON-serializable values."""
    if isinstance(value, (np.int64, np.int32)):
        return int(value)
    if isinstance(value, (np.float64, np.float32)):
        return float(value)
    if isinstance(value, pd.Timestamp):
        return value.isoformat()
    if isinstance(value, shapely_base.BaseGeometry):
        return str(value)  # WKT string representation
    return None if pd.isna(value) else value
def detect_zip_type(zip_path: str) -> str:
    """Classify a ZIP archive as 'gdb', 'shp', or 'unknown' from its member names."""
    with zipfile.ZipFile(zip_path, "r") as archive:
        names = [member.lower() for member in archive.namelist()]
    # FileGDB: either a *.gdb/ folder or the characteristic table files.
    if any(".gdb/" in name for name in names):
        return "gdb"
    gdb_markers = (".gdbtable", ".gdbtablx", ".gdbindexes", ".spx")
    if any(name.endswith(gdb_markers) for name in names):
        return "gdb"
    if any(name.endswith(".shp") for name in names):
        return "shp"
    return "unknown"
# def detect_zip_type(zip_path: str) -> str:
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# files = zip_ref.namelist()
# # -------------------------------------------------------------
# # 1) DETECT FileGDB
# # -------------------------------------------------------------
# is_gdb = (
# any(".gdb/" in f.lower() for f in files)
# or any(f.lower().endswith(ext) for ext in
# [".gdbtable", ".gdbtablx", ".gdbindexes", ".spx"] for f in files)
# )
# if is_gdb:
# print("\n[INFO] ZIP terdeteksi berisi FileGDB.")
# with tempfile.TemporaryDirectory() as temp_dir:
# # extract ZIP
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# zip_ref.extractall(temp_dir)
# # find folder *.gdb
# gdb_path = None
# for root, dirs, _ in os.walk(temp_dir):
# for d in dirs:
# if d.lower().endswith(".gdb"):
# gdb_path = os.path.join(root, d)
# break
# if not gdb_path:
# print("[ERROR] Folder .gdb tidak ditemukan.")
# return "gdb"
# print(f"[INFO] GDB Path: {gdb_path}")
# # Cari seluruh file .gdbtable
# table_files = [
# os.path.join(gdb_path, f)
# for f in os.listdir(gdb_path)
# if f.lower().endswith(".gdbtable")
# ]
# if not table_files:
# print("[ERROR] Tidak ada file .gdbtable ditemukan.")
# return "gdb"
# # Scan semua table file untuk mencari SpatialReference
# found_crs = False
# for table_file in table_files:
# try:
# with open(table_file, "rb") as f:
# raw = f.read(15000) # baca awal file, cukup untuk header JSON
# text = raw.decode("utf-8", errors="ignore")
# start = text.find("{")
# end = text.rfind("}") + 1
# if start == -1 or end == -1:
# continue
# json_str = text[start:end]
# meta = json.loads(json_str)
# spatial_ref = meta.get("SpatialReference")
# if not spatial_ref:
# continue
# wkt = spatial_ref.get("WKT")
# if not wkt:
# continue
# print(f"[FOUND] CRS metadata pada: {os.path.basename(table_file)}")
# print(f"[CRS WKT] {wkt[:200]}...")
# # Convert to EPSG
# try:
# epsg = CRS.from_wkt(wkt).to_epsg()
# print(f"[EPSG] {epsg}")
# except:
# print("[EPSG] Tidak ditemukan EPSG.")
# found_crs = True
# break
# except Exception:
# continue
# if not found_crs:
# print("[WARNING] Tidak ditemukan CRS di file .gdbtable manapun.")
# return "gdb"
# # -----------------------------------------------------
# # 2. DETEKSI SHP
# # -----------------------------------------------------
# if any(f.lower().endswith(".shp") for f in files):
# print("\n[INFO] ZIP terdeteksi berisi SHP.")
# # cari file .prj
# prj_files = [f for f in files if f.lower().endswith(".prj")]
# if not prj_files:
# print("[WARNING] Tidak ada file .prj → CRS tidak diketahui.")
# return "shp"
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# with tempfile.TemporaryDirectory() as temp_dir:
# prj_path = os.path.join(temp_dir, os.path.basename(prj_files[0]))
# zip_ref.extract(prj_files[0], temp_dir)
# # baca isi prj
# with open(prj_path, "r") as f:
# prj_text = f.read()
# try:
# crs = CRS.from_wkt(prj_text)
# print(f"[CRS WKT] {crs.to_wkt()[:200]}...")
# epsg = crs.to_epsg()
# if epsg:
# print(f"[EPSG] {epsg}")
# else:
# print("[EPSG] Tidak ditemukan dalam database EPSG.")
# except Exception as e:
# print("[ERROR] Gagal membaca CRS dari file PRJ:", e)
# return "shp"
# # -----------------------------------------------------
# # 3. UNKNOWN
# # -----------------------------------------------------
# return "unknown"
def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
    """
    Analyze an uploaded tabular dataset.

    Builds/attaches geometry, computes validity statistics, and requests
    AI metadata suggestions for the dataset.

    Args:
        df: Raw dataframe parsed from the uploaded file.
        ext: File extension (e.g. ".csv", ".zip").
        filename: Original upload file name.
        fileDesc: User-provided description, forwarded to the AI service.

    Returns:
        A summary dict on success, or an errorRes (HTTP 422) when no
        geometry could be matched to the table.
    """
    result = detect_and_build_geometry(df, master_polygons=None)
    # Fallback: when detection produced no usable geometry, try joining the
    # rows against the master polygon layers.
    if not hasattr(result, "geometry") or result.geometry.isna().all():
        result = attach_polygon_geometry_auto(result)

    def normalize_geom_type(geom_type):
        # Collapse Multi* variants into the base type (MultiPolygon -> Polygon).
        if geom_type.startswith("Multi"):
            return geom_type.replace("Multi", "")
        return geom_type

    if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
        geom_types = (
            result.geometry
            .dropna()
            .geom_type
            .apply(normalize_geom_type)
            .unique()
        )
        geom_type = geom_types[0] if len(geom_types) > 0 else "None"
        null_geom = result.geometry.isna().sum()
        print(f"[INFO] Tipe Geometry: {geom_type}")
        print(f"[INFO] Jumlah geometry kosong: {null_geom}")
    else:
        res = {
            "message": "Tidak menemukan tabel yang relevan.",
            "file_type": ext,
            "rows": 0,
            "columns": 0,
            "geometry_valid": 0,
            "geometry_empty": 0,
            "geometry_valid_percent": 0,
            "warnings": [],
            "warning_examples": [],
            "preview": []
        }
        return errorRes(message="Tidak berhasil mencocokan geometry pada tabel.", details=res, status_code=422)

    # Normalize non-JSON-serializable values and render geometry as WKT text.
    result = result.replace([pd.NA, float('inf'), float('-inf')], None)
    if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
        result['geometry'] = result['geometry'].apply(
            lambda g: g.wkt if g is not None else None
        )

    empty_count = result['geometry'].apply(is_geom_empty).sum()
    valid_count = len(result) - empty_count
    # BUG FIX: guard against ZeroDivisionError when the result set is empty.
    match_percentage = (valid_count / len(result)) * 100 if len(result) else 0.0

    warnings = []
    if empty_count > 0:
        warnings.append(
            f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
            f"({100 - match_percentage:.2f}% data gagal cocok)."
        )
        # Ship up to 500 failing rows back so the user can inspect them.
        examples = result[result['geometry'].apply(is_geom_empty)].head(500)
        warning_examples = examples.to_dict(orient="records")
    else:
        warning_examples = []

    preview_data = result.to_dict(orient="records")
    preview_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in preview_data
    ]
    warning_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
    ]

    # Context handed to the AI metadata-suggestion service.
    ai_context = {
        "nama_file_peta": filename,
        "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD) Provinsi Jatim",
        "tipe_data_spasial": geom_type,
        "deskripsi_singkat": fileDesc,
        "struktur_atribut_data": {},
    }
    ai_suggest = send_metadata(ai_context)

    response = {
        "message": "File berhasil dibaca dan dianalisis.",
        "file_name": filename,
        "file_type": ext,
        "rows": int(len(result)),
        "columns": list(map(str, result.columns)),
        "geometry_valid": int(valid_count),
        "geometry_empty": int(empty_count),
        "geometry_valid_percent": float(round(match_percentage, 2)),
        "geometry_type": geom_type,
        "warnings": warnings,
        "warning_rows": warning_safe,
        "preview": preview_safe,
        "metadata_suggest": ai_suggest
    }
    return response
async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form(""), fileDesc: Optional[str] = Form("")):
    """
    Receive an uploaded file, dispatch it to the reader matching its
    extension, then run the geometry analysis pipeline on the result.

    Supported: .csv, .xlsx, .mpk, .pdf (single/multi table), .zip (SHP/GDB).
    """
    fname = file.filename
    ext = os.path.splitext(fname)[1].lower()
    contents = await file.read()

    size_mb = len(contents) / (1024 * 1024)
    if size_mb > MAX_FILE_MB:
        # BUG FIX: errorRes builds a response object — it must be returned,
        # not raised (raising a non-exception is a TypeError).
        return errorRes(status_code=413, message="Ukuran File Terlalu Besar")

    # Persist the upload to a temp path so file-based readers can open it.
    tmp_path = UPLOAD_FOLDER / fname
    with open(tmp_path, "wb") as f:
        f.write(contents)

    try:
        df = None
        print('ext', ext)
        if ext == ".csv":
            df = read_csv(str(tmp_path))
        elif ext == ".xlsx":
            df = read_csv(str(tmp_path), sheet)
        elif ext == ".mpk":
            df = read_mpk(str(tmp_path))
        elif ext == ".pdf":
            tbl = read_pdf(tmp_path, page)
            if len(tbl) == 0:
                res = {
                    "message": "Tidak ditemukan tabel valid pada halaman yang dipilih",
                    "tables": {},
                    "file_type": ext
                }
                return successRes(message="Tidak ditemukan tabel valid pada halaman yang dipilih", data=res)
            elif len(tbl) > 1:
                # Multiple tables found: return them so the user can pick one;
                # processing resumes later via handle_process_pdf.
                res = {
                    "message": "File berhasil dibaca dan dianalisis.",
                    "tables": tbl,
                    "file_type": ext
                }
                return successRes(data=res, message="File berhasil dibaca dan dianalisis.")
            else:
                df = convert_df(tbl[0])
        elif ext == ".zip":
            zip_type = detect_zip_type(str(tmp_path))
            if zip_type == "shp":
                print("[INFO] ZIP terdeteksi sebagai Shapefile.")
                df = read_shp(str(tmp_path))
            elif zip_type == "gdb":
                print("[INFO] ZIP terdeteksi sebagai Geodatabase (GDB).")
                df = read_gdb(str(tmp_path))
            else:
                return successRes(message="ZIP file tidak mengandung SHP / GDB valid.")
        else:
            # BUG FIX: was `raise errorRes(...)`, which the generic handler
            # below turned into a 500 instead of the intended 400.
            return errorRes(status_code=400, message="Unsupported file type")

        if df is None or (hasattr(df, "empty") and df.empty):
            return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid")

        res = process_data(df, ext, fname, fileDesc)
        return successRes(data=res)
    except Exception as e:
        print(f"[ERROR] {e}")
        return errorRes(
            message="Internal Server Error",
            details=str(e),
            status_code=500
        )
    finally:
        # BUG FIX: remove the temp upload on every exit path; early returns
        # and exceptions previously leaked files into UPLOAD_FOLDER.
        tmp_path.unlink(missing_ok=True)
class PdfRequest(BaseModel):
    # Request body for re-processing one table extracted from a PDF upload
    # (see handle_process_pdf).
    title: str          # table title shown to the user
    columns: List[str]  # column headers of the selected table
    rows: List[List]    # table body, row-major
    fileName: str       # original PDF file name
    fileDesc: str       # user-supplied description, forwarded to AI metadata
async def handle_process_pdf(payload: PdfRequest):
    """Convert a user-selected PDF table into a dataframe and run the standard analysis."""
    try:
        table_df = convert_df(payload.model_dump())
        no_table = table_df is None or (hasattr(table_df, "empty") and table_df.empty)
        if no_table:
            return errorRes(message="Tidak ada tabel")
        analysis = process_data(table_df, '.pdf', payload.fileName, payload.fileDesc)
        return successRes(data=analysis)
    except Exception as exc:
        print(f"[ERROR] {exc}")
        return errorRes(message="Internal Server Error", details=str(exc), status_code=500)
# finally:
# db_session.close()
class UploadRequest(BaseModel):
    # Request body for publishing an analyzed dataset to PostGIS
    # (see handle_to_postgis).
    title: str              # dataset title; also the basis for the table name
    rows: List[dict]        # records including a WKT "geometry" key
    columns: List[str]      # column names of the dataset
    author: Dict[str, Any]  # author/metadata form fields (abstract, crs, ...)
    style: str              # SLD XML style document, saved for GeoServer
# generate _2 if exist
async def generate_unique_table_name(base_name: str):
    """Normalize a title to snake_case and append _2, _3, ... until the name is free in PostGIS."""
    normalized = base_name.lower().replace(" ", "_").replace("-", "_")
    candidate = normalized
    suffix = 1
    async with engine.connect() as conn:
        while True:
            lookup = await conn.execute(
                text("SELECT to_regclass(:tname)"),
                {"tname": candidate}
            )
            # to_regclass yields NULL when no relation with that name exists.
            if lookup.scalar() is None:
                return candidate
            suffix += 1
            candidate = f"{normalized}_{suffix}"
def str_to_date(raw_date: str):
    """Parse a 'YYYY-MM-DD' string into a date; return None for empty or unparseable input."""
    if not raw_date:
        return None
    try:
        parsed = datetime.strptime(raw_date, "%Y-%m-%d")
    except Exception as exc:
        print("[WARNING] Tidak bisa parse dateCreated:", exc)
        return None
    return parsed.date()
def generate_job_id(user_id: str) -> str:
    """Build a job id of the form '<user_id>_<YYYYMMDDHHMMSS>'."""
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    return "_".join((user_id, stamp))
def save_xml_to_sld(xml_string, filename):
    """
    Persist an SLD/XML style string to style_temp/<filename>.sld.

    Returns the path of the written file.
    """
    folder_path = 'style_temp'
    os.makedirs(folder_path, exist_ok=True)
    # BUG FIX: the filename argument was ignored (a hard-coded name was used),
    # so every job overwrote the same style file. Callers pass a unique job_id
    # and expect one .sld per job.
    file_path = os.path.join(folder_path, f"{filename}.sld")
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(xml_string)
    return file_path
async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
    """
    Full publication pipeline for an analyzed dataset.

    Steps: write rows as a PostGIS table, add a primary key, record author
    metadata, run cleansing, publish the layer to GeoServer/GeoNetwork and
    register the resulting mapset in the main application.

    Raises:
        HTTPException: 400 for invalid input (missing GEOMETRY column, all
        geometries invalid, unknown CRS), 500 for anything else.
    """
    try:
        table_name = await generate_unique_table_name(payload.title)

        df = pd.DataFrame(payload.rows)
        df.columns = [col.upper() for col in df.columns]
        if "GEOMETRY" not in df.columns:
            raise HTTPException(400, "Kolom GEOMETRY tidak ditemukan")

        # 1. WKT string -> shapely geometry; unparseable WKT becomes None.
        def safe_load_wkt(g):
            if not isinstance(g, str):
                return None
            try:
                return wkt.loads(g)
            except Exception:  # was a bare except; keep it narrow to Exception
                return None

        df["GEOMETRY"] = df["GEOMETRY"].apply(safe_load_wkt)
        df = df.rename(columns={"GEOMETRY": "geom"})

        # 2. Drop rows whose geometry failed to parse.
        df = df[df["geom"].notnull()]
        if df.empty:
            raise HTTPException(400, "Semua geometry invalid atau NULL")

        # 3. Repair invalid topology with the standard buffer(0) trick.
        df["geom"] = df["geom"].apply(lambda g: g if g.is_valid else g.buffer(0))

        # 4. Unify geometry types so the table is homogeneous
        #    (Polygon -> MultiPolygon, LineString -> MultiLineString).
        def unify_geometry_type(g):
            gtype = g.geom_type.upper()
            if gtype == "POLYGON":
                return MultiPolygon([g])
            if gtype == "LINESTRING":
                return MultiLineString([g])
            return g  # already MULTI* or POINT

        df["geom"] = df["geom"].apply(unify_geometry_type)

        # 5. CRS from the author form, falling back to WGS84.
        # BUG FIX: a leftover debug line unconditionally forced EPSG:4326,
        # mislabelling data uploaded in any other CRS.
        detected_crs = payload.author.get("crs")
        print('crs', detected_crs)
        if not detected_crs:
            detected_crs = "EPSG:4326"

        gdf = gpd.GeoDataFrame(df, geometry="geom", crs=detected_crs)
        row_count = len(gdf)

        # 6. Verify the CRS is known to PROJ before writing to PostGIS.
        try:
            _ = gdf.to_crs(gdf.crs)  # no-op reprojection just to validate
        except Exception:
            raise HTTPException(400, f"CRS {detected_crs} tidak valid")

        # 7. Write to PostGIS in a worker thread (to_postgis is blocking).
        await asyncio.to_thread(
            gdf.to_postgis,
            table_name,
            sync_engine,
            if_exists="replace",
            index=False
        )

        # 8. Add a primary key (required by the QGIS API).
        async with engine.begin() as conn:
            await conn.execute(text(
                f'ALTER TABLE "{table_name}" ADD COLUMN _ID SERIAL PRIMARY KEY;'
            ))

        # 9. Persist author metadata alongside the dataset.
        unified_geom_type = list(gdf.geom_type.unique())
        author = payload.author
        async with engine.begin() as conn:
            await conn.execute(text("""
                INSERT INTO backend.author_metadata (
                    table_title,
                    dataset_title,
                    dataset_abstract,
                    keywords,
                    topic_category,
                    date_created,
                    dataset_status,
                    organization_name,
                    contact_person_name,
                    contact_email,
                    contact_phone,
                    geom_type,
                    user_id,
                    process,
                    geometry_count
                ) VALUES (
                    :table_title,
                    :dataset_title,
                    :dataset_abstract,
                    :keywords,
                    :topic_category,
                    :date_created,
                    :dataset_status,
                    :organization_name,
                    :contact_person_name,
                    :contact_email,
                    :contact_phone,
                    :geom_type,
                    :user_id,
                    :process,
                    :geometry_count
                )
            """), {
                "table_title": table_name,
                "dataset_title": payload.title,
                "dataset_abstract": author.get("abstract"),
                "keywords": author.get("keywords"),
                # BUG FIX: join crashed with TypeError when topicCategory was absent.
                "topic_category": ", ".join(author.get("topicCategory") or []),
                "date_created": str_to_date(author.get("dateCreated")),
                "dataset_status": author.get("status"),
                "organization_name": author.get("organization"),
                "contact_person_name": author.get("contactName"),
                "contact_email": author.get("contactEmail"),
                "contact_phone": author.get("contactPhone"),
                "geom_type": json.dumps(unified_geom_type),
                "user_id": user_id,
                "process": 'CLEANSING',
                "geometry_count": row_count
            })

        # 10. Activity log.
        await log_activity(
            user_id=user_id,
            action_type="UPLOAD",
            action_title=f"Upload dataset {table_name}",
            details={"table_name": table_name, "rows": len(gdf)}
        )

        job_id = generate_job_id(str(user_id))
        result = {
            "job_id": job_id,
            "job_status": "wait",
            "table_name": table_name,
            "status": "success",
            "message": f"Tabel '{table_name}' berhasil dibuat.",
            "total_rows": len(gdf),
            "geometry_type": unified_geom_type,
            "crs": detected_crs,
            "metadata_uuid": ""
        }
        save_xml_to_sld(payload.style, job_id)

        cleansing = await query_cleansing_data(table_name)
        result['job_status'] = cleansing

        publish = await publish_layer(table_name, job_id)
        result['metadata_uuid'] = publish['uuid']

        mapset = {
            "name": payload.title,
            "description": author.get("abstract"),
            "scale": "1:25000",
            "projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7",
            "category_id": author.get("mapsetCategory"),
            "data_status": "sementara",
            "classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4",
            "producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187",
            "layer_type": unified_geom_type[0],
            "source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"],
            "layer_url": publish['geos_link'],
            "metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish['uuid']}",
            "coverage_level": "provinsi",
            "coverage_area": "kabupaten",
            "data_update_period": "Tahunan",
            "data_version": "2026",
            "is_popular": False,
            "is_active": True,
            "regional_id": "01968b53-a910-7a67-bd10-975b8923b92e",
            "notes": "Mapset baru dibuat",
            "status_validation": "on_verification",
        }
        print("mapset data", mapset)
        await upload_to_main(mapset)
        return successRes(data=result)
    except Exception as e:
        await log_activity(
            user_id=user_id,
            action_type="ERROR",
            action_title="Upload gagal",
            details={"error": str(e)}
        )
        # BUG FIX: re-raise HTTPExceptions as-is so deliberate 400s are not
        # rewrapped into generic 500s.
        if isinstance(e, HTTPException):
            raise
        raise HTTPException(status_code=500, detail=str(e))
# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
# try:
# job_id = generate_job_id(str(user_id))
# result = {
# "job_id": job_id,
# "job_status": "done",
# "table_name": "just for test",
# "status": "success",
# "message": f"Tabel test berhasil dibuat.",
# "total_rows": 10,
# "geometry_type": "Polygon",
# "crs": "EPSG 4326",
# "metadata_uuid": "-"
# }
# mapset = {
# "name": "Resiko Letusan Gunung Arjuno",
# "description": "Testing Automation Upload",
# "scale": "1:25000",
# "projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7",
# "category_id": "0196c80c-855f-77f9-abd0-0c8a30b8c2f5",
# "data_status": "sementara",
# "classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4",
# "producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187",
# "layer_type": "polygon",
# "source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"],
# "layer_url": "http://192.168.60.24:8888/geoserver/wms?service=WMS&version=1.1.0&request=GetMap&layers=labai:risiko_letusan_gunung_arjuno_bromo&bbox=110.89528623700005,-8.780412043999945,116.26994997700001,-5.042971664999925&width=768&height=534&srs=EPSG:4326&styles=&format=application/openlayers",
# "metadata_url": "http://192.168.60.24:7777/geonetwork/srv/eng/catalog.search#/metadata/9e5e2f09-13ef-49b5-bb49-1cb12136f63b",
# "coverage_level": "provinsi",
# "coverage_area": "kabupaten",
# "data_update_period": "Tahunan",
# "data_version": "2026",
# "is_popular": False,
# "is_active": True,
# "regional_id": "01968b53-a910-7a67-bd10-975b8923b92e",
# "notes": "Mapset baru dibuat",
# "status_validation": "on_verification",
# }
# await upload_to_main(mapset)
# return successRes(data=result)
# except Exception as e:
# print("errot", e)
# ===================================
# partition +VIEW
# ===================================
# Daftar prefix WKT yang valid
# VALID_WKT_PREFIXES = ("POINT", "LINESTRING", "POLYGON", "MULTIPOLYGON", "MULTILINESTRING")
def slugify(value: str) -> str:
    """Turn a dataset title into a name that is safe to use as a SQL VIEW identifier."""
    collapsed = re.sub(r'[^a-zA-Z0-9]+', '_', value.lower())
    return collapsed.strip('_')
# Partition + VIEW
# async def create_dataset_view_from_metadata(conn, metadata_id: int, user_id: int, title: str):
# norm_title = slugify(title)
# view_name = f"v_user_{user_id}_{norm_title}"
# base_table = f"test_partition_user_{user_id}"
# # Ambil daftar field
# result = await conn.execute(text("SELECT fields FROM dataset_metadata WHERE id=:mid"), {"mid": metadata_id})
# fields_json = result.scalar_one_or_none()
# base_columns = {"id", "user_id", "metadata_id", "geom"}
# columns_sql = ""
# field_list = []
# if fields_json:
# fields = json.loads(fields_json) if isinstance(fields_json, str) else fields_json
# field_list = fields
# for f in field_list:
# safe_col = slugify(f)
# alias_name = safe_col if safe_col not in base_columns else f"attr_{safe_col}"
# # CAST otomatis
# if safe_col in ["longitude", "latitude", "lon", "lat"]:
# columns_sql += f", (p.attributes->>'{f}')::float AS {alias_name}"
# else:
# columns_sql += f", p.attributes->>'{f}' AS {alias_name}"
# # Drop view lama
# await conn.execute(text(f"DROP VIEW IF EXISTS {view_name} CASCADE;"))
# # 🔥 Buat VIEW baru yang punya FID unik
# create_view_query = f"""
# CREATE OR REPLACE VIEW {view_name} AS
# SELECT
# row_number() OVER() AS fid, -- FID unik untuk QGIS
# p.id,
# p.user_id,
# p.metadata_id,
# p.geom
# {columns_sql},
# m.title,
# m.year,
# m.description
# FROM {base_table} p
# JOIN dataset_metadata m ON m.id = p.metadata_id
# WHERE p.metadata_id = {metadata_id};
# """
# await conn.execute(text(create_view_query))
# # Register geometry untuk QGIS
# await conn.execute(text(f"DELETE FROM geometry_columns WHERE f_table_name = '{view_name}';"))
# await conn.execute(text(f"""
# INSERT INTO geometry_columns
# (f_table_schema, f_table_name, f_geometry_column, coord_dimension, srid, type)
# VALUES ('public', '{view_name}', 'geom', 2, 4326, 'GEOMETRY');
# """))
# print(f"[INFO] VIEW {view_name} dibuat dengan FID unik dan kompatibel dengan QGIS.")
# async def handle_to_postgis(payload, engine, user_id: int = 3):
# """
# Menangani upload data spasial ke PostGIS (dengan partition per user).
# - Jika partisi belum ada, akan dibuat otomatis
# - Metadata dataset disimpan di tabel dataset_metadata
# - Data spasial dimasukkan ke tabel partisi (test_partition_user_{id})
# - VIEW otomatis dibuat untuk QGIS
# """
# try:
# df = pd.DataFrame(payload.rows)
# print(f"[INFO] Diterima {len(df)} baris data dari frontend.")
# # --- Validasi kolom geometry ---
# if "geometry" not in df.columns:
# raise errorRes(status_code=400, message="Kolom 'geometry' tidak ditemukan dalam data.")
# # --- Parsing geometry ke objek shapely ---
# df["geometry"] = df["geometry"].apply(
# lambda g: wkt.loads(g)
# if isinstance(g, str) and g.strip().upper().startswith(VALID_WKT_PREFIXES)
# else None
# )
# # --- Buat GeoDataFrame ---
# gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")
# # --- Metadata info dari payload ---
# # dataset_title = getattr(payload, "dataset_title", None)
# # dataset_year = getattr(payload, "dataset_year", None)
# # dataset_desc = getattr(payload, "dataset_description", None)
# dataset_title = "hujan 2045"
# dataset_year = 2045
# dataset_desc = "test metadata"
# if not dataset_title:
# raise errorRes(status_code=400, detail="Field 'dataset_title' wajib ada untuk metadata.")
# async with engine.begin() as conn:
# fields = [col for col in df.columns if col != "geometry"]
# # 💾 1⃣ Simpan Metadata Dataset
# print("[INFO] Menyimpan metadata dataset...")
# result = await conn.execute(
# text("""
# INSERT INTO dataset_metadata (user_id, title, year, description, fields, created_at)
# VALUES (:user_id, :title, :year, :desc, :fields, :created_at)
# RETURNING id;
# """),
# {
# "user_id": user_id,
# "title": dataset_title,
# "year": dataset_year,
# "desc": dataset_desc,
# "fields": json.dumps(fields),
# "created_at": datetime.utcnow(),
# },
# )
# metadata_id = result.scalar_one()
# print(f"[INFO] Metadata disimpan dengan ID {metadata_id}")
# # ⚙️ 2⃣ Auto-create Partisi Jika Belum Ada
# print(f"[INFO] Memastikan partisi test_partition_user_{user_id} tersedia...")
# await conn.execute(
# text(f"""
# DO $$
# BEGIN
# IF NOT EXISTS (
# SELECT 1 FROM pg_tables WHERE tablename = 'test_partition_user_{user_id}'
# ) THEN
# EXECUTE format('
# CREATE TABLE test_partition_user_%s
# PARTITION OF test_partition
# FOR VALUES IN (%s);
# ', {user_id}, {user_id});
# EXECUTE format('CREATE INDEX IF NOT EXISTS idx_partition_user_%s_geom ON test_partition_user_%s USING GIST (geom);', {user_id}, {user_id});
# EXECUTE format('CREATE INDEX IF NOT EXISTS idx_partition_user_%s_metadata ON test_partition_user_%s (metadata_id);', {user_id}, {user_id});
# END IF;
# END
# $$;
# """)
# )
# # 🧩 3⃣ Insert Data Spasial ke Partisi
# print(f"[INFO] Memasukkan data ke test_partition_user_{user_id} ...")
# insert_count = 0
# for _, row in gdf.iterrows():
# geom_wkt = row["geometry"].wkt if row["geometry"] is not None else None
# attributes = row.drop(labels=["geometry"]).to_dict()
# await conn.execute(
# text("""
# INSERT INTO test_partition (user_id, metadata_id, geom, attributes, created_at)
# VALUES (:user_id, :metadata_id, ST_Force2D(ST_GeomFromText(:geom, 4326)),
# CAST(:attr AS jsonb), :created_at);
# """),
# {
# "user_id": user_id,
# "metadata_id": metadata_id,
# "geom": geom_wkt,
# "attr": json.dumps(attributes),
# "created_at": datetime.utcnow(),
# },
# )
# insert_count += 1
# # 🧩 4⃣ Membuat VIEW untuk dataset baru di QGIS
# await create_dataset_view_from_metadata(conn, metadata_id, user_id, dataset_title)
# print(f"[INFO] ✅ Berhasil memasukkan {insert_count} baris ke partisi user_id={user_id} (metadata_id={metadata_id}).")
# return {
# "status": "success",
# "user_id": user_id,
# "metadata_id": metadata_id,
# "dataset_title": dataset_title,
# "inserted_rows": insert_count,
# "geometry_type": list(gdf.geom_type.unique()),
# }
# except Exception as e:
# print(f"[ERROR] Gagal upload ke PostGIS partition: {e}")
# raise errorRes(status_code=500, message="Gagal upload ke PostGIS partition", details=str(e))

View File

@ -0,0 +1,70 @@
import pandas as pd
import geopandas as gpd
from shapely import wkt
from shapely.errors import WKTReadingError
def process_dataframe_synchronous(df_input, tmp_file):
    """
    Validate, clean, and export a WKT dataframe to parquet.

    Runs in a worker thread (CPU bound). Expects df_input to hold WKT
    strings in a "geometry" column.

    Returns:
        The number of rows written to tmp_file.

    Raises:
        ValueError: when no valid spatial row survives cleaning.
    """
    # Copy so the caller's dataframe is never mutated.
    export_df = df_input.copy()

    # ---- 1. Safe WKT loading: corrupt/non-string WKT becomes None --------
    def safe_load_wkt(raw):
        if not isinstance(raw, str):
            return None
        try:
            return wkt.loads(raw)
        except Exception:
            # BUG FIX: the old clause caught (WKTReadingError, Exception) —
            # the tuple was redundant since Exception subsumes it, and
            # WKTReadingError is gone from shapely 2.x error hierarchy
            # (NOTE(review): the module-level import may need updating too).
            return None

    export_df["geom"] = export_df["geometry"].apply(safe_load_wkt)

    # ---- 2. Drop rows where WKT parsing failed ---------------------------
    export_df = export_df[export_df["geom"].notnull()]
    if export_df.empty:
        raise ValueError("Tidak ada data spasial valid yang ditemukan.")

    export_df = gpd.GeoDataFrame(export_df, geometry="geom")

    # ---- 3. Fix topology: buffer(0) repairs light self-intersections -----
    export_df["geom"] = export_df["geom"].apply(
        lambda g: g.buffer(0) if not g.is_valid else g
    )
    # Drop geometries that collapsed to empty during the fix.
    export_df = export_df[~export_df["geom"].is_empty]

    # ---- 4. Finalize: drop raw WKT column, set CRS, normalize headers ----
    export_df = export_df.drop(columns=["geometry"])
    export_df = export_df.set_crs("EPSG:4326", allow_override=True)
    # Attribute columns to UPPERCASE (stripped of stray spaces); keep 'geom'.
    export_df = export_df.rename(
        columns=lambda c: str(c).strip().upper() if c != "geom" else c
    )

    export_df.to_parquet(tmp_file)
    return len(export_df)
# --- Cara Pemanggilan di Async Function ---
# await asyncio.to_thread(process_dataframe_synchronous, result, tmp_file)