update latest
This commit is contained in:
parent
cc4d897862
commit
c9c641e8e9
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -10,6 +10,7 @@ venv/
|
|||
pdf/
|
||||
data_cache/
|
||||
service_tmp/
|
||||
tmp/
|
||||
testing/
|
||||
test-ai/
|
||||
uploads/
|
||||
|
|
@ -17,6 +18,7 @@ scrapp/
|
|||
logs/
|
||||
style_temp/
|
||||
services/styles/
|
||||
services/pipeline/
|
||||
|
||||
cleansing_func.sql
|
||||
|
||||
|
|
|
|||
|
|
@ -112,9 +112,9 @@ async def job_callback(payload: dict):
|
|||
|
||||
geos_link = publish_layer_to_geoserver(table, job_id)
|
||||
|
||||
uuid = publish_metadata(table_name=table, geoserver_links=geos_link)
|
||||
uuid = await publish_metadata(table_name=table, geoserver_links=geos_link)
|
||||
|
||||
update_job_status(table, "FINISHED", job_id)
|
||||
await update_job_status(table, "FINISHED", job_id)
|
||||
return {
|
||||
"ok": True,
|
||||
"uuid": uuid
|
||||
|
|
@ -225,14 +225,17 @@ async def query_cleansing_data(
|
|||
table_name: str
|
||||
):
|
||||
try:
|
||||
print("clean")
|
||||
async with engine.begin() as conn:
|
||||
await conn.execute(
|
||||
text("CALL pr_cleansing_satupeta_polygon(:table_name, NULL);"),
|
||||
{"table_name": table_name}
|
||||
)
|
||||
print("clean-done")
|
||||
return "done"
|
||||
|
||||
except SQLAlchemyError as e:
|
||||
print("clean-error", e)
|
||||
raise RuntimeError(f"Fix geometry failed: {str(e)}")
|
||||
|
||||
|
||||
|
|
@ -242,20 +245,22 @@ async def publish_layer(table_name: str, job_id: str):
|
|||
|
||||
geos_link = publish_layer_to_geoserver(table_name, job_id)
|
||||
|
||||
uuid = publish_metadata(
|
||||
table_name=table_name,
|
||||
geoserver_links=geos_link
|
||||
)
|
||||
# uuid = await publish_metadata(
|
||||
# table_name=table_name,
|
||||
# geoserver_links=geos_link
|
||||
# )
|
||||
|
||||
# await update_job_status(table_name, "FINISHED", job_id)
|
||||
|
||||
update_job_status(table_name, "FINISHED", job_id)
|
||||
# return uuid
|
||||
return {
|
||||
"geos_link": geos_link["layer_url"],
|
||||
"uuid": uuid
|
||||
# "uuid": uuid
|
||||
"uuid": "123123"
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
update_job_status(table_name, "FAILED", job_id)
|
||||
await update_job_status(table_name, "FAILED", job_id)
|
||||
raise RuntimeError(f"Publish layer gagal: {e}") from e
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ async def upload_file(payload: PdfRequest):
|
|||
|
||||
class UploadRequest(BaseModel):
|
||||
title: str
|
||||
path: str
|
||||
rows: List[dict]
|
||||
columns: List[str]
|
||||
author: Dict[str, Any]
|
||||
|
|
|
|||
|
|
@ -11,6 +11,13 @@ SERVICE_KEY = os.getenv("SERVICE_KEY")
|
|||
|
||||
POSTGIS_URL = os.getenv("POSTGIS_URL")
|
||||
POSTGIS_SYNC_URL = os.getenv("SYNC_URL")
|
||||
DB_DSN = os.getenv("DB_DSN")
|
||||
|
||||
DB_HOST = os.getenv("DB_HOST")
|
||||
DB_PORT = os.getenv("DB_PORT")
|
||||
DB_NAME = os.getenv("DB_NAME")
|
||||
DB_USER = os.getenv("DB_USER")
|
||||
DB_PASS = os.getenv("DB_PASS")
|
||||
|
||||
QGIS_URL = os.getenv("QGIS_API_URL")
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
from core.config import POSTGIS_URL, POSTGIS_SYNC_URL
|
||||
|
||||
engine = create_async_engine(POSTGIS_URL, pool_pre_ping=True)
|
||||
|
|
|
|||
25
main.py
25
main.py
|
|
@ -1,16 +1,35 @@
|
|||
from contextlib import asynccontextmanager
|
||||
import asyncpg
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from core.config import API_VERSION, ALLOWED_ORIGINS
|
||||
from core.config import API_VERSION, ALLOWED_ORIGINS, DB_DSN
|
||||
from api.routers.system_router import router as system_router
|
||||
from api.routers.upload_file_router import router as upload_router
|
||||
from api.routers.datasets_router import router as dataset_router
|
||||
from services.pipeline.api.router import router as pipeline_router
|
||||
# from contextlib import asynccontextmanager
|
||||
# from utils.qgis_init import init_qgis
|
||||
|
||||
|
||||
db_pool = None
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
# 1. Start: Buat Pool koneksi saat aplikasi nyala
|
||||
global db_pool
|
||||
print("Creating DB Pool...")
|
||||
db_pool = await asyncpg.create_pool(DB_DSN, min_size=5, max_size=20)
|
||||
yield
|
||||
# 2. Shutdown: Tutup Pool saat aplikasi mati
|
||||
print("Closing DB Pool...")
|
||||
await db_pool.close()
|
||||
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="ETL Geo Upload Service",
|
||||
version=API_VERSION,
|
||||
description="Upload Automation API"
|
||||
description="Upload Automation API",
|
||||
lifespan=lifespan
|
||||
)
|
||||
|
||||
app.add_middleware(
|
||||
|
|
@ -21,6 +40,7 @@ app.add_middleware(
|
|||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
|
||||
# Base.metadata.create_all(bind=engine)
|
||||
|
||||
# qgis setup
|
||||
|
|
@ -56,3 +76,4 @@ app.add_middleware(
|
|||
app.include_router(system_router, tags=["System"])
|
||||
app.include_router(upload_router, prefix="/upload", tags=["Upload"])
|
||||
app.include_router(dataset_router, prefix="/dataset", tags=["Upload"])
|
||||
app.include_router(pipeline_router)
|
||||
9
media/Tex/9a4b37ec36e3ac09.tex
Normal file
9
media/Tex/9a4b37ec36e3ac09.tex
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
\documentclass[preview]{standalone}
|
||||
\usepackage[english]{babel}
|
||||
\usepackage{amsmath}
|
||||
\usepackage{amssymb}
|
||||
\begin{document}
|
||||
\begin{align*}
|
||||
A = \pi r^2
|
||||
\end{align*}
|
||||
\end{document}
|
||||
54
media/texts/4be1eeeac36704e2.svg
Normal file
54
media/texts/4be1eeeac36704e2.svg
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="854" height="480" viewBox="0 0 854 480">
|
||||
<defs>
|
||||
<g>
|
||||
<g id="glyph-0-0">
|
||||
<path d="M 0.214844 -0.242188 C 0.734375 -0.296875 1.0625 -0.394531 1.191406 -0.53125 C 1.320312 -0.667969 1.386719 -1.023438 1.386719 -1.601562 L 1.386719 -7.375 C 1.386719 -7.851562 1.3125 -8.164062 1.167969 -8.308594 C 1.023438 -8.449219 0.707031 -8.539062 0.214844 -8.574219 L 0.214844 -8.820312 L 3.917969 -8.820312 L 3.917969 -8.574219 C 3.429688 -8.539062 3.109375 -8.449219 2.960938 -8.308594 C 2.8125 -8.164062 2.742188 -7.855469 2.742188 -7.375 L 2.742188 -4.777344 L 6.785156 -4.777344 L 6.785156 -7.375 C 6.785156 -7.851562 6.710938 -8.164062 6.570312 -8.308594 C 6.425781 -8.449219 6.105469 -8.539062 5.613281 -8.574219 L 5.613281 -8.820312 L 9.316406 -8.820312 L 9.316406 -8.574219 C 8.824219 -8.539062 8.507812 -8.449219 8.363281 -8.308594 C 8.21875 -8.164062 8.144531 -7.855469 8.144531 -7.375 L 8.144531 -1.445312 C 8.144531 -0.960938 8.21875 -0.648438 8.363281 -0.511719 C 8.507812 -0.375 8.824219 -0.285156 9.316406 -0.242188 L 9.316406 0 L 5.613281 0 L 5.613281 -0.242188 C 6.136719 -0.292969 6.464844 -0.386719 6.589844 -0.527344 C 6.71875 -0.667969 6.785156 -1.023438 6.785156 -1.601562 L 6.785156 -4.191406 L 2.742188 -4.191406 L 2.742188 -1.445312 C 2.742188 -0.960938 2.816406 -0.648438 2.960938 -0.507812 C 3.109375 -0.367188 3.429688 -0.28125 3.917969 -0.242188 L 3.917969 0 L 0.214844 0 Z "/>
|
||||
</g>
|
||||
<g id="glyph-0-1">
|
||||
<path d="M 3.820312 -3.605469 C 3.320312 -3.441406 2.910156 -3.257812 2.585938 -3.058594 C 1.960938 -2.671875 1.648438 -2.234375 1.648438 -1.746094 C 1.648438 -1.351562 1.777344 -1.058594 2.039062 -0.871094 C 2.207031 -0.75 2.394531 -0.691406 2.605469 -0.691406 C 2.890625 -0.691406 3.164062 -0.769531 3.425781 -0.929688 C 3.691406 -1.089844 3.820312 -1.296875 3.820312 -1.542969 Z M 0.488281 -1.296875 C 0.488281 -1.925781 0.804688 -2.449219 1.433594 -2.871094 C 1.832031 -3.132812 2.628906 -3.484375 3.820312 -3.933594 L 3.820312 -4.484375 C 3.820312 -4.929688 3.777344 -5.238281 3.691406 -5.410156 C 3.542969 -5.699219 3.238281 -5.847656 2.773438 -5.847656 C 2.550781 -5.847656 2.339844 -5.789062 2.140625 -5.675781 C 1.941406 -5.558594 1.84375 -5.398438 1.84375 -5.195312 C 1.84375 -5.144531 1.851562 -5.054688 1.875 -4.929688 C 1.898438 -4.808594 1.90625 -4.730469 1.90625 -4.695312 C 1.90625 -4.453125 1.828125 -4.28125 1.667969 -4.1875 C 1.574219 -4.128906 1.46875 -4.101562 1.339844 -4.101562 C 1.144531 -4.101562 0.996094 -4.164062 0.890625 -4.292969 C 0.789062 -4.421875 0.734375 -4.5625 0.734375 -4.71875 C 0.734375 -5.023438 0.921875 -5.339844 1.296875 -5.671875 C 1.671875 -6.003906 2.222656 -6.171875 2.949219 -6.171875 C 3.792969 -6.171875 4.363281 -5.898438 4.660156 -5.351562 C 4.820312 -5.050781 4.902344 -4.617188 4.902344 -4.042969 L 4.902344 -1.433594 C 4.902344 -1.179688 4.917969 -1.007812 4.953125 -0.910156 C 5.011719 -0.742188 5.128906 -0.65625 5.304688 -0.65625 C 5.40625 -0.65625 5.488281 -0.671875 5.554688 -0.703125 C 5.617188 -0.734375 5.730469 -0.808594 5.890625 -0.925781 L 5.890625 -0.585938 C 5.753906 -0.417969 5.601562 -0.277344 5.441406 -0.167969 C 5.199219 -0.00390625 4.953125 0.078125 4.699219 0.078125 C 4.40625 0.078125 4.191406 -0.015625 4.058594 -0.207031 C 3.925781 -0.398438 3.855469 -0.628906 3.839844 -0.890625 C 3.511719 -0.605469 3.230469 -0.394531 2.996094 -0.253906 C 2.601562 -0.0195312 2.222656 0.0976562 1.867188 0.0976562 C 1.496094 0.0976562 
1.171875 -0.0351562 0.898438 -0.296875 C 0.625 -0.558594 0.488281 -0.890625 0.488281 -1.296875 Z "/>
|
||||
</g>
|
||||
<g id="glyph-0-2">
|
||||
<path d="M 0.273438 -0.183594 C 0.675781 -0.222656 0.949219 -0.300781 1.09375 -0.425781 C 1.238281 -0.550781 1.308594 -0.792969 1.308594 -1.152344 L 1.308594 -7.511719 C 1.308594 -7.800781 1.285156 -7.996094 1.238281 -8.105469 C 1.152344 -8.289062 0.972656 -8.378906 0.710938 -8.378906 C 0.648438 -8.378906 0.582031 -8.371094 0.511719 -8.359375 C 0.441406 -8.347656 0.347656 -8.328125 0.242188 -8.300781 L 0.242188 -8.515625 C 0.828125 -8.671875 1.53125 -8.878906 2.355469 -9.140625 C 2.386719 -9.140625 2.40625 -9.128906 2.410156 -9.101562 C 2.417969 -9.074219 2.421875 -9.019531 2.421875 -8.933594 L 2.421875 -1.125 C 2.421875 -0.75 2.488281 -0.503906 2.617188 -0.394531 C 2.746094 -0.285156 3.015625 -0.210938 3.425781 -0.183594 L 3.425781 0 L 0.273438 0 Z "/>
|
||||
</g>
|
||||
<g id="glyph-0-3">
|
||||
<path d="M 0.339844 -3.019531 C 0.339844 -3.882812 0.613281 -4.613281 1.160156 -5.210938 C 1.710938 -5.808594 2.417969 -6.105469 3.28125 -6.105469 C 4.140625 -6.105469 4.851562 -5.824219 5.417969 -5.261719 C 5.980469 -4.695312 6.261719 -3.945312 6.261719 -3.007812 C 6.261719 -2.144531 5.988281 -1.394531 5.441406 -0.753906 C 4.894531 -0.117188 4.1875 0.203125 3.320312 0.203125 C 2.488281 0.203125 1.78125 -0.105469 1.203125 -0.714844 C 0.625 -1.328125 0.339844 -2.097656 0.339844 -3.019531 Z M 3.097656 -5.714844 C 2.753906 -5.714844 2.457031 -5.601562 2.207031 -5.378906 C 1.773438 -4.984375 1.554688 -4.300781 1.554688 -3.332031 C 1.554688 -2.558594 1.730469 -1.839844 2.078125 -1.171875 C 2.429688 -0.503906 2.914062 -0.167969 3.535156 -0.167969 C 4.019531 -0.167969 4.394531 -0.394531 4.65625 -0.839844 C 4.921875 -1.285156 5.050781 -1.871094 5.050781 -2.597656 C 5.050781 -3.347656 4.886719 -4.054688 4.550781 -4.71875 C 4.214844 -5.382812 3.734375 -5.714844 3.097656 -5.714844 Z "/>
|
||||
</g>
|
||||
<g id="glyph-0-4">
|
||||
<rect x="0" y="0" width="0" height="0" mask="url(#mask-0)"/>
|
||||
</g>
|
||||
<g id="glyph-0-5">
|
||||
<path d="M 0.148438 -0.242188 C 0.699219 -0.296875 1.054688 -0.421875 1.210938 -0.613281 C 1.367188 -0.808594 1.445312 -1.257812 1.445312 -1.960938 L 1.445312 -7.375 C 1.445312 -7.859375 1.371094 -8.171875 1.21875 -8.316406 C 1.066406 -8.460938 0.71875 -8.546875 0.183594 -8.574219 L 0.183594 -8.820312 L 2.820312 -8.820312 L 5.90625 -2.097656 L 8.855469 -8.820312 L 11.511719 -8.820312 L 11.511719 -8.574219 C 11.015625 -8.539062 10.695312 -8.449219 10.554688 -8.304688 C 10.410156 -8.160156 10.339844 -7.847656 10.339844 -7.375 L 10.339844 -1.445312 C 10.339844 -0.960938 10.410156 -0.648438 10.554688 -0.511719 C 10.695312 -0.375 11.015625 -0.285156 11.511719 -0.242188 L 11.511719 0 L 7.773438 0 L 7.773438 -0.242188 C 8.3125 -0.285156 8.648438 -0.378906 8.777344 -0.53125 C 8.910156 -0.679688 8.976562 -1.039062 8.976562 -1.601562 L 8.976562 -7.589844 L 5.566406 0 L 5.382812 0 L 2.03125 -7.277344 L 2.03125 -1.960938 C 2.03125 -1.230469 2.136719 -0.753906 2.351562 -0.535156 C 2.488281 -0.390625 2.800781 -0.292969 3.28125 -0.242188 L 3.28125 0 L 0.148438 0 Z "/>
|
||||
</g>
|
||||
<g id="glyph-0-6">
|
||||
<path d="M 0.242188 -0.183594 C 0.550781 -0.222656 0.765625 -0.296875 0.886719 -0.414062 C 1.011719 -0.527344 1.074219 -0.785156 1.074219 -1.183594 L 1.074219 -4.484375 C 1.074219 -4.761719 1.046875 -4.957031 0.996094 -5.070312 C 0.914062 -5.234375 0.746094 -5.320312 0.488281 -5.320312 C 0.449219 -5.320312 0.410156 -5.316406 0.367188 -5.3125 C 0.328125 -5.308594 0.277344 -5.300781 0.214844 -5.292969 L 0.214844 -5.519531 C 0.394531 -5.574219 0.8125 -5.707031 1.476562 -5.925781 L 2.089844 -6.125 C 2.121094 -6.125 2.136719 -6.117188 2.144531 -6.09375 C 2.152344 -6.070312 2.15625 -6.042969 2.15625 -6.003906 L 2.15625 -5.046875 C 2.554688 -5.417969 2.867188 -5.675781 3.09375 -5.8125 C 3.429688 -6.027344 3.78125 -6.132812 4.148438 -6.132812 C 4.441406 -6.132812 4.710938 -6.046875 4.953125 -5.878906 C 5.421875 -5.550781 5.65625 -4.960938 5.65625 -4.113281 L 5.65625 -1.074219 C 5.65625 -0.761719 5.71875 -0.535156 5.847656 -0.398438 C 5.972656 -0.257812 6.183594 -0.1875 6.476562 -0.183594 L 6.476562 0 L 3.699219 0 L 3.699219 -0.183594 C 4.015625 -0.226562 4.234375 -0.3125 4.363281 -0.445312 C 4.488281 -0.578125 4.550781 -0.867188 4.550781 -1.308594 L 4.550781 -4.089844 C 4.550781 -4.460938 4.480469 -4.769531 4.34375 -5.015625 C 4.203125 -5.261719 3.949219 -5.382812 3.574219 -5.382812 C 3.316406 -5.382812 3.058594 -5.296875 2.792969 -5.125 C 2.644531 -5.023438 2.453125 -4.859375 2.21875 -4.628906 L 2.21875 -0.984375 C 2.21875 -0.671875 2.289062 -0.460938 2.429688 -0.355469 C 2.566406 -0.25 2.785156 -0.191406 3.085938 -0.183594 L 3.085938 0 L 0.242188 0 Z "/>
|
||||
</g>
|
||||
<g id="glyph-0-7">
|
||||
<path d="M 1.09375 -8.40625 C 1.09375 -8.59375 1.160156 -8.753906 1.289062 -8.886719 C 1.417969 -9.019531 1.578125 -9.089844 1.769531 -9.089844 C 1.957031 -9.089844 2.117188 -9.023438 2.25 -8.890625 C 2.382812 -8.757812 2.449219 -8.597656 2.449219 -8.40625 C 2.449219 -8.21875 2.382812 -8.058594 2.25 -7.925781 C 2.117188 -7.792969 1.957031 -7.726562 1.769531 -7.726562 C 1.578125 -7.726562 1.417969 -7.792969 1.289062 -7.925781 C 1.160156 -8.058594 1.09375 -8.21875 1.09375 -8.40625 Z M 0.261719 -0.183594 C 0.726562 -0.226562 1.019531 -0.304688 1.140625 -0.417969 C 1.261719 -0.535156 1.320312 -0.847656 1.320312 -1.355469 L 1.320312 -4.460938 C 1.320312 -4.742188 1.300781 -4.9375 1.261719 -5.046875 C 1.199219 -5.222656 1.0625 -5.3125 0.851562 -5.3125 C 0.804688 -5.3125 0.757812 -5.308594 0.710938 -5.300781 C 0.667969 -5.292969 0.535156 -5.257812 0.320312 -5.195312 L 0.320312 -5.398438 L 0.597656 -5.488281 C 1.359375 -5.734375 1.886719 -5.921875 2.1875 -6.046875 C 2.308594 -6.101562 2.386719 -6.125 2.421875 -6.125 C 2.429688 -6.09375 2.433594 -6.0625 2.433594 -6.027344 L 2.433594 -1.355469 C 2.433594 -0.859375 2.496094 -0.550781 2.613281 -0.421875 C 2.734375 -0.296875 3.003906 -0.21875 3.425781 -0.183594 L 3.425781 0 L 0.261719 0 Z "/>
|
||||
</g>
|
||||
<g id="glyph-0-8">
|
||||
<path d="M 0.214844 -0.167969 C 0.554688 -0.199219 0.777344 -0.257812 0.890625 -0.339844 C 1.066406 -0.464844 1.152344 -0.714844 1.152344 -1.09375 L 1.152344 -4.460938 C 1.152344 -4.78125 1.109375 -4.992188 1.023438 -5.089844 C 0.941406 -5.191406 0.800781 -5.242188 0.605469 -5.242188 C 0.515625 -5.242188 0.445312 -5.238281 0.398438 -5.226562 C 0.355469 -5.21875 0.300781 -5.203125 0.242188 -5.183594 L 0.242188 -5.410156 L 0.710938 -5.566406 C 0.878906 -5.621094 1.15625 -5.726562 1.542969 -5.871094 C 1.929688 -6.019531 2.132812 -6.09375 2.15625 -6.09375 C 2.175781 -6.09375 2.191406 -6.082031 2.195312 -6.0625 C 2.199219 -6.039062 2.199219 -6 2.199219 -5.9375 L 2.199219 -5.058594 C 2.628906 -5.449219 3 -5.71875 3.3125 -5.867188 C 3.625 -6.019531 3.949219 -6.09375 4.277344 -6.09375 C 4.722656 -6.09375 5.082031 -5.941406 5.34375 -5.636719 C 5.484375 -5.472656 5.597656 -5.25 5.691406 -4.96875 C 6.011719 -5.292969 6.292969 -5.535156 6.53125 -5.691406 C 6.941406 -5.960938 7.363281 -6.09375 7.792969 -6.09375 C 8.492188 -6.09375 8.957031 -5.808594 9.191406 -5.242188 C 9.328125 -4.921875 9.394531 -4.410156 9.394531 -3.71875 L 9.394531 -1.015625 C 9.394531 -0.707031 9.460938 -0.496094 9.597656 -0.386719 C 9.734375 -0.277344 9.980469 -0.203125 10.339844 -0.167969 L 10.339844 0 L 7.402344 0 L 7.402344 -0.183594 C 7.78125 -0.21875 8.027344 -0.292969 8.148438 -0.410156 C 8.265625 -0.527344 8.328125 -0.765625 8.328125 -1.125 L 8.328125 -3.933594 C 8.328125 -4.355469 8.28125 -4.664062 8.191406 -4.863281 C 8.03125 -5.21875 7.714844 -5.398438 7.246094 -5.398438 C 6.964844 -5.398438 6.683594 -5.304688 6.40625 -5.117188 C 6.246094 -5.007812 6.046875 -4.835938 5.8125 -4.597656 L 5.8125 -1.261719 C 5.8125 -0.910156 5.875 -0.644531 6 -0.460938 C 6.125 -0.28125 6.382812 -0.183594 6.785156 -0.167969 L 6.785156 0 L 3.796875 0 L 3.796875 -0.167969 C 4.207031 -0.222656 4.46875 -0.320312 4.582031 -0.46875 C 4.695312 -0.617188 4.753906 -0.980469 4.753906 -1.554688 L 4.753906 -3.378906 C 4.753906 
-4.046875 4.710938 -4.507812 4.621094 -4.757812 C 4.480469 -5.183594 4.175781 -5.398438 3.710938 -5.398438 C 3.445312 -5.398438 3.1875 -5.324219 2.929688 -5.179688 C 2.671875 -5.035156 2.449219 -4.84375 2.253906 -4.609375 L 2.253906 -1.046875 C 2.253906 -0.71875 2.308594 -0.492188 2.425781 -0.363281 C 2.539062 -0.238281 2.789062 -0.171875 3.175781 -0.167969 L 3.175781 0 L 0.214844 0 Z "/>
|
||||
</g>
|
||||
<g id="glyph-0-9">
|
||||
<path d="M 2.433594 0.144531 C 2.246094 0.144531 2.082031 0.078125 1.941406 -0.0507812 C 1.796875 -0.183594 1.726562 -0.347656 1.726562 -0.554688 C 1.726562 -0.75 1.792969 -0.917969 1.933594 -1.0625 C 2.074219 -1.203125 2.242188 -1.277344 2.449219 -1.277344 C 2.65625 -1.277344 2.828125 -1.203125 2.96875 -1.058594 C 3.109375 -0.914062 3.175781 -0.746094 3.175781 -0.554688 C 3.175781 -0.363281 3.105469 -0.199219 2.960938 -0.0625 C 2.820312 0.0742188 2.644531 0.144531 2.433594 0.144531 Z M 3.144531 -7.949219 C 3.144531 -7.871094 3.140625 -7.792969 3.136719 -7.71875 C 3.132812 -7.640625 3.125 -7.550781 3.113281 -7.449219 L 2.820312 -4.980469 L 2.511719 -2.34375 L 2.34375 -2.34375 L 2.21875 -3.777344 C 2.183594 -4.183594 2.089844 -4.964844 1.933594 -6.113281 C 1.792969 -7.117188 1.726562 -7.722656 1.726562 -7.929688 C 1.726562 -8.199219 1.777344 -8.445312 1.882812 -8.671875 C 1.984375 -8.898438 2.179688 -9.011719 2.460938 -9.011719 C 2.75 -9.011719 2.953125 -8.855469 3.058594 -8.546875 C 3.117188 -8.386719 3.144531 -8.1875 3.144531 -7.949219 Z "/>
|
||||
</g>
|
||||
</g>
|
||||
<image id="source-9" x="0" y="0" width="0" height="0"/>
|
||||
<mask id="mask-0">
|
||||
<use xlink:href="#source-9"/>
|
||||
</mask>
|
||||
</defs>
|
||||
<g fill="rgb(100%, 100%, 100%)" fill-opacity="1">
|
||||
<use xlink:href="#glyph-0-0" x="30" y="30"/>
|
||||
<use xlink:href="#glyph-0-1" x="40" y="30"/>
|
||||
<use xlink:href="#glyph-0-2" x="46" y="30"/>
|
||||
<use xlink:href="#glyph-0-3" x="50" y="30"/>
|
||||
<use xlink:href="#glyph-0-4" x="57" y="30"/>
|
||||
<use xlink:href="#glyph-0-5" x="60" y="30"/>
|
||||
<use xlink:href="#glyph-0-1" x="72" y="30"/>
|
||||
<use xlink:href="#glyph-0-6" x="78" y="30"/>
|
||||
<use xlink:href="#glyph-0-7" x="85" y="30"/>
|
||||
<use xlink:href="#glyph-0-8" x="89" y="30"/>
|
||||
<use xlink:href="#glyph-0-9" x="99" y="30"/>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 13 KiB |
|
|
@ -18,3 +18,5 @@ asyncpg
|
|||
psycopg2
|
||||
python-multipart==0.0.22
|
||||
pyarrow==21.0.0
|
||||
subprocess
|
||||
openpyxl
|
||||
BIN
services/.DS_Store
vendored
BIN
services/.DS_Store
vendored
Binary file not shown.
|
|
@ -1,9 +1,9 @@
|
|||
from datetime import datetime
|
||||
from sqlalchemy import text
|
||||
from database.connection import sync_engine
|
||||
from database.connection import engine
|
||||
from utils.logger_config import log_activity
|
||||
|
||||
def update_job_status(table_name: str, status: str, job_id: str = None):
|
||||
async def update_job_status(table_name: str, status: str, job_id: str = None):
|
||||
query = text("""
|
||||
UPDATE backend.author_metadata
|
||||
SET process = :status,
|
||||
|
|
@ -17,12 +17,7 @@ def update_job_status(table_name: str, status: str, job_id: str = None):
|
|||
"table_name": table_name
|
||||
}
|
||||
|
||||
with sync_engine.begin() as conn:
|
||||
conn.execute(query, params)
|
||||
async with engine.begin() as conn:
|
||||
await conn.execute(query, params)
|
||||
|
||||
print(f"[DB] Metadata '{table_name}' updated to status '{status}'")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ from fastapi import HTTPException
|
|||
import requests
|
||||
from sqlalchemy import text
|
||||
from core.config import GEONETWORK_PASS, GEONETWORK_URL, GEONETWORK_USER
|
||||
from database.connection import sync_engine as engine
|
||||
from database.connection import engine
|
||||
from datetime import datetime
|
||||
from uuid import uuid4
|
||||
import re
|
||||
|
|
@ -54,7 +54,7 @@ def fix_xml_urls(xml: str) -> str:
|
|||
|
||||
|
||||
|
||||
def get_extent(table_name: str):
|
||||
async def get_extent(table_name: str):
|
||||
|
||||
sql = f"""
|
||||
SELECT
|
||||
|
|
@ -62,24 +62,22 @@ def get_extent(table_name: str):
|
|||
ST_XMax(extent), ST_YMax(extent)
|
||||
FROM (
|
||||
SELECT ST_Extent(geom) AS extent
|
||||
FROM public.{table_name}
|
||||
FROM public."{table_name}"
|
||||
) AS box;
|
||||
"""
|
||||
|
||||
conn = engine.connect()
|
||||
try:
|
||||
row = conn.execute(text(sql)).fetchone()
|
||||
finally:
|
||||
conn.close()
|
||||
async with engine.connect() as conn:
|
||||
result = await conn.execute(sql)
|
||||
row = result.fetchone()
|
||||
|
||||
if not row or row[0] is None:
|
||||
return None
|
||||
|
||||
# return {
|
||||
# "xmin": float(row[0]),
|
||||
# "ymin": float(row[1]),
|
||||
# "xmax": float(row[2]),
|
||||
# "ymax": float(row[3])
|
||||
# "xmin": row[0],
|
||||
# "ymin": row[1],
|
||||
# "xmax": row[2],
|
||||
# "ymax": row[3]
|
||||
# }
|
||||
|
||||
return {
|
||||
|
|
@ -89,7 +87,7 @@ def get_extent(table_name: str):
|
|||
"ymax": -5.4819 # north
|
||||
}
|
||||
|
||||
def get_author_metadata(table_name: str):
|
||||
async def get_author_metadata(table_name: str):
|
||||
|
||||
sql = """
|
||||
SELECT am.table_title, am.dataset_title, am.dataset_abstract, am.keywords, am.date_created,
|
||||
|
|
@ -106,15 +104,14 @@ def get_author_metadata(table_name: str):
|
|||
LIMIT 1
|
||||
"""
|
||||
|
||||
conn = engine.connect()
|
||||
try:
|
||||
row = conn.execute(text(sql), {"table": table_name}).fetchone()
|
||||
finally:
|
||||
conn.close()
|
||||
async with engine.connect() as conn:
|
||||
result = await conn.execute(sql, {"table": table_name})
|
||||
row = result.fetchone()
|
||||
|
||||
if not row:
|
||||
raise Exception(f"Tidak ada metadata untuk tabel: {table_name}")
|
||||
|
||||
# SQLAlchemy Async row support ._mapping untuk convert ke dict
|
||||
return dict(row._mapping)
|
||||
|
||||
|
||||
|
|
@ -628,10 +625,10 @@ def upload_metadata_to_geonetwork(xml_metadata: str):
|
|||
|
||||
|
||||
|
||||
def publish_metadata(table_name: str, geoserver_links: dict):
|
||||
async def publish_metadata(table_name: str, geoserver_links: dict):
|
||||
|
||||
extent = get_extent(table_name)
|
||||
meta = get_author_metadata(table_name)
|
||||
extent = await get_extent(table_name)
|
||||
meta = await get_author_metadata(table_name)
|
||||
xml = generate_metadata_xml(
|
||||
table_name=meta["dataset_title"],
|
||||
meta=meta,
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ from fastapi import HTTPException
|
|||
import requests
|
||||
from sqlalchemy import text
|
||||
from core.config import GEONETWORK_PASS, GEONETWORK_URL, GEONETWORK_USER
|
||||
from database.connection import sync_engine as engine
|
||||
from database.connection import engine
|
||||
from datetime import datetime
|
||||
from uuid import uuid4
|
||||
import re
|
||||
|
|
@ -54,7 +54,7 @@ def fix_xml_urls(xml: str) -> str:
|
|||
|
||||
|
||||
|
||||
def get_extent(table_name: str):
|
||||
async def get_extent(table_name: str):
|
||||
|
||||
sql = f"""
|
||||
SELECT
|
||||
|
|
@ -62,24 +62,22 @@ def get_extent(table_name: str):
|
|||
ST_XMax(extent), ST_YMax(extent)
|
||||
FROM (
|
||||
SELECT ST_Extent(geom) AS extent
|
||||
FROM public.{table_name}
|
||||
FROM public."{table_name}"
|
||||
) AS box;
|
||||
"""
|
||||
|
||||
conn = engine.connect()
|
||||
try:
|
||||
row = conn.execute(text(sql)).fetchone()
|
||||
finally:
|
||||
conn.close()
|
||||
async with engine.connect() as conn:
|
||||
result = await conn.execute(sql)
|
||||
row = result.fetchone()
|
||||
|
||||
if not row or row[0] is None:
|
||||
return None
|
||||
|
||||
# return {
|
||||
# "xmin": float(row[0]),
|
||||
# "ymin": float(row[1]),
|
||||
# "xmax": float(row[2]),
|
||||
# "ymax": float(row[3])
|
||||
# "xmin": row[0],
|
||||
# "ymin": row[1],
|
||||
# "xmax": row[2],
|
||||
# "ymax": row[3]
|
||||
# }
|
||||
|
||||
return {
|
||||
|
|
@ -89,7 +87,7 @@ def get_extent(table_name: str):
|
|||
"ymax": -5.4819 # north
|
||||
}
|
||||
|
||||
def get_author_metadata(table_name: str):
|
||||
async def get_author_metadata(table_name: str):
|
||||
|
||||
sql = """
|
||||
SELECT am.table_title, am.dataset_title, am.dataset_abstract, am.keywords, am.date_created,
|
||||
|
|
@ -106,15 +104,14 @@ def get_author_metadata(table_name: str):
|
|||
LIMIT 1
|
||||
"""
|
||||
|
||||
conn = engine.connect()
|
||||
try:
|
||||
row = conn.execute(text(sql), {"table": table_name}).fetchone()
|
||||
finally:
|
||||
conn.close()
|
||||
async with engine.connect() as conn:
|
||||
result = await conn.execute(sql, {"table": table_name})
|
||||
row = result.fetchone()
|
||||
|
||||
if not row:
|
||||
raise Exception(f"Tidak ada metadata untuk tabel: {table_name}")
|
||||
|
||||
# SQLAlchemy Async row support ._mapping untuk convert ke dict
|
||||
return dict(row._mapping)
|
||||
|
||||
|
||||
|
|
@ -626,10 +623,10 @@ def upload_metadata_to_geonetwork(xml_metadata: str):
|
|||
|
||||
|
||||
|
||||
def publish_metadata(table_name: str, geoserver_links: dict):
|
||||
async def publish_metadata(table_name: str, geoserver_links: dict):
|
||||
|
||||
extent = get_extent(table_name)
|
||||
meta = get_author_metadata(table_name)
|
||||
extent = await get_extent(table_name)
|
||||
meta = await get_author_metadata(table_name)
|
||||
xml = generate_metadata_xml(
|
||||
table_name=meta["dataset_title"],
|
||||
meta=meta,
|
||||
|
|
|
|||
BIN
services/upload_file/.DS_Store
vendored
BIN
services/upload_file/.DS_Store
vendored
Binary file not shown.
|
|
@ -32,10 +32,96 @@ def detect_delimiter(path, sample_size=2048):
|
|||
return ','
|
||||
|
||||
|
||||
# def read_csv(path: str, sheet: str = None):
|
||||
# ext = os.path.splitext(path)[1].lower()
|
||||
|
||||
# try:
|
||||
# if ext in ['.csv']:
|
||||
# header_line = detect_header_line(path)
|
||||
# delimiter = detect_delimiter(path)
|
||||
# print(f"[INFO] Detected header line: {header_line + 1}, delimiter: '{delimiter}'")
|
||||
|
||||
# df = pd.read_csv(
|
||||
# path,
|
||||
# header=header_line,
|
||||
# sep=delimiter,
|
||||
# encoding='utf-8',
|
||||
# low_memory=False,
|
||||
# thousands=','
|
||||
# )
|
||||
|
||||
# elif ext in ['.xlsx', '.xls']:
|
||||
# print(f"[INFO] Membaca file Excel: {os.path.basename(path)}")
|
||||
# xls = pd.ExcelFile(path)
|
||||
# print(f"[INFO] Ditemukan {len(xls.sheet_names)} sheet: {xls.sheet_names}")
|
||||
|
||||
# if sheet:
|
||||
# if sheet not in xls.sheet_names:
|
||||
# raise ValueError(f"Sheet '{sheet}' tidak ditemukan dalam file {os.path.basename(path)}")
|
||||
# print(f"[INFO] Membaca sheet yang ditentukan: '{sheet}'")
|
||||
# df = pd.read_excel(xls, sheet_name=sheet, header=0, dtype=str)
|
||||
# df = df.dropna(how='all').dropna(axis=1, how='all')
|
||||
|
||||
# else:
|
||||
# print("[INFO] Tidak ada sheet yang ditentukan, mencari sheet paling relevan...")
|
||||
# best_sheet = None
|
||||
# best_score = -1
|
||||
# best_df = None
|
||||
|
||||
# for sheet_name in xls.sheet_names:
|
||||
# try:
|
||||
# temp_df = pd.read_excel(xls, sheet_name=sheet_name, header=0, dtype=str)
|
||||
# temp_df = temp_df.dropna(how='all').dropna(axis=1, how='all')
|
||||
|
||||
# if len(temp_df) == 0 or len(temp_df.columns) < 2:
|
||||
# continue
|
||||
|
||||
# # hitung skor relevansi
|
||||
# text_ratio = temp_df.applymap(lambda x: isinstance(x, str)).sum().sum() / (temp_df.size or 1)
|
||||
# row_score = len(temp_df)
|
||||
# score = (row_score * 0.7) + (text_ratio * 100)
|
||||
|
||||
# if score > best_score:
|
||||
# best_score = score
|
||||
# best_sheet = sheet_name
|
||||
# best_df = temp_df
|
||||
|
||||
# except Exception as e:
|
||||
# print(f"[WARN] Gagal membaca sheet {sheet_name}: {e}")
|
||||
# continue
|
||||
|
||||
# if best_df is not None:
|
||||
# print(f"[INFO] Sheet terpilih: '{best_sheet}' dengan skor {best_score:.2f}")
|
||||
# df = best_df
|
||||
# else:
|
||||
# raise ValueError("Tidak ada sheet valid yang dapat dibaca.")
|
||||
|
||||
# for col in df.columns:
|
||||
# if df[col].astype(str).str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
|
||||
# df[col] = df[col].astype(str).str.replace(',', '', regex=False)
|
||||
# df[col] = pd.to_numeric(df[col], errors='ignore')
|
||||
|
||||
# else:
|
||||
# raise ValueError("Format file tidak dikenali (hanya .csv, .xlsx, .xls)")
|
||||
|
||||
# except Exception as e:
|
||||
# print(f"[WARN] Gagal membaca file ({e}), fallback ke default reader.")
|
||||
# df = pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',')
|
||||
|
||||
# df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]
|
||||
# df.columns = [str(c).strip() for c in df.columns]
|
||||
# df = df.dropna(how='all')
|
||||
|
||||
# return df
|
||||
|
||||
|
||||
|
||||
def read_csv(path: str, sheet: str = None):
|
||||
ext = os.path.splitext(path)[1].lower()
|
||||
df = pd.DataFrame() # Inisialisasi default
|
||||
|
||||
try:
|
||||
# --- BLOK PEMBACAAN FILE ---
|
||||
if ext in ['.csv']:
|
||||
header_line = detect_header_line(path)
|
||||
delimiter = detect_delimiter(path)
|
||||
|
|
@ -52,17 +138,19 @@ def read_csv(path: str, sheet: str = None):
|
|||
|
||||
elif ext in ['.xlsx', '.xls']:
|
||||
print(f"[INFO] Membaca file Excel: {os.path.basename(path)}")
|
||||
xls = pd.ExcelFile(path)
|
||||
xls = pd.ExcelFile(path, engine='openpyxl') # Pakai engine openpyxl
|
||||
print(f"[INFO] Ditemukan {len(xls.sheet_names)} sheet: {xls.sheet_names}")
|
||||
|
||||
if sheet:
|
||||
if sheet not in xls.sheet_names:
|
||||
raise ValueError(f"Sheet '{sheet}' tidak ditemukan dalam file {os.path.basename(path)}")
|
||||
raise ValueError(f"Sheet '{sheet}' tidak ditemukan.")
|
||||
print(f"[INFO] Membaca sheet yang ditentukan: '{sheet}'")
|
||||
df = pd.read_excel(xls, sheet_name=sheet, header=0, dtype=str)
|
||||
# Tambahkan engine='openpyxl'
|
||||
df = pd.read_excel(xls, sheet_name=sheet, header=0, dtype=str, engine='openpyxl')
|
||||
df = df.dropna(how='all').dropna(axis=1, how='all')
|
||||
|
||||
else:
|
||||
# Logika pencarian sheet terbaik (tidak berubah, hanya indentasi)
|
||||
print("[INFO] Tidak ada sheet yang ditentukan, mencari sheet paling relevan...")
|
||||
best_sheet = None
|
||||
best_score = -1
|
||||
|
|
@ -70,13 +158,12 @@ def read_csv(path: str, sheet: str = None):
|
|||
|
||||
for sheet_name in xls.sheet_names:
|
||||
try:
|
||||
temp_df = pd.read_excel(xls, sheet_name=sheet_name, header=0, dtype=str)
|
||||
temp_df = pd.read_excel(xls, sheet_name=sheet_name, header=0, dtype=str, engine='openpyxl')
|
||||
temp_df = temp_df.dropna(how='all').dropna(axis=1, how='all')
|
||||
|
||||
if len(temp_df) == 0 or len(temp_df.columns) < 2:
|
||||
continue
|
||||
|
||||
# hitung skor relevansi
|
||||
text_ratio = temp_df.applymap(lambda x: isinstance(x, str)).sum().sum() / (temp_df.size or 1)
|
||||
row_score = len(temp_df)
|
||||
score = (row_score * 0.7) + (text_ratio * 100)
|
||||
|
|
@ -85,7 +172,6 @@ def read_csv(path: str, sheet: str = None):
|
|||
best_score = score
|
||||
best_sheet = sheet_name
|
||||
best_df = temp_df
|
||||
|
||||
except Exception as e:
|
||||
print(f"[WARN] Gagal membaca sheet {sheet_name}: {e}")
|
||||
continue
|
||||
|
|
@ -96,21 +182,47 @@ def read_csv(path: str, sheet: str = None):
|
|||
else:
|
||||
raise ValueError("Tidak ada sheet valid yang dapat dibaca.")
|
||||
|
||||
for col in df.columns:
|
||||
if df[col].astype(str).str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
|
||||
df[col] = df[col].astype(str).str.replace(',', '', regex=False)
|
||||
df[col] = pd.to_numeric(df[col], errors='ignore')
|
||||
|
||||
else:
|
||||
raise ValueError("Format file tidak dikenali (hanya .csv, .xlsx, .xls)")
|
||||
|
||||
# --- BLOK PEMBERSIHAN (Dilakukan setelah file sukses terbaca) ---
|
||||
# Kita bungkus ini agar error konversi angka TIDAK menggagalkan pembacaan file
|
||||
if not df.empty:
|
||||
df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]
|
||||
df.columns = [str(c).strip() for c in df.columns]
|
||||
df = df.dropna(how='all')
|
||||
|
||||
# Konversi Angka yang Lebih Aman
|
||||
for col in df.columns:
|
||||
try:
|
||||
# Cek apakah kolom terlihat seperti angka
|
||||
if df[col].astype(str).str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
|
||||
# Bersihkan koma
|
||||
clean_col = df[col].astype(str).str.replace(',', '', regex=False)
|
||||
# Gunakan errors='coerce' agar jika ada error value (NaN/REF), dia jadi NaN, bukan crash
|
||||
df[col] = pd.to_numeric(clean_col, errors='coerce')
|
||||
except Exception as ex:
|
||||
# Jika konversi gagal, biarkan sebagai string/object dan lanjut ke kolom berikutnya
|
||||
print(f"[WARN] Gagal konversi numerik pada kolom '{col}': {ex}")
|
||||
pass
|
||||
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
print(f"[WARN] Gagal membaca file ({e}), fallback ke default reader.")
|
||||
df = pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',')
|
||||
# --- ERROR HANDLING YANG BENAR ---
|
||||
print(f"[WARN] Gagal membaca file utama ({e}).")
|
||||
|
||||
df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]
|
||||
df.columns = [str(c).strip() for c in df.columns]
|
||||
df = df.dropna(how='all')
|
||||
# Hanya lakukan fallback CSV jika file aslinya MEMANG CSV (atau txt)
|
||||
# Jangan paksa baca .xlsx pakai read_csv
|
||||
if ext in ['.csv', '.txt']:
|
||||
print("[INFO] Mencoba fallback ke default CSV reader...")
|
||||
try:
|
||||
return pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',')
|
||||
except Exception as e2:
|
||||
print(f"[ERROR] Fallback CSV juga gagal: {e2}")
|
||||
|
||||
# Jika file Excel gagal dibaca, return DataFrame kosong atau raise error
|
||||
print("[ERROR] Tidak dapat memulihkan pembacaan file Excel.")
|
||||
return pd.DataFrame()
|
||||
|
||||
return df
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import re
|
||||
import pdfplumber
|
||||
import pandas as pd
|
||||
from services.upload_file.utils.pdf_cleaner import get_number_column_index, get_start_end_number, normalize_number_column, row_ratio, has_mixed_text_and_numbers, is_short_text_row, parse_page_selection, filter_geo_admin_column, cleaning_column
|
||||
from services.upload_file.readers.utils.pdf_cleaner import get_number_column_index, get_start_end_number, normalize_number_column, row_ratio, has_mixed_text_and_numbers, is_short_text_row, parse_page_selection, filter_geo_admin_column, cleaning_column
|
||||
from services.upload_file.upload_exceptions import PDFReadError
|
||||
from utils.logger_config import setup_logger
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,9 @@
|
|||
import json
|
||||
import os
|
||||
import random
|
||||
import subprocess
|
||||
import uuid
|
||||
import asyncpg
|
||||
import pandas as pd
|
||||
import geopandas as gpd
|
||||
import numpy as np
|
||||
|
|
@ -12,13 +16,14 @@ from shapely.geometry.base import BaseGeometry
|
|||
from shapely.geometry import base as shapely_base
|
||||
from fastapi import Depends, File, Form, UploadFile, HTTPException
|
||||
from api.routers.datasets_router import cleansing_data, publish_layer, query_cleansing_data, upload_to_main
|
||||
from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES, GEONETWORK_URL
|
||||
from core.config import DB_DSN, DB_HOST, DB_NAME, DB_PASS, DB_PORT, DB_USER, UPLOAD_FOLDER, MAX_FILE_MB, GEONETWORK_URL
|
||||
from services.upload_file.ai_generate import send_metadata
|
||||
from services.upload_file.readers.reader_csv import read_csv
|
||||
from services.upload_file.readers.reader_shp import read_shp
|
||||
from services.upload_file.readers.reader_gdb import read_gdb
|
||||
from services.upload_file.readers.reader_mpk import read_mpk
|
||||
from services.upload_file.readers.reader_pdf import convert_df, read_pdf
|
||||
from services.upload_file.utils.df_validation import process_dataframe_synchronous
|
||||
from services.upload_file.utils.geometry_detector import detect_and_build_geometry, attach_polygon_geometry_auto
|
||||
from database.connection import engine, sync_engine
|
||||
from pydantic import BaseModel
|
||||
|
|
@ -71,155 +76,13 @@ def detect_zip_type(zip_path: str) -> str:
|
|||
|
||||
return "unknown"
|
||||
|
||||
# def detect_zip_type(zip_path: str) -> str:
|
||||
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
||||
# files = zip_ref.namelist()
|
||||
|
||||
# # -------------------------------------------------------------
|
||||
# # 1) DETECT FileGDB
|
||||
# # -------------------------------------------------------------
|
||||
# is_gdb = (
|
||||
# any(".gdb/" in f.lower() for f in files)
|
||||
# or any(f.lower().endswith(ext) for ext in
|
||||
# [".gdbtable", ".gdbtablx", ".gdbindexes", ".spx"] for f in files)
|
||||
# )
|
||||
|
||||
# if is_gdb:
|
||||
# print("\n[INFO] ZIP terdeteksi berisi FileGDB.")
|
||||
|
||||
# with tempfile.TemporaryDirectory() as temp_dir:
|
||||
# # extract ZIP
|
||||
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
||||
# zip_ref.extractall(temp_dir)
|
||||
|
||||
# # find folder *.gdb
|
||||
# gdb_path = None
|
||||
# for root, dirs, _ in os.walk(temp_dir):
|
||||
# for d in dirs:
|
||||
# if d.lower().endswith(".gdb"):
|
||||
# gdb_path = os.path.join(root, d)
|
||||
# break
|
||||
|
||||
# if not gdb_path:
|
||||
# print("[ERROR] Folder .gdb tidak ditemukan.")
|
||||
# return "gdb"
|
||||
|
||||
# print(f"[INFO] GDB Path: {gdb_path}")
|
||||
|
||||
# # Cari seluruh file .gdbtable
|
||||
# table_files = [
|
||||
# os.path.join(gdb_path, f)
|
||||
# for f in os.listdir(gdb_path)
|
||||
# if f.lower().endswith(".gdbtable")
|
||||
# ]
|
||||
|
||||
# if not table_files:
|
||||
# print("[ERROR] Tidak ada file .gdbtable ditemukan.")
|
||||
# return "gdb"
|
||||
|
||||
# # Scan semua table file untuk mencari SpatialReference
|
||||
# found_crs = False
|
||||
|
||||
# for table_file in table_files:
|
||||
# try:
|
||||
# with open(table_file, "rb") as f:
|
||||
# raw = f.read(15000) # baca awal file, cukup untuk header JSON
|
||||
|
||||
# text = raw.decode("utf-8", errors="ignore")
|
||||
|
||||
# start = text.find("{")
|
||||
# end = text.rfind("}") + 1
|
||||
|
||||
# if start == -1 or end == -1:
|
||||
# continue
|
||||
|
||||
# json_str = text[start:end]
|
||||
# meta = json.loads(json_str)
|
||||
|
||||
# spatial_ref = meta.get("SpatialReference")
|
||||
# if not spatial_ref:
|
||||
# continue
|
||||
|
||||
# wkt = spatial_ref.get("WKT")
|
||||
# if not wkt:
|
||||
# continue
|
||||
|
||||
# print(f"[FOUND] CRS metadata pada: {os.path.basename(table_file)}")
|
||||
# print(f"[CRS WKT] {wkt[:200]}...")
|
||||
|
||||
# # Convert to EPSG
|
||||
# try:
|
||||
# epsg = CRS.from_wkt(wkt).to_epsg()
|
||||
# print(f"[EPSG] {epsg}")
|
||||
# except:
|
||||
# print("[EPSG] Tidak ditemukan EPSG.")
|
||||
|
||||
# found_crs = True
|
||||
# break
|
||||
|
||||
# except Exception:
|
||||
# continue
|
||||
|
||||
# if not found_crs:
|
||||
# print("[WARNING] Tidak ditemukan CRS di file .gdbtable manapun.")
|
||||
|
||||
# return "gdb"
|
||||
|
||||
# # -----------------------------------------------------
|
||||
# # 2. DETEKSI SHP
|
||||
# # -----------------------------------------------------
|
||||
# if any(f.lower().endswith(".shp") for f in files):
|
||||
# print("\n[INFO] ZIP terdeteksi berisi SHP.")
|
||||
|
||||
# # cari file .prj
|
||||
# prj_files = [f for f in files if f.lower().endswith(".prj")]
|
||||
|
||||
# if not prj_files:
|
||||
# print("[WARNING] Tidak ada file .prj → CRS tidak diketahui.")
|
||||
# return "shp"
|
||||
|
||||
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
||||
# with tempfile.TemporaryDirectory() as temp_dir:
|
||||
# prj_path = os.path.join(temp_dir, os.path.basename(prj_files[0]))
|
||||
# zip_ref.extract(prj_files[0], temp_dir)
|
||||
|
||||
# # baca isi prj
|
||||
# with open(prj_path, "r") as f:
|
||||
# prj_text = f.read()
|
||||
|
||||
# try:
|
||||
# crs = CRS.from_wkt(prj_text)
|
||||
# print(f"[CRS WKT] {crs.to_wkt()[:200]}...")
|
||||
|
||||
# epsg = crs.to_epsg()
|
||||
# if epsg:
|
||||
# print(f"[EPSG] {epsg}")
|
||||
# else:
|
||||
# print("[EPSG] Tidak ditemukan dalam database EPSG.")
|
||||
|
||||
# except Exception as e:
|
||||
# print("[ERROR] Gagal membaca CRS dari file PRJ:", e)
|
||||
|
||||
# return "shp"
|
||||
|
||||
# # -----------------------------------------------------
|
||||
# # 3. UNKNOWN
|
||||
# # -----------------------------------------------------
|
||||
# return "unknown"
|
||||
|
||||
|
||||
def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
|
||||
async def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
|
||||
result = detect_and_build_geometry(df, master_polygons=None)
|
||||
|
||||
if not hasattr(result, "geometry") or result.geometry.isna().all():
|
||||
result = attach_polygon_geometry_auto(result)
|
||||
|
||||
# if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
|
||||
# geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
|
||||
# if not result.empty else "None"
|
||||
|
||||
# null_geom = result.geometry.isna().sum()
|
||||
|
||||
def normalize_geom_type(geom_type):
|
||||
if geom_type.startswith("Multi"):
|
||||
return geom_type.replace("Multi", "")
|
||||
|
|
@ -295,18 +158,12 @@ def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
|
|||
"tipe_data_spasial": geom_type,
|
||||
"deskripsi_singkat": fileDesc,
|
||||
"struktur_atribut_data": {},
|
||||
# "metadata": {
|
||||
# "judul": "",
|
||||
# "abstrak": "",
|
||||
# "tujuan": "",
|
||||
# "keyword": [],
|
||||
# "kategori": [],
|
||||
# "kategori_mapset": ""
|
||||
# }
|
||||
}
|
||||
ai_suggest = send_metadata(ai_context)
|
||||
# ai_suggest = {'judul': 'Peta Risiko Letusan Gunung Arjuna di Provinsi Jawa Timur', 'abstrak': 'Peta ini menggambarkan wilayah berisiko letusan Gunung Arjuna yang berada di Provinsi Jawa Timur. Data disajikan dalam bentuk poligon yang menunjukkan zona risiko berdasarkan analisis potensi aktivitas vulkanik.', 'tujuan': 'Data dapat digunakan untuk perencanaan mitigasi bencana dan pengambilan keputusan di wilayah Jawa Timur.', 'keyword': ['Risiko letusan', 'Gunung Arjuna', 'Bencana alam', 'Provinsi Jawa Timur', 'Geologi'], 'kategori': ['Geoscientific information', 'Environment'], 'kategori_mapset': 'Lingkungan Hidup'}
|
||||
# print(ai_suggest)
|
||||
|
||||
tmp_file = generate_unique_filename()
|
||||
await asyncio.to_thread(process_dataframe_synchronous, result, tmp_file)
|
||||
|
||||
response = {
|
||||
"message": "File berhasil dibaca dan dianalisis.",
|
||||
|
|
@ -321,7 +178,8 @@ def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
|
|||
"warnings": warnings,
|
||||
"warning_rows": warning_safe,
|
||||
"preview": preview_safe,
|
||||
"metadata_suggest": ai_suggest
|
||||
"metadata_suggest": ai_suggest,
|
||||
"tmp_path": tmp_file
|
||||
}
|
||||
|
||||
# return successRes(content=response)
|
||||
|
|
@ -393,7 +251,7 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
|
|||
if df is None or (hasattr(df, "empty") and df.empty):
|
||||
return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid")
|
||||
|
||||
res = process_data(df, ext, fname, fileDesc)
|
||||
res = await process_data(df, ext, fname, fileDesc)
|
||||
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
|
||||
|
|
@ -427,7 +285,7 @@ async def handle_process_pdf(payload: PdfRequest):
|
|||
if df is None or (hasattr(df, "empty") and df.empty):
|
||||
return errorRes(message="Tidak ada tabel")
|
||||
|
||||
res = process_data(df, '.pdf', payload.fileName, payload.fileDesc)
|
||||
res = await process_data(df, '.pdf', payload.fileName, payload.fileDesc)
|
||||
return successRes(data=res)
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -447,6 +305,7 @@ async def handle_process_pdf(payload: PdfRequest):
|
|||
|
||||
class UploadRequest(BaseModel):
|
||||
title: str
|
||||
path: str
|
||||
rows: List[dict]
|
||||
columns: List[str]
|
||||
author: Dict[str, Any]
|
||||
|
|
@ -483,6 +342,14 @@ def str_to_date(raw_date: str):
|
|||
|
||||
|
||||
|
||||
def generate_unique_filename(folder="tmp", ext="parquet", digits=6):
    """Return a path (``folder/<uuid-int>.<ext>``) that does not exist yet.

    The target folder is created if missing. The name is derived from a
    random UUID, so a collision is practically impossible; the loop is a
    belt-and-braces re-check against an existing file.

    Args:
        folder: Directory the file will live in (created if absent).
        ext: File extension without the leading dot.
        digits: Unused; kept for backward compatibility with existing callers.

    Returns:
        A relative path string using ``/`` as separator (callers rely on
        this exact ``f"{folder}/{id}.{ext}"`` shape).
    """
    os.makedirs(folder, exist_ok=True)
    while True:
        # Bug fix: original had a duplicated assignment
        # (`file_id = file_id = uuid.uuid4().int`).
        file_id = uuid.uuid4().int
        filename = f"{folder}/{file_id}.{ext}"

        if not os.path.exists(filename):
            return filename
|
||||
|
||||
def generate_job_id(user_id: str) -> str:
|
||||
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
|
||||
|
|
@ -502,9 +369,135 @@ def save_xml_to_sld(xml_string, filename):
|
|||
return file_path
|
||||
|
||||
|
||||
async def process_parquet_upload(filename: str, table_name: str):
    """Load a parquet file from ``tmp/`` into a new PostGIS table.

    Reads the parquet off-thread, normalizes column names to uppercase,
    converts the ``GEOM`` column (WKT string or WKB bytes) to EWKT,
    creates ``table_name`` with all attributes as TEXT, bulk-COPYs the
    rows via asyncpg, then ALTERs the text geom column into a typed
    2D geometry with a GIST index. The source file is deleted on success.

    NOTE(review): ``table_name`` is interpolated directly into DDL
    (f-strings) — callers must guarantee it is a safe identifier.
    Errors are printed and swallowed; the caller gets no failure signal.
    """
    # Deferred import to avoid a circular import with the app entry point.
    from main import db_pool
    file_path = os.path.join("tmp", filename)

    if not os.path.exists(file_path):
        print(f"File {file_path} tidak ditemukan")
        return

    try:
        loop = asyncio.get_running_loop()
        # pd.read_parquet is blocking — run it in the default executor.
        df = await loop.run_in_executor(None, pd.read_parquet, file_path)

        # =====================================================================
        # 1. COLUMN-NAME CLEANING (important!)
        # =====================================================================
        df.columns = [str(col).strip().upper() for col in df.columns]

        # After uppercasing, standardize the geometry column name to "GEOM".
        # (The rename below is a no-op since the key already equals "GEOM";
        # kept as-is to preserve behavior.)
        if "GEOM" in df.columns:
            df.rename(columns={"GEOM": "GEOM"}, inplace=True)

        if "GEOM" not in df.columns:
            raise Exception("Kolom GEOM tidak ditemukan")

        # =====================================================================
        # 2. DATA PREPARATION (row processing)
        # =====================================================================
        clean_rows = []
        geom_types = set()

        # All attribute columns except GEOM. This exact list is used for
        # both CREATE TABLE and COPY, so the two stay in sync.
        attr_columns = [col for col in df.columns if col != "GEOM"]

        for row in df.itertuples(index=False):
            # --- Handle GEOM ---
            raw_geom = getattr(row, "GEOM", None)
            if not raw_geom: continue

            try:
                geom = None
                if isinstance(raw_geom, str):
                    # WKT text representation.
                    geom = wkt.loads(raw_geom)
                elif isinstance(raw_geom, bytes):
                    # WKB binary representation.
                    from shapely import wkb
                    geom = wkb.loads(raw_geom)

                if not geom: continue
                # buffer(0) is the classic trick to repair invalid polygons.
                if not geom.is_valid: geom = geom.buffer(0)

                # Promote single geometries to Multi* so one table can hold
                # a uniform geometry type.
                gtype = geom.geom_type.upper()
                if gtype == "POLYGON": geom = MultiPolygon([geom])
                elif gtype == "LINESTRING": geom = MultiLineString([geom])

                geom_types.add(geom.geom_type)
                # EWKT with explicit SRID for PostGIS ingestion.
                ewkt = f"SRID=4326;{geom.wkt}"

            except Exception:
                # Unparseable geometry: skip the whole row.
                continue

            # --- Handle attributes (force string) ---
            row_data = []
            for col in attr_columns:
                # getattr uses the uppercase column names from attr_columns.
                val = getattr(row, col, None)
                if val is not None:
                    row_data.append(str(val))  # Convert int/float to string
                else:
                    row_data.append(None)

            row_data.append(ewkt)
            clean_rows.append(tuple(row_data))

        if not clean_rows:
            raise Exception("Data valid kosong")

        # =====================================================================
        # 3. DATABASE OPERATIONS
        # =====================================================================
        # Pick a single geometry type for the table; force the Multi* form.
        final_geom_type = list(geom_types)[0].upper() if geom_types else "GEOM"
        if "MULTI" not in final_geom_type and final_geom_type != "GEOM":
            final_geom_type = "MULTI" + final_geom_type

        # A. BUILD DDL (CREATE TABLE)
        # Quoted identifiers (f'"{col}"') keep the UPPERCASE names in the DB.
        col_defs = [f'"{col}" TEXT' for col in attr_columns]

        create_sql = f"""
            CREATE TABLE {table_name} (
                _id SERIAL PRIMARY KEY, -- lowercase default
                {', '.join(col_defs)}, -- UPPERCASE (Hasil loop attr_columns)
                geom TEXT -- lowercase
            );
        """

        async with db_pool.acquire() as conn:
            # Drop table if present (dev safety; careful in production)
            # await conn.execute(f"DROP TABLE IF EXISTS {table_name}")

            # 1. Create table
            await conn.execute(create_sql)

            # 2. COPY data
            # target_cols must match attr_columns EXACTLY;
            # asyncpg quotes these identifiers automatically.
            target_cols = attr_columns + ['geom']
            await conn.copy_records_to_table(
                table_name,
                records=clean_rows,
                columns=target_cols
            )

            # 3. Alter the text column into a typed 2D geometry + GIST index.
            alter_sql = f"""
                ALTER TABLE {table_name}
                ALTER COLUMN geom TYPE geometry({final_geom_type}, 4326)
                USING ST_Force2D(geom::geometry)::geometry({final_geom_type}, 4326);

                CREATE INDEX idx_{table_name}_geom ON {table_name} USING GIST (geom);
            """
            await conn.execute(alter_sql)

        print(f"Sukses upload {len(clean_rows)} baris ke {table_name}.")
        # Only delete the source file after a fully successful load.
        os.remove(file_path)

    except Exception as e:
        # Broad catch: failures are reported to stdout only (no re-raise).
        print(f"Error processing parquet: {e}")
        # Log error
|
||||
|
||||
|
||||
async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
||||
|
|
@ -580,26 +573,10 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
|||
raise HTTPException(400, f"CRS {detected_crs} tidak valid")
|
||||
|
||||
# =====================================================================
|
||||
# 7. SIMPAN KE POSTGIS (synchronous)
|
||||
# 7. SIMPAN KE POSTGIS
|
||||
# =====================================================================
|
||||
loop = asyncio.get_running_loop()
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
lambda: gdf.to_postgis(
|
||||
table_name,
|
||||
sync_engine,
|
||||
if_exists="replace",
|
||||
index=False
|
||||
)
|
||||
)
|
||||
|
||||
# =====================================================================
|
||||
# 8. ADD PRIMARY KEY (wajib untuk QGIS API)
|
||||
# =====================================================================
|
||||
async with engine.begin() as conn:
|
||||
await conn.execute(text(
|
||||
f'ALTER TABLE "{table_name}" ADD COLUMN _ID SERIAL PRIMARY KEY;'
|
||||
))
|
||||
job_id = generate_job_id(str(user_id))
|
||||
await process_parquet_upload(payload.path, table_name)
|
||||
|
||||
# =====================================================================
|
||||
# 9. SIMPAN METADATA (geom_type, author metadata)
|
||||
|
|
@ -646,7 +623,6 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
|||
"dataset_title": payload.title,
|
||||
"dataset_abstract": author.get("abstract"),
|
||||
"keywords": author.get("keywords"),
|
||||
# "topic_category": author.get("topicCategory"),
|
||||
"topic_category": ", ".join(author.get("topicCategory")),
|
||||
"date_created": str_to_date(author.get("dateCreated")),
|
||||
"dataset_status": author.get("status"),
|
||||
|
|
@ -660,7 +636,6 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
|||
"geometry_count": row_count
|
||||
})
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# 10. LOGGING
|
||||
# =====================================================================
|
||||
|
|
@ -671,7 +646,6 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
|||
details={"table_name": table_name, "rows": len(gdf)}
|
||||
)
|
||||
|
||||
job_id = generate_job_id(str(user_id))
|
||||
result = {
|
||||
"job_id": job_id,
|
||||
"job_status": "wait",
|
||||
|
|
@ -685,9 +659,6 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
|||
}
|
||||
save_xml_to_sld(payload.style, job_id)
|
||||
|
||||
# await report_progress(job_id, "upload", 20, "Upload selesai")
|
||||
# cleansing_data(table_name, job_id)
|
||||
|
||||
cleansing = await query_cleansing_data(table_name)
|
||||
result['job_status'] = cleansing
|
||||
|
||||
|
|
@ -698,13 +669,13 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
|||
"name": payload.title,
|
||||
"description": author.get("abstract"),
|
||||
"scale": "1:25000",
|
||||
"projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7",
|
||||
'projection_system_id': '0196c746-d1ba-7f1c-9706-5df738679cc7',
|
||||
"category_id": author.get("mapsetCategory"),
|
||||
"data_status": "sementara",
|
||||
"classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4",
|
||||
"producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187",
|
||||
'classification_id': '01968b4b-d3f9-76c9-888c-ee887ac31ce4',
|
||||
'producer_id': '01968b54-0000-7a67-bd10-975b8923b93e',
|
||||
"layer_type": unified_geom_type[0],
|
||||
"source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"],
|
||||
'source_id': ['019c03ef-35e1-738b-858d-871dc7d1e4d6'],
|
||||
"layer_url": publish['geos_link'],
|
||||
"metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish['uuid']}",
|
||||
"coverage_level": "provinsi",
|
||||
|
|
@ -713,12 +684,11 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
|||
"data_version": "2026",
|
||||
"is_popular": False,
|
||||
"is_active": True,
|
||||
"regional_id": "01968b53-a910-7a67-bd10-975b8923b92e",
|
||||
'regional_id': '01968b53-a910-7a67-bd10-975b8923b92e',
|
||||
"notes": "Mapset baru dibuat",
|
||||
"status_validation": "on_verification",
|
||||
}
|
||||
|
||||
print("mapset data",mapset)
|
||||
await upload_to_main(mapset)
|
||||
|
||||
return successRes(data=result)
|
||||
|
|
@ -731,265 +701,3 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
|||
details={"error": str(e)}
|
||||
)
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
|
||||
|
||||
# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
||||
# try:
|
||||
# job_id = generate_job_id(str(user_id))
|
||||
# result = {
|
||||
# "job_id": job_id,
|
||||
# "job_status": "done",
|
||||
# "table_name": "just for test",
|
||||
# "status": "success",
|
||||
# "message": f"Tabel test berhasil dibuat.",
|
||||
# "total_rows": 10,
|
||||
# "geometry_type": "Polygon",
|
||||
# "crs": "EPSG 4326",
|
||||
# "metadata_uuid": "-"
|
||||
# }
|
||||
|
||||
# mapset = {
|
||||
# "name": "Resiko Letusan Gunung Arjuno",
|
||||
# "description": "Testing Automation Upload",
|
||||
# "scale": "1:25000",
|
||||
# "projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7",
|
||||
# "category_id": "0196c80c-855f-77f9-abd0-0c8a30b8c2f5",
|
||||
# "data_status": "sementara",
|
||||
# "classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4",
|
||||
# "producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187",
|
||||
# "layer_type": "polygon",
|
||||
# "source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"],
|
||||
# "layer_url": "http://192.168.60.24:8888/geoserver/wms?service=WMS&version=1.1.0&request=GetMap&layers=labai:risiko_letusan_gunung_arjuno_bromo&bbox=110.89528623700005,-8.780412043999945,116.26994997700001,-5.042971664999925&width=768&height=534&srs=EPSG:4326&styles=&format=application/openlayers",
|
||||
# "metadata_url": "http://192.168.60.24:7777/geonetwork/srv/eng/catalog.search#/metadata/9e5e2f09-13ef-49b5-bb49-1cb12136f63b",
|
||||
# "coverage_level": "provinsi",
|
||||
# "coverage_area": "kabupaten",
|
||||
# "data_update_period": "Tahunan",
|
||||
# "data_version": "2026",
|
||||
# "is_popular": False,
|
||||
# "is_active": True,
|
||||
# "regional_id": "01968b53-a910-7a67-bd10-975b8923b92e",
|
||||
# "notes": "Mapset baru dibuat",
|
||||
# "status_validation": "on_verification",
|
||||
# }
|
||||
|
||||
# await upload_to_main(mapset)
|
||||
|
||||
# return successRes(data=result)
|
||||
|
||||
# except Exception as e:
|
||||
# print("errot", e)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# ===================================
|
||||
# partition +VIEW
|
||||
# ===================================
|
||||
|
||||
|
||||
# Daftar prefix WKT yang valid
|
||||
# VALID_WKT_PREFIXES = ("POINT", "LINESTRING", "POLYGON", "MULTIPOLYGON", "MULTILINESTRING")
|
||||
|
||||
|
||||
def slugify(value: str) -> str:
    """Turn a dataset title into an identifier-safe name for a VIEW.

    Lowercases the input, collapses every run of non-alphanumeric
    characters into a single underscore, and trims underscores from
    both ends.
    """
    lowered = value.lower()
    underscored = re.sub(r'[^a-zA-Z0-9]+', '_', lowered)
    return underscored.strip('_')
|
||||
|
||||
|
||||
|
||||
# Partition + VIEW
|
||||
# async def create_dataset_view_from_metadata(conn, metadata_id: int, user_id: int, title: str):
|
||||
# norm_title = slugify(title)
|
||||
# view_name = f"v_user_{user_id}_{norm_title}"
|
||||
# base_table = f"test_partition_user_{user_id}"
|
||||
|
||||
# # Ambil daftar field
|
||||
# result = await conn.execute(text("SELECT fields FROM dataset_metadata WHERE id=:mid"), {"mid": metadata_id})
|
||||
# fields_json = result.scalar_one_or_none()
|
||||
|
||||
# base_columns = {"id", "user_id", "metadata_id", "geom"}
|
||||
# columns_sql = ""
|
||||
# field_list = []
|
||||
|
||||
# if fields_json:
|
||||
# fields = json.loads(fields_json) if isinstance(fields_json, str) else fields_json
|
||||
# field_list = fields
|
||||
|
||||
# for f in field_list:
|
||||
# safe_col = slugify(f)
|
||||
# alias_name = safe_col if safe_col not in base_columns else f"attr_{safe_col}"
|
||||
|
||||
# # CAST otomatis
|
||||
# if safe_col in ["longitude", "latitude", "lon", "lat"]:
|
||||
# columns_sql += f", (p.attributes->>'{f}')::float AS {alias_name}"
|
||||
# else:
|
||||
# columns_sql += f", p.attributes->>'{f}' AS {alias_name}"
|
||||
|
||||
# # Drop view lama
|
||||
# await conn.execute(text(f"DROP VIEW IF EXISTS {view_name} CASCADE;"))
|
||||
|
||||
# # 🔥 Buat VIEW baru yang punya FID unik
|
||||
# create_view_query = f"""
|
||||
# CREATE OR REPLACE VIEW {view_name} AS
|
||||
# SELECT
|
||||
# row_number() OVER() AS fid, -- FID unik untuk QGIS
|
||||
# p.id,
|
||||
# p.user_id,
|
||||
# p.metadata_id,
|
||||
# p.geom
|
||||
# {columns_sql},
|
||||
# m.title,
|
||||
# m.year,
|
||||
# m.description
|
||||
# FROM {base_table} p
|
||||
# JOIN dataset_metadata m ON m.id = p.metadata_id
|
||||
# WHERE p.metadata_id = {metadata_id};
|
||||
# """
|
||||
# await conn.execute(text(create_view_query))
|
||||
|
||||
# # Register geometry untuk QGIS
|
||||
# await conn.execute(text(f"DELETE FROM geometry_columns WHERE f_table_name = '{view_name}';"))
|
||||
# await conn.execute(text(f"""
|
||||
# INSERT INTO geometry_columns
|
||||
# (f_table_schema, f_table_name, f_geometry_column, coord_dimension, srid, type)
|
||||
# VALUES ('public', '{view_name}', 'geom', 2, 4326, 'GEOMETRY');
|
||||
# """))
|
||||
|
||||
# print(f"[INFO] VIEW {view_name} dibuat dengan FID unik dan kompatibel dengan QGIS.")
|
||||
|
||||
|
||||
# async def handle_to_postgis(payload, engine, user_id: int = 3):
|
||||
# """
|
||||
# Menangani upload data spasial ke PostGIS (dengan partition per user).
|
||||
# - Jika partisi belum ada, akan dibuat otomatis
|
||||
# - Metadata dataset disimpan di tabel dataset_metadata
|
||||
# - Data spasial dimasukkan ke tabel partisi (test_partition_user_{id})
|
||||
# - VIEW otomatis dibuat untuk QGIS
|
||||
# """
|
||||
|
||||
# try:
|
||||
# df = pd.DataFrame(payload.rows)
|
||||
# print(f"[INFO] Diterima {len(df)} baris data dari frontend.")
|
||||
|
||||
# # --- Validasi kolom geometry ---
|
||||
# if "geometry" not in df.columns:
|
||||
# raise errorRes(status_code=400, message="Kolom 'geometry' tidak ditemukan dalam data.")
|
||||
|
||||
# # --- Parsing geometry ke objek shapely ---
|
||||
# df["geometry"] = df["geometry"].apply(
|
||||
# lambda g: wkt.loads(g)
|
||||
# if isinstance(g, str) and g.strip().upper().startswith(VALID_WKT_PREFIXES)
|
||||
# else None
|
||||
# )
|
||||
|
||||
# # --- Buat GeoDataFrame ---
|
||||
# gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")
|
||||
|
||||
# # --- Metadata info dari payload ---
|
||||
# # dataset_title = getattr(payload, "dataset_title", None)
|
||||
# # dataset_year = getattr(payload, "dataset_year", None)
|
||||
# # dataset_desc = getattr(payload, "dataset_description", None)
|
||||
# dataset_title = "hujan 2045"
|
||||
# dataset_year = 2045
|
||||
# dataset_desc = "test metadata"
|
||||
|
||||
# if not dataset_title:
|
||||
# raise errorRes(status_code=400, detail="Field 'dataset_title' wajib ada untuk metadata.")
|
||||
|
||||
# async with engine.begin() as conn:
|
||||
# fields = [col for col in df.columns if col != "geometry"]
|
||||
# # 💾 1️⃣ Simpan Metadata Dataset
|
||||
# print("[INFO] Menyimpan metadata dataset...")
|
||||
# result = await conn.execute(
|
||||
# text("""
|
||||
# INSERT INTO dataset_metadata (user_id, title, year, description, fields, created_at)
|
||||
# VALUES (:user_id, :title, :year, :desc, :fields, :created_at)
|
||||
# RETURNING id;
|
||||
# """),
|
||||
# {
|
||||
# "user_id": user_id,
|
||||
# "title": dataset_title,
|
||||
# "year": dataset_year,
|
||||
# "desc": dataset_desc,
|
||||
# "fields": json.dumps(fields),
|
||||
# "created_at": datetime.utcnow(),
|
||||
# },
|
||||
# )
|
||||
# metadata_id = result.scalar_one()
|
||||
# print(f"[INFO] Metadata disimpan dengan ID {metadata_id}")
|
||||
|
||||
# # ⚙️ 2️⃣ Auto-create Partisi Jika Belum Ada
|
||||
# print(f"[INFO] Memastikan partisi test_partition_user_{user_id} tersedia...")
|
||||
# await conn.execute(
|
||||
# text(f"""
|
||||
# DO $$
|
||||
# BEGIN
|
||||
# IF NOT EXISTS (
|
||||
# SELECT 1 FROM pg_tables WHERE tablename = 'test_partition_user_{user_id}'
|
||||
# ) THEN
|
||||
# EXECUTE format('
|
||||
# CREATE TABLE test_partition_user_%s
|
||||
# PARTITION OF test_partition
|
||||
# FOR VALUES IN (%s);
|
||||
# ', {user_id}, {user_id});
|
||||
# EXECUTE format('CREATE INDEX IF NOT EXISTS idx_partition_user_%s_geom ON test_partition_user_%s USING GIST (geom);', {user_id}, {user_id});
|
||||
# EXECUTE format('CREATE INDEX IF NOT EXISTS idx_partition_user_%s_metadata ON test_partition_user_%s (metadata_id);', {user_id}, {user_id});
|
||||
# END IF;
|
||||
# END
|
||||
# $$;
|
||||
# """)
|
||||
# )
|
||||
|
||||
# # 🧩 3️⃣ Insert Data Spasial ke Partisi
|
||||
# print(f"[INFO] Memasukkan data ke test_partition_user_{user_id} ...")
|
||||
# insert_count = 0
|
||||
# for _, row in gdf.iterrows():
|
||||
# geom_wkt = row["geometry"].wkt if row["geometry"] is not None else None
|
||||
# attributes = row.drop(labels=["geometry"]).to_dict()
|
||||
|
||||
# await conn.execute(
|
||||
# text("""
|
||||
# INSERT INTO test_partition (user_id, metadata_id, geom, attributes, created_at)
|
||||
# VALUES (:user_id, :metadata_id, ST_Force2D(ST_GeomFromText(:geom, 4326)),
|
||||
# CAST(:attr AS jsonb), :created_at);
|
||||
# """),
|
||||
# {
|
||||
# "user_id": user_id,
|
||||
# "metadata_id": metadata_id,
|
||||
# "geom": geom_wkt,
|
||||
# "attr": json.dumps(attributes),
|
||||
# "created_at": datetime.utcnow(),
|
||||
# },
|
||||
# )
|
||||
# insert_count += 1
|
||||
|
||||
# # 🧩 4️⃣ Membuat VIEW untuk dataset baru di QGIS
|
||||
# await create_dataset_view_from_metadata(conn, metadata_id, user_id, dataset_title)
|
||||
|
||||
# print(f"[INFO] ✅ Berhasil memasukkan {insert_count} baris ke partisi user_id={user_id} (metadata_id={metadata_id}).")
|
||||
|
||||
# return {
|
||||
# "status": "success",
|
||||
# "user_id": user_id,
|
||||
# "metadata_id": metadata_id,
|
||||
# "dataset_title": dataset_title,
|
||||
# "inserted_rows": insert_count,
|
||||
# "geometry_type": list(gdf.geom_type.unique()),
|
||||
# }
|
||||
|
||||
# except Exception as e:
|
||||
# print(f"[ERROR] Gagal upload ke PostGIS partition: {e}")
|
||||
# raise errorRes(status_code=500, message="Gagal upload ke PostGIS partition", details=str(e))
|
||||
|
||||
|
|
|
|||
843
services/upload_file/upload_new.py
Normal file
843
services/upload_file/upload_new.py
Normal file
|
|
@ -0,0 +1,843 @@
|
|||
import json
|
||||
import os
|
||||
import random
|
||||
import subprocess
|
||||
import uuid
|
||||
import asyncpg
|
||||
import pandas as pd
|
||||
import geopandas as gpd
|
||||
import numpy as np
|
||||
import re
|
||||
import zipfile
|
||||
import tempfile
|
||||
import asyncio
|
||||
from pyproj import CRS
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
from shapely.geometry import base as shapely_base
|
||||
from fastapi import Depends, File, Form, UploadFile, HTTPException
|
||||
from api.routers.datasets_router import cleansing_data, publish_layer, query_cleansing_data, upload_to_main
|
||||
from core.config import DB_DSN, DB_HOST, DB_NAME, DB_PASS, DB_PORT, DB_USER, UPLOAD_FOLDER, MAX_FILE_MB, GEONETWORK_URL
|
||||
from services.upload_file.ai_generate import send_metadata
|
||||
from services.upload_file.readers.reader_csv import read_csv
|
||||
from services.upload_file.readers.reader_shp import read_shp
|
||||
from services.upload_file.readers.reader_gdb import read_gdb
|
||||
from services.upload_file.readers.reader_mpk import read_mpk
|
||||
from services.upload_file.readers.reader_pdf import convert_df, read_pdf
|
||||
from services.upload_file.utils.df_validation import process_dataframe_synchronous
|
||||
from services.upload_file.utils.geometry_detector import detect_and_build_geometry, attach_polygon_geometry_auto
|
||||
from database.connection import engine, sync_engine
|
||||
from pydantic import BaseModel
|
||||
from typing import Any, Dict, List, Optional
|
||||
from shapely import MultiLineString, MultiPolygon, wkt
|
||||
from sqlalchemy import text
|
||||
from datetime import datetime
|
||||
from response import successRes, errorRes
|
||||
from utils.logger_config import log_activity
|
||||
# Base.metadata.create_all(bind=engine)
|
||||
|
||||
|
||||
def is_geom_empty(g):
    """Return True when *g* represents a missing or empty geometry.

    None, NaN float placeholders, and empty shapely geometries count as
    empty; any other value (e.g. a WKT string) counts as non-empty.
    """
    if g is None:
        return True
    # pandas stores missing cells as NaN floats.
    if isinstance(g, float) and pd.isna(g):
        return True
    # Real shapely geometries report emptiness themselves.
    return g.is_empty if isinstance(g, BaseGeometry) else False
|
||||
|
||||
|
||||
def safe_json(value):
    """Konversi aman untuk semua tipe numpy/pandas/shapely ke tipe JSON-serializable.

    Returns int/float for numpy numerics, ISO-8601 strings for pandas
    Timestamps, WKT strings for shapely geometries, None for missing
    scalars, and everything else unchanged.
    """
    if isinstance(value, (np.int64, np.int32)):
        return int(value)
    if isinstance(value, (np.float64, np.float32)):
        return float(value)
    if isinstance(value, pd.Timestamp):
        return value.isoformat()
    if isinstance(value, shapely_base.BaseGeometry):
        return str(value)  # convert to WKT string
    # Fix: pd.isna raises (or returns an array) for list-like values,
    # which previously crashed serialization of list/array cells —
    # only apply the NA check to scalars.
    if not isinstance(value, (list, tuple, set, dict, np.ndarray)) and pd.isna(value):
        return None
    return value
|
||||
|
||||
|
||||
def detect_zip_type(zip_path: str) -> str:
    """Classify a ZIP archive as 'gdb', 'shp', or 'unknown' from its entry names."""
    with zipfile.ZipFile(zip_path, "r") as archive:
        names = [entry.lower() for entry in archive.namelist()]

    # An ESRI FileGDB appears either as a *.gdb/ directory prefix ...
    if any(".gdb/" in name for name in names):
        return "gdb"

    # ... or as loose FileGDB component files.
    gdb_parts = (".gdbtable", ".gdbtablx", ".gdbindexes", ".spx")
    if any(name.endswith(gdb_parts) for name in names):
        return "gdb"

    if any(name.endswith(".shp") for name in names):
        return "shp"

    return "unknown"
|
||||
|
||||
|
||||
async def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
    """Attach geometry to *df*, profile its validity, and build the upload preview.

    Returns a JSON-serializable dict with geometry statistics, warning rows,
    a full record preview, AI-suggested metadata, and the path of the temp
    parquet file staged for the later PostGIS load — or an errorRes (422)
    when no geometry could be matched to the table at all.
    """
    # Try to detect/construct a geometry column from the raw attributes.
    result = detect_and_build_geometry(df, master_polygons=None)

    # Fallback: join rows onto master polygons when nothing was detected.
    if not hasattr(result, "geometry") or result.geometry.isna().all():
        result = attach_polygon_geometry_auto(result)

    def normalize_geom_type(geom_type):
        # Collapse Multi* variants onto the base type (MultiPolygon -> Polygon).
        if geom_type.startswith("Multi"):
            return geom_type.replace("Multi", "")
        return geom_type

    if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
        geom_types = (
            result.geometry
            .dropna()
            .geom_type
            .apply(normalize_geom_type)
            .unique()
        )

        # NOTE(review): only the first detected type is reported; mixed-type
        # frames lose information here — confirm this is intended.
        geom_type = geom_types[0] if len(geom_types) > 0 else "None"
        null_geom = result.geometry.isna().sum()

        print(f"[INFO] Tipe Geometry: {geom_type}")
        print(f"[INFO] Jumlah geometry kosong: {null_geom}")
    else:
        # No geometry could be built at all -> reject the upload with details.
        res = {
            "message": "Tidak menemukan tabel yang relevan.",
            "file_type": ext,
            "rows": 0,
            "columns": 0,
            "geometry_valid": 0,
            "geometry_empty": 0,
            "geometry_valid_percent": 0,
            "warnings": [],
            "warning_examples": [],
            "preview": []
        }

        return errorRes(message="Tidak berhasil mencocokan geometry pada tabel." ,details=res, status_code=422)

    # Normalize missing/inf values, then serialize geometries to WKT strings
    # so the frame becomes plain-JSON friendly.
    result = result.replace([pd.NA, float('inf'), float('-inf')], None)
    if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
        result['geometry'] = result['geometry'].apply(
            lambda g: g.wkt if g is not None else None
        )

    # Geometry coverage statistics (empty = None after the WKT conversion).
    empty_count = result['geometry'].apply(is_geom_empty).sum()
    valid_count = len(result) - empty_count
    match_percentage = (valid_count / len(result)) * 100

    warnings = []
    if empty_count > 0:
        warnings.append(
            f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
            f"({100 - match_percentage:.2f}% data gagal cocok)."
        )

    # Up to 500 example rows that failed to match, for the UI warning panel.
    if empty_count > 0:
        examples = result[result['geometry'].apply(is_geom_empty)].head(500)
        warning_examples = examples.to_dict(orient="records")
    else:
        warning_examples = []

    preview_data = result.to_dict(orient="records")

    # Make every cell JSON-serializable (numpy scalars, timestamps, NaN...).
    preview_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in preview_data
    ]

    warning_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
    ]

    # Context handed to the external AI metadata generator.
    # NOTE(review): nama_opd is hard-coded — confirm it should not come from
    # the uploading user/organization.
    ai_context = {
        "nama_file_peta": filename,
        "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD) Provinsi Jatim",
        "tipe_data_spasial": geom_type,
        "deskripsi_singkat": fileDesc,
        "struktur_atribut_data": {},
    }
    ai_suggest = send_metadata(ai_context)

    # Stage the validated frame as parquet; the path is returned to the
    # client and replayed in handle_to_postgis.
    tmp_file = generate_unique_filename()

    # Parquet write is blocking; keep it off the event loop.
    await asyncio.to_thread(process_dataframe_synchronous, result, tmp_file)

    response = {
        "message": "File berhasil dibaca dan dianalisis.",
        "file_name": filename,
        "file_type": ext,
        "rows": int(len(result)),
        "columns": list(map(str, result.columns)),
        "geometry_valid": int(valid_count),
        "geometry_empty": int(empty_count),
        "geometry_valid_percent": float(round(match_percentage, 2)),
        "geometry_type": geom_type,
        "warnings": warnings,
        "warning_rows": warning_safe,
        "preview": preview_safe,
        "metadata_suggest": ai_suggest,
        "tmp_path": tmp_file
    }

    return response
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form(""), fileDesc: Optional[str] = Form("")):
    """Receive an uploaded spatial/tabular file and analyse it.

    Dispatches by extension (.csv, .xlsx, .mpk, .pdf with *page* selection,
    .zip containing SHP or FileGDB), then hands the parsed DataFrame to
    process_data. Returns successRes with the analysis payload, or errorRes
    on failure. Files larger than MAX_FILE_MB are rejected with 413.
    """
    fname = file.filename
    ext = os.path.splitext(fname)[1].lower()
    contents = await file.read()
    size_mb = len(contents) / (1024*1024)
    if size_mb > MAX_FILE_MB:
        raise errorRes(status_code=413, message="Ukuran File Terlalu Besar")
    # Persist the upload so the extension-specific readers can open it by path.
    tmp_path = UPLOAD_FOLDER / fname
    with open(tmp_path, "wb") as f:
        f.write(contents)
    try:
        df = None
        print('ext', ext)

        if ext == ".csv":
            df = read_csv(str(tmp_path))
        elif ext == ".xlsx":
            # NOTE(review): read_csv is also used for Excel with a *sheet*
            # argument — presumably it handles both; confirm.
            df = read_csv(str(tmp_path), sheet)
        elif ext == ".mpk":
            df = read_mpk(str(tmp_path))
        elif ext == ".pdf":
            tbl = read_pdf(tmp_path, page)
            if len(tbl) == 0:
                res = {
                    "message": "Tidak ditemukan tabel valid pada halaman yang dipilih",
                    "tables": {},
                    "file_type": ext
                }
                return successRes(message="Tidak ditemukan tabel valid pada halaman yang dipilih", data=res)
            elif len(tbl) > 1:
                # Several candidate tables: return them all so the client can
                # pick one and re-submit via the PDF-processing endpoint.
                res = {
                    "message": "File berhasil dibaca dan dianalisis.",
                    "tables": tbl,
                    "file_type": ext
                }
                return successRes(data=res, message="File berhasil dibaca dan dianalisis.")
            else:
                df = convert_df(tbl[0])
        elif ext == ".zip":
            zip_type = detect_zip_type(str(tmp_path))

            if zip_type == "shp":
                print("[INFO] ZIP terdeteksi sebagai Shapefile.")
                df = read_shp(str(tmp_path))

            elif zip_type == "gdb":
                print("[INFO] ZIP terdeteksi sebagai Geodatabase (GDB).")
                df = read_gdb(str(tmp_path))

            else:
                return successRes(message="ZIP file tidak mengandung SHP / GDB valid.")
        else:
            raise errorRes(status_code=400, message="Unsupported file type")

        if df is None or (hasattr(df, "empty") and df.empty):
            return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid")

        res = await process_data(df, ext, fname, fileDesc)

        # Remove the temp upload only on success; failures keep it for debugging.
        tmp_path.unlink(missing_ok=True)

        return successRes(data=res)

    except Exception as e:
        print(f"[ERROR] {e}")
        return errorRes(
            message="Internal Server Error",
            details=str(e),
            status_code=500
        )
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class PdfRequest(BaseModel):
    """Payload for re-processing one table previously extracted from a PDF."""
    title: str          # table title chosen by the user
    columns: List[str]  # column headers of the selected table
    rows: List[List]    # raw cell values, row-major
    fileName: str       # original uploaded PDF file name
    fileDesc: str       # user-supplied description, forwarded to AI metadata
|
||||
|
||||
async def handle_process_pdf(payload: PdfRequest):
    """Convert a user-selected PDF table (columns + rows) into a DataFrame
    and run the standard geometry analysis (process_data) on it.

    Returns successRes with the analysis payload, errorRes when the table is
    empty, or errorRes(500) on unexpected failures.
    """
    try:
        df = convert_df(payload.model_dump())
        if df is None or (hasattr(df, "empty") and df.empty):
            return errorRes(message="Tidak ada tabel")

        res = await process_data(df, '.pdf', payload.fileName, payload.fileDesc)
        return successRes(data=res)

    except Exception as e:
        print(f"[ERROR] {e}")

        return errorRes(message="Internal Server Error", details= str(e), status_code=500)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class UploadRequest(BaseModel):
    """Payload to publish an analysed dataset into PostGIS/GeoServer."""
    title: str             # dataset title; also seeds the table name
    path: str              # temp parquet path returned by process_data
    rows: List[dict]       # preview rows incl. a GEOMETRY WKT column
    columns: List[str]     # column names
    author: Dict[str, Any] # author/metadata form (abstract, keywords, crs, ...)
    style: str             # SLD XML for the GeoServer layer style
|
||||
|
||||
# generate _2 if exist
|
||||
async def generate_unique_table_name(base_name: str):
    """Return *base_name* snake_cased, suffixed with _2, _3, ... until free.

    Uses PostgreSQL's to_regclass() to test whether a relation with the
    candidate name already exists.
    """
    base = base_name.lower().replace(" ", "_").replace("-", "_")
    candidate = base
    suffix = 2

    async with engine.connect() as conn:
        while True:
            found = await conn.execute(
                text("SELECT to_regclass(:tname)"),
                {"tname": candidate}
            )
            # to_regclass yields NULL when no such relation exists.
            if not found.scalar():
                return candidate

            candidate = f"{base}_{suffix}"
            suffix += 1
|
||||
|
||||
def str_to_date(raw_date: str):
    """Parse a 'YYYY-MM-DD' string into a datetime.date.

    Returns None for empty/None input or an unparseable value (logged with a
    warning). The previous version fell through implicitly on empty input and
    caught bare Exception; now the None return is explicit and only parse
    errors are caught.
    """
    if not raw_date:
        return None
    try:
        return datetime.strptime(raw_date, "%Y-%m-%d").date()
    except (ValueError, TypeError) as e:
        print("[WARNING] Tidak bisa parse dateCreated:", e)
        return None
|
||||
|
||||
|
||||
|
||||
|
||||
def generate_unique_filename(folder="tmp", ext="parquet", digits=6):
    """Return a path '<folder>/<random-id>.<ext>' that does not exist yet.

    The folder is created on demand. *digits* is kept for backward
    compatibility but unused — uuid4 already provides enough entropy.
    """
    os.makedirs(folder, exist_ok=True)
    while True:
        # Fixed: was a doubled assignment (file_id = file_id = ...).
        file_id = uuid.uuid4().int
        filename = f"{folder}/{file_id}.{ext}"

        # Vanishingly unlikely to collide, but check anyway before returning.
        if not os.path.exists(filename):
            return filename
|
||||
|
||||
def generate_job_id(user_id: str) -> str:
    """Build a job identifier of the form '<user_id>_<YYYYmmddHHMMSS>'."""
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    return "_".join((user_id, stamp))
|
||||
|
||||
|
||||
|
||||
def save_xml_to_sld(xml_string, filename):
    """Persist an SLD XML payload to style_temp/<filename>.sld and return its path.

    Fixed: the *filename* argument was previously ignored, so concurrent
    jobs overwrote the same file; the SLD is now named after the
    caller-supplied identifier (the job_id).
    """
    folder_path = 'style_temp'
    os.makedirs(folder_path, exist_ok=True)

    file_path = os.path.join(folder_path, f"{filename}.sld")

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(xml_string)

    return file_path
|
||||
|
||||
|
||||
async def process_parquet_upload(filename: str, table_name: str):
    """Bulk-load a staged parquet file from tmp/ into a new PostGIS table.

    Reads tmp/<filename>, uppercases column names, converts the GEOM column
    (WKT string or WKB bytes) into valid 2D multi-geometries, COPYs all rows
    into a freshly created table (attributes forced to TEXT), then converts
    the text geom column into a typed geometry(..., 4326) column with a GiST
    index. Errors are printed, not raised (best-effort background load).
    """
    from main import db_pool  # imported lazily to avoid a circular import
    file_path = os.path.join("tmp", filename)

    if not os.path.exists(file_path):
        print(f"File {file_path} tidak ditemukan")
        return

    try:
        loop = asyncio.get_running_loop()
        # Parquet read is blocking; keep it off the event loop.
        df = await loop.run_in_executor(None, pd.read_parquet, file_path)

        # 1. CLEANING NAMA KOLOM (PENTING!)
        df.columns = [str(col).strip().upper() for col in df.columns]

        # NOTE(review): this rename is a no-op ("GEOM" -> "GEOM"); it looks
        # like it was meant to normalize a lowercase 'geom' variant — confirm.
        if "GEOM" in df.columns:
            df.rename(columns={"GEOM": "GEOM"}, inplace=True)

        if "GEOM" not in df.columns:
            raise Exception("Kolom GEOM tidak ditemukan")

        # 2. PERSIAPAN DATA (Row Processing)
        clean_rows = []
        geom_types = set()

        # Attribute columns drive both CREATE TABLE and COPY — must stay in sync.
        attr_columns = [col for col in df.columns if col != "GEOM"]

        for row in df.itertuples(index=False):
            # --- Handle GEOM: accept WKT strings or WKB bytes; skip bad rows ---
            raw_geom = getattr(row, "GEOM", None)
            if not raw_geom: continue

            try:
                geom = None
                if isinstance(raw_geom, str):
                    geom = wkt.loads(raw_geom)
                elif isinstance(raw_geom, bytes):
                    from shapely import wkb
                    geom = wkb.loads(raw_geom)

                if not geom: continue
                # buffer(0) is the classic fix for self-intersecting rings.
                if not geom.is_valid: geom = geom.buffer(0)

                # Promote singletons so the table's geometry type is uniform.
                gtype = geom.geom_type.upper()
                if gtype == "POLYGON": geom = MultiPolygon([geom])
                elif gtype == "LINESTRING": geom = MultiLineString([geom])

                geom_types.add(geom.geom_type)
                ewkt = f"SRID=4326;{geom.wkt}"

            except Exception:
                # Unparseable geometry: drop the row silently.
                continue

            # --- Handle Attributes (FORCE STRING) ---
            row_data = []
            for col in attr_columns:
                # getattr uses the uppercase names from attr_columns.
                val = getattr(row, col, None)
                if val is not None:
                    row_data.append(str(val))  # Convert int/float ke string
                else:
                    row_data.append(None)

            row_data.append(ewkt)
            clean_rows.append(tuple(row_data))

        if not clean_rows:
            raise Exception("Data valid kosong")

        # 3. DATABASE OPERATIONS
        final_geom_type = list(geom_types)[0].upper() if geom_types else "GEOM"
        if "MULTI" not in final_geom_type and final_geom_type != "GEOM":
            final_geom_type = "MULTI" + final_geom_type

        # A. BUILD DDL (CREATE TABLE) — quoted identifiers keep the column
        # names UPPERCASE in the database ("ID", "NAMA").
        # NOTE(review): table_name is interpolated into raw SQL; it comes from
        # generate_unique_table_name but is not quoted/escaped here.
        col_defs = [f'"{col}" TEXT' for col in attr_columns]

        create_sql = f"""
            CREATE TABLE {table_name} (
                _id SERIAL PRIMARY KEY, -- lowercase default
                {', '.join(col_defs)}, -- UPPERCASE (Hasil loop attr_columns)
                geom TEXT -- lowercase
            );
        """

        async with db_pool.acquire() as conn:
            # 1. Create Table
            await conn.execute(create_sql)

            # 2. COPY Data — target_cols must match attr_columns exactly;
            # asyncpg quotes these identifiers automatically.
            target_cols = attr_columns + ['geom']
            await conn.copy_records_to_table(
                table_name,
                records=clean_rows,
                columns=target_cols
            )

            # 3. Convert the text column to a typed 2D geometry + GiST index.
            alter_sql = f"""
                ALTER TABLE {table_name}
                ALTER COLUMN geom TYPE geometry({final_geom_type}, 4326)
                USING ST_Force2D(geom::geometry)::geometry({final_geom_type}, 4326);

                CREATE INDEX idx_{table_name}_geom ON {table_name} USING GIST (geom);
            """
            await conn.execute(alter_sql)

        print(f"Sukses upload {len(clean_rows)} baris ke {table_name}.")
        os.remove(file_path)

    except Exception as e:
        # Deliberately swallowed: this runs as a best-effort load; only logged.
        print(f"Error processing parquet: {e}")
|
||||
|
||||
|
||||
async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
    """End-to-end publish pipeline for an analysed dataset.

    Steps: derive a unique table name, validate/unify the payload geometries,
    bulk-load the staged parquet into PostGIS, record author metadata, log
    the upload, run the cleansing procedure, publish the layer to
    GeoServer/GeoNetwork, and register the resulting mapset upstream.

    Raises HTTPException(400) for payload problems and HTTPException(500)
    (after logging) for any other failure.
    """
    try:
        table_name = await generate_unique_table_name(payload.title)
        # Build a DataFrame from the preview rows sent back by the client.
        df = pd.DataFrame(payload.rows)
        df.columns = [col.upper() for col in df.columns]
        if "GEOMETRY" not in df.columns:
            raise HTTPException(400, "Kolom GEOMETRY tidak ditemukan")

        # 1. LOAD WKT -> SHAPELY (invalid/non-string WKT becomes None)
        def safe_load_wkt(g):
            if not isinstance(g, str):
                return None
            try:
                geom = wkt.loads(g)
                return geom
            except:
                return None

        df["GEOMETRY"] = df["GEOMETRY"].apply(safe_load_wkt)
        df = df.rename(columns={"GEOMETRY": "geom"})

        # 2. DROP ROW geometry NULL
        df = df[df["geom"].notnull()]
        if df.empty:
            raise HTTPException(400, "Semua geometry invalid atau NULL")

        # 3. VALIDATE geometry — buffer(0) repairs self-intersections.
        df["geom"] = df["geom"].apply(lambda g: g if g.is_valid else g.buffer(0))

        # 4. SERAGAMKAN TIPE GEOMETRY (Polygon->MultiPolygon, Line->MultiLine)
        def unify_geometry_type(g):
            gtype = g.geom_type.upper()
            if gtype == "POLYGON":
                return MultiPolygon([g])
            if gtype == "LINESTRING":
                return MultiLineString([g])
            return g  # sudah MULTI atau POINT

        df["geom"] = df["geom"].apply(unify_geometry_type)

        # 5. DETEKSI CRS DARI METADATA / INPUT / DEFAULT
        detected_crs = payload.author.get("crs")

        detected = payload.author.get("crs")
        print('crs', detected)

        if not detected_crs:
            detected_crs = "EPSG:4326"

        # NOTE(review): this unconditionally overrides whatever CRS was read
        # from the payload above — confirm the forced EPSG:4326 is intentional.
        detected_crs = 'EPSG:4326'
        # Buat GeoDataFrame
        gdf = gpd.GeoDataFrame(df, geometry="geom", crs=detected_crs)
        row_count = len(gdf)

        # 6. VERIFY CRS (SRID) VALID di PROJ / PostGIS
        try:
            _ = gdf.to_crs(gdf.crs) # test CRS valid
        except:
            raise HTTPException(400, f"CRS {detected_crs} tidak valid")

        # 7. SIMPAN KE POSTGIS — bulk-load the parquet staged by process_data.
        job_id = generate_job_id(str(user_id))
        await process_parquet_upload(payload.path, table_name)

        # 9. SIMPAN METADATA (geom_type, author metadata)
        unified_geom_type = list(gdf.geom_type.unique())
        author = payload.author
        async with engine.begin() as conn:
            await conn.execute(text("""
                INSERT INTO backend.author_metadata (
                    table_title,
                    dataset_title,
                    dataset_abstract,
                    keywords,
                    topic_category,
                    date_created,
                    dataset_status,
                    organization_name,
                    contact_person_name,
                    contact_email,
                    contact_phone,
                    geom_type,
                    user_id,
                    process,
                    geometry_count
                ) VALUES (
                    :table_title,
                    :dataset_title,
                    :dataset_abstract,
                    :keywords,
                    :topic_category,
                    :date_created,
                    :dataset_status,
                    :organization_name,
                    :contact_person_name,
                    :contact_email,
                    :contact_phone,
                    :geom_type,
                    :user_id,
                    :process,
                    :geometry_count
                )
            """), {
                "table_title": table_name,
                "dataset_title": payload.title,
                "dataset_abstract": author.get("abstract"),
                "keywords": author.get("keywords"),
                "topic_category": ", ".join(author.get("topicCategory")),
                "date_created": str_to_date(author.get("dateCreated")),
                "dataset_status": author.get("status"),
                "organization_name": author.get("organization"),
                "contact_person_name": author.get("contactName"),
                "contact_email": author.get("contactEmail"),
                "contact_phone": author.get("contactPhone"),
                "geom_type": json.dumps(unified_geom_type),
                "user_id": user_id,
                "process": 'CLEANSING',
                "geometry_count": row_count
            })

        # 10. LOGGING
        await log_activity(
            user_id=user_id,
            action_type="UPLOAD",
            action_title=f"Upload dataset {table_name}",
            details={"table_name": table_name, "rows": len(gdf)}
        )

        result = {
            "job_id": job_id,
            "job_status": "wait",
            "table_name": table_name,
            "status": "success",
            "message": f"Tabel '{table_name}' berhasil dibuat.",
            "total_rows": len(gdf),
            "geometry_type": unified_geom_type,
            "crs": detected_crs,
            "metadata_uuid": ""
        }
        # Stash the client-supplied SLD keyed by job_id for GeoServer styling.
        save_xml_to_sld(payload.style, job_id)

        # Run the cleansing stored procedure, then publish the layer.
        cleansing = await query_cleansing_data(table_name)
        result['job_status'] = cleansing

        publish = await publish_layer(table_name, job_id)
        result['metadata_uuid'] = publish['uuid']

        # Mapset registration payload.
        # NOTE(review): several IDs (projection, classification, producer,
        # source, regional) are hard-coded — confirm they are environment
        # constants and not per-dataset values.
        mapset = {
            "name": payload.title,
            "description": author.get("abstract"),
            "scale": "1:25000",
            'projection_system_id': '0196c746-d1ba-7f1c-9706-5df738679cc7',
            "category_id": author.get("mapsetCategory"),
            "data_status": "sementara",
            'classification_id': '01968b4b-d3f9-76c9-888c-ee887ac31ce4',
            'producer_id': '01968b54-0000-7a67-bd10-975b8923b93e',
            "layer_type": unified_geom_type[0],
            'source_id': ['019c03ef-35e1-738b-858d-871dc7d1e4d6'],
            "layer_url": publish['geos_link'],
            "metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish['uuid']}",
            "coverage_level": "provinsi",
            "coverage_area": "kabupaten",
            "data_update_period": "Tahunan",
            "data_version": "2026",
            "is_popular": False,
            "is_active": True,
            'regional_id': '01968b53-a910-7a67-bd10-975b8923b92e',
            "notes": "Mapset baru dibuat",
            "status_validation": "on_verification",
        }

        print("mapset data",mapset)
        await upload_to_main(mapset)

        return successRes(data=result)

    except Exception as e:
        await log_activity(
            user_id=user_id,
            action_type="ERROR",
            action_title="Upload gagal",
            details={"error": str(e)}
        )
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
||||
# try:
|
||||
# os.remove(payload.path)
|
||||
# job_id = generate_job_id(str(user_id))
|
||||
# result = {
|
||||
# "job_id": job_id,
|
||||
# "job_status": "done",
|
||||
# "table_name": "just for test",
|
||||
# "status": "success",
|
||||
# "message": f"Tabel test berhasil dibuat.",
|
||||
# "total_rows": 10,
|
||||
# "geometry_type": "Polygon",
|
||||
# "crs": "EPSG 4326",
|
||||
# "metadata_uuid": "-"
|
||||
# }
|
||||
|
||||
# # mapset = {
|
||||
# # 'name': 'TEST Risiko Letusan Gunung Arjuno',
|
||||
# # 'description': 'Peta ini menampilkan area yang berpotensi mengalami letusan Gunung Arjuno dan Gunung Bromo di Provinsi Jawa Timur. Data disusun dalam bentuk poligon yang mengindikasikan tingkat risiko berdasarkan sejarah aktivitas dan karakteristik geologi di wilayah tersebut.',
|
||||
# # 'scale': '1:25000',
|
||||
# # 'projection_system_id': '0196c746-d1ba-7f1c-9706-5df738679cc7',
|
||||
# # 'category_id': '0196c80c-855f-77f9-abd0-0c8a30b8c2f5',
|
||||
# # 'data_status': 'sementara',
|
||||
# # 'classification_id': '01968b4b-d3f9-76c9-888c-ee887ac31ce4',
|
||||
# # 'producer_id': '01968b54-0000-7a67-bd10-975b8923b93e',
|
||||
# # 'layer_type': 'MultiPolygon',
|
||||
# # 'source_id': ['019c03ef-35e1-738b-858d-871dc7d1e4d6'],
|
||||
# # 'layer_url': 'http://192.168.60.24:8888/geoserver/labai/wms?service=WMS&version=1.1.0&request=GetMap&layers=labai:test_risiko_letusan_gunung_arjuno&styles=&bbox=110.89528623700005%2C-8.780412043999945%2C116.26994997700001%2C-5.042971664999925&width=768&height=384&srs=EPSG:4326&format=application/openlayers',
|
||||
# # 'metadata_url': 'http://192.168.60.24:7777/geonetwork/srv/eng/catalog.search#/metadata/123123',
|
||||
# # 'coverage_level': 'provinsi',
|
||||
# # 'coverage_area': 'kabupaten',
|
||||
# # 'data_update_period': 'Tahunan',
|
||||
# # 'data_version': '2026',
|
||||
# # 'is_popular': False,
|
||||
# # 'is_active': True,
|
||||
# # 'regional_id': '01968b53-a910-7a67-bd10-975b8923b92e',
|
||||
# # 'notes': 'Mapset baru dibuat',
|
||||
# # 'status_validation': 'on_verification'
|
||||
# # }
|
||||
|
||||
# # await upload_to_main(mapset)
|
||||
|
||||
# return successRes(data=result)
|
||||
|
||||
# except Exception as e:
|
||||
# print("errot", e)
|
||||
|
||||
|
||||
992
services/upload_file/upload_old.py
Normal file
992
services/upload_file/upload_old.py
Normal file
|
|
@ -0,0 +1,992 @@
|
|||
import json
|
||||
import os
|
||||
import pandas as pd
|
||||
import geopandas as gpd
|
||||
import numpy as np
|
||||
import re
|
||||
import zipfile
|
||||
import tempfile
|
||||
import asyncio
|
||||
from pyproj import CRS
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
from shapely.geometry import base as shapely_base
|
||||
from fastapi import Depends, File, Form, UploadFile, HTTPException
|
||||
from api.routers.datasets_router import cleansing_data, publish_layer, query_cleansing_data, upload_to_main
|
||||
from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES, GEONETWORK_URL
|
||||
from services.upload_file.ai_generate import send_metadata
|
||||
from services.upload_file.readers.reader_csv import read_csv
|
||||
from services.upload_file.readers.reader_shp import read_shp
|
||||
from services.upload_file.readers.reader_gdb import read_gdb
|
||||
from services.upload_file.readers.reader_mpk import read_mpk
|
||||
from services.upload_file.readers.reader_pdf import convert_df, read_pdf
|
||||
from services.upload_file.utils.geometry_detector import detect_and_build_geometry, attach_polygon_geometry_auto
|
||||
from database.connection import engine, sync_engine
|
||||
from pydantic import BaseModel
|
||||
from typing import Any, Dict, List, Optional
|
||||
from shapely import MultiLineString, MultiPolygon, wkt
|
||||
from sqlalchemy import text
|
||||
from datetime import datetime
|
||||
from response import successRes, errorRes
|
||||
from utils.logger_config import log_activity
|
||||
# Base.metadata.create_all(bind=engine)
|
||||
|
||||
|
||||
def is_geom_empty(g):
    """Tell whether a geometry cell should be counted as empty/missing.

    Treats ``None`` and float NaN (pandas' null marker) as empty; real
    shapely geometries report their own emptiness; anything else (e.g. a
    WKT string) is considered non-empty.
    """
    # Missing values first: explicit None or a NaN float.
    if g is None or (isinstance(g, float) and pd.isna(g)):
        return True
    # Shapely objects know whether they are empty; other types are not.
    return g.is_empty if isinstance(g, BaseGeometry) else False
|
||||
|
||||
|
||||
def safe_json(value):
    """Convert numpy/pandas/shapely scalars into JSON-serializable builtins.

    Fix: the original only matched ``np.int64/np.int32`` and
    ``np.float64/np.float32``; other NumPy widths (int16, uint32, float16,
    ...) slipped through unconverted and broke JSON encoding. The abstract
    bases ``np.integer`` / ``np.floating`` cover every width and remain
    backward compatible.

    Returns the value unchanged when no conversion applies; NaN/NaT become
    ``None``.
    """
    if isinstance(value, np.integer):
        return int(value)
    if isinstance(value, np.floating):
        return float(value)
    if isinstance(value, pd.Timestamp):
        return value.isoformat()
    if isinstance(value, shapely_base.BaseGeometry):
        return str(value)  # convert to WKT string
    # NOTE: pd.isna raises on list-like input; callers pass scalar cells only.
    if pd.isna(value):
        return None
    return value
|
||||
|
||||
|
||||
def detect_zip_type(zip_path: str) -> str:
    """Classify a ZIP archive as 'gdb', 'shp' or 'unknown' from member names."""
    with zipfile.ZipFile(zip_path, "r") as archive:
        names = [entry.lower() for entry in archive.namelist()]

    # FileGDB signal 1: a *.gdb/ directory anywhere inside the archive.
    for name in names:
        if name.endswith(".gdb/") or ".gdb/" in name:
            return "gdb"

    # FileGDB signal 2: loose geodatabase table files.
    gdb_extensions = (".gdbtable", ".gdbtablx", ".gdbindexes", ".spx")
    if any(name.endswith(gdb_extensions) for name in names):
        return "gdb"

    # Shapefile: at least one .shp member.
    if any(name.endswith(".shp") for name in names):
        return "shp"

    return "unknown"
|
||||
|
||||
# def detect_zip_type(zip_path: str) -> str:
|
||||
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
||||
# files = zip_ref.namelist()
|
||||
|
||||
# # -------------------------------------------------------------
|
||||
# # 1) DETECT FileGDB
|
||||
# # -------------------------------------------------------------
|
||||
# is_gdb = (
|
||||
# any(".gdb/" in f.lower() for f in files)
|
||||
# or any(f.lower().endswith(ext) for ext in
|
||||
# [".gdbtable", ".gdbtablx", ".gdbindexes", ".spx"] for f in files)
|
||||
# )
|
||||
|
||||
# if is_gdb:
|
||||
# print("\n[INFO] ZIP terdeteksi berisi FileGDB.")
|
||||
|
||||
# with tempfile.TemporaryDirectory() as temp_dir:
|
||||
# # extract ZIP
|
||||
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
||||
# zip_ref.extractall(temp_dir)
|
||||
|
||||
# # find folder *.gdb
|
||||
# gdb_path = None
|
||||
# for root, dirs, _ in os.walk(temp_dir):
|
||||
# for d in dirs:
|
||||
# if d.lower().endswith(".gdb"):
|
||||
# gdb_path = os.path.join(root, d)
|
||||
# break
|
||||
|
||||
# if not gdb_path:
|
||||
# print("[ERROR] Folder .gdb tidak ditemukan.")
|
||||
# return "gdb"
|
||||
|
||||
# print(f"[INFO] GDB Path: {gdb_path}")
|
||||
|
||||
# # Cari seluruh file .gdbtable
|
||||
# table_files = [
|
||||
# os.path.join(gdb_path, f)
|
||||
# for f in os.listdir(gdb_path)
|
||||
# if f.lower().endswith(".gdbtable")
|
||||
# ]
|
||||
|
||||
# if not table_files:
|
||||
# print("[ERROR] Tidak ada file .gdbtable ditemukan.")
|
||||
# return "gdb"
|
||||
|
||||
# # Scan semua table file untuk mencari SpatialReference
|
||||
# found_crs = False
|
||||
|
||||
# for table_file in table_files:
|
||||
# try:
|
||||
# with open(table_file, "rb") as f:
|
||||
# raw = f.read(15000) # baca awal file, cukup untuk header JSON
|
||||
|
||||
# text = raw.decode("utf-8", errors="ignore")
|
||||
|
||||
# start = text.find("{")
|
||||
# end = text.rfind("}") + 1
|
||||
|
||||
# if start == -1 or end == -1:
|
||||
# continue
|
||||
|
||||
# json_str = text[start:end]
|
||||
# meta = json.loads(json_str)
|
||||
|
||||
# spatial_ref = meta.get("SpatialReference")
|
||||
# if not spatial_ref:
|
||||
# continue
|
||||
|
||||
# wkt = spatial_ref.get("WKT")
|
||||
# if not wkt:
|
||||
# continue
|
||||
|
||||
# print(f"[FOUND] CRS metadata pada: {os.path.basename(table_file)}")
|
||||
# print(f"[CRS WKT] {wkt[:200]}...")
|
||||
|
||||
# # Convert to EPSG
|
||||
# try:
|
||||
# epsg = CRS.from_wkt(wkt).to_epsg()
|
||||
# print(f"[EPSG] {epsg}")
|
||||
# except:
|
||||
# print("[EPSG] Tidak ditemukan EPSG.")
|
||||
|
||||
# found_crs = True
|
||||
# break
|
||||
|
||||
# except Exception:
|
||||
# continue
|
||||
|
||||
# if not found_crs:
|
||||
# print("[WARNING] Tidak ditemukan CRS di file .gdbtable manapun.")
|
||||
|
||||
# return "gdb"
|
||||
|
||||
# # -----------------------------------------------------
|
||||
# # 2. DETEKSI SHP
|
||||
# # -----------------------------------------------------
|
||||
# if any(f.lower().endswith(".shp") for f in files):
|
||||
# print("\n[INFO] ZIP terdeteksi berisi SHP.")
|
||||
|
||||
# # cari file .prj
|
||||
# prj_files = [f for f in files if f.lower().endswith(".prj")]
|
||||
|
||||
# if not prj_files:
|
||||
# print("[WARNING] Tidak ada file .prj → CRS tidak diketahui.")
|
||||
# return "shp"
|
||||
|
||||
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
||||
# with tempfile.TemporaryDirectory() as temp_dir:
|
||||
# prj_path = os.path.join(temp_dir, os.path.basename(prj_files[0]))
|
||||
# zip_ref.extract(prj_files[0], temp_dir)
|
||||
|
||||
# # baca isi prj
|
||||
# with open(prj_path, "r") as f:
|
||||
# prj_text = f.read()
|
||||
|
||||
# try:
|
||||
# crs = CRS.from_wkt(prj_text)
|
||||
# print(f"[CRS WKT] {crs.to_wkt()[:200]}...")
|
||||
|
||||
# epsg = crs.to_epsg()
|
||||
# if epsg:
|
||||
# print(f"[EPSG] {epsg}")
|
||||
# else:
|
||||
# print("[EPSG] Tidak ditemukan dalam database EPSG.")
|
||||
|
||||
# except Exception as e:
|
||||
# print("[ERROR] Gagal membaca CRS dari file PRJ:", e)
|
||||
|
||||
# return "shp"
|
||||
|
||||
# # -----------------------------------------------------
|
||||
# # 3. UNKNOWN
|
||||
# # -----------------------------------------------------
|
||||
# return "unknown"
|
||||
|
||||
|
||||
def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
    """Analyze an uploaded table: attach geometry, compute validity stats,
    build a JSON-safe preview and ask the AI service for metadata suggestions.

    Args:
        df: raw table read from the uploaded file.
        ext: source file extension (".csv", ".pdf", ...), echoed in the response.
        filename: original upload name, passed to the AI metadata prompt.
        fileDesc: user-supplied description, passed to the AI metadata prompt.

    Returns:
        A plain dict with stats, warnings, preview rows and AI suggestions —
        or an errorRes (422) when no geometry could be matched.
    """
    result = detect_and_build_geometry(df, master_polygons=None)

    # Fallback: if no geometry column came back (or it is all-null),
    # try the automatic polygon attachment.
    if not hasattr(result, "geometry") or result.geometry.isna().all():
        result = attach_polygon_geometry_auto(result)

    # if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
    #     geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
    #         if not result.empty else "None"

    #     null_geom = result.geometry.isna().sum()

    def normalize_geom_type(geom_type):
        # Collapse Multi* variants to the base type (MultiPolygon -> Polygon).
        if geom_type.startswith("Multi"):
            return geom_type.replace("Multi", "")
        return geom_type

    if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
        geom_types = (
            result.geometry
            .dropna()
            .geom_type
            .apply(normalize_geom_type)
            .unique()
        )

        # NOTE(review): only the first detected type is reported even when
        # the layer is mixed — confirm that is acceptable downstream.
        geom_type = geom_types[0] if len(geom_types) > 0 else "None"
        null_geom = result.geometry.isna().sum()

        print(f"[INFO] Tipe Geometry: {geom_type}")
        print(f"[INFO] Jumlah geometry kosong: {null_geom}")
    else:
        # No usable geometry: report an empty analysis payload with a 422.
        res = {
            "message": "Tidak menemukan tabel yang relevan.",
            "file_type": ext,
            "rows": 0,
            "columns": 0,
            "geometry_valid": 0,
            "geometry_empty": 0,
            "geometry_valid_percent": 0,
            "warnings": [],
            "warning_examples": [],
            "preview": []
        }

        return errorRes(message="Tidak berhasil mencocokan geometry pada tabel." ,details=res, status_code=422)

    # Normalize sentinels that JSON cannot represent.
    result = result.replace([pd.NA, float('inf'), float('-inf')], None)
    if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
        # Serialize geometries to WKT strings for the JSON response.
        result['geometry'] = result['geometry'].apply(
            lambda g: g.wkt if g is not None else None
        )

    # NOTE(review): geometries are WKT strings by now, so is_geom_empty only
    # catches None cells — a "POLYGON EMPTY" WKT counts as valid here.
    empty_count = result['geometry'].apply(is_geom_empty).sum()
    valid_count = len(result) - empty_count
    # NOTE(review): raises ZeroDivisionError when result has zero rows.
    match_percentage = (valid_count / len(result)) * 100

    warnings = []
    if empty_count > 0:
        warnings.append(
            f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
            f"({100 - match_percentage:.2f}% data gagal cocok)."
        )

    # Collect up to 500 example rows whose geometry failed to match.
    if empty_count > 0:
        examples = result[result['geometry'].apply(is_geom_empty)].head(500)
        warning_examples = examples.to_dict(orient="records")
    else:
        warning_examples = []

    # preview_data = result.head(15).to_dict(orient="records")
    # Full-table preview (not truncated) — can be large for big uploads.
    preview_data = result.to_dict(orient="records")

    preview_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in preview_data
    ]

    warning_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
    ]

    # Context sent to the AI metadata-suggestion service.
    # NOTE(review): organization name is hard-coded — confirm intentional.
    ai_context = {
        "nama_file_peta": filename,
        "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD) Provinsi Jatim",
        "tipe_data_spasial": geom_type,
        "deskripsi_singkat": fileDesc,
        "struktur_atribut_data": {},
        # "metadata": {
        #     "judul": "",
        #     "abstrak": "",
        #     "tujuan": "",
        #     "keyword": [],
        #     "kategori": [],
        #     "kategori_mapset": ""
        # }
    }
    ai_suggest = send_metadata(ai_context)
    # ai_suggest = {'judul': 'Peta Risiko Letusan Gunung Arjuna di Provinsi Jawa Timur', 'abstrak': 'Peta ini menggambarkan wilayah berisiko letusan Gunung Arjuna yang berada di Provinsi Jawa Timur. Data disajikan dalam bentuk poligon yang menunjukkan zona risiko berdasarkan analisis potensi aktivitas vulkanik.', 'tujuan': 'Data dapat digunakan untuk perencanaan mitigasi bencana dan pengambilan keputusan di wilayah Jawa Timur.', 'keyword': ['Risiko letusan', 'Gunung Arjuna', 'Bencana alam', 'Provinsi Jawa Timur', 'Geologi'], 'kategori': ['Geoscientific information', 'Environment'], 'kategori_mapset': 'Lingkungan Hidup'}
    # print(ai_suggest)

    response = {
        "message": "File berhasil dibaca dan dianalisis.",
        "file_name": filename,
        "file_type": ext,
        "rows": int(len(result)),
        "columns": list(map(str, result.columns)),
        "geometry_valid": int(valid_count),
        "geometry_empty": int(empty_count),
        "geometry_valid_percent": float(round(match_percentage, 2)),
        "geometry_type": geom_type,
        "warnings": warnings,
        "warning_rows": warning_safe,
        "preview": preview_safe,
        "metadata_suggest": ai_suggest
    }

    # return successRes(content=response)
    return response
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form(""), fileDesc: Optional[str] = Form("")):
    """Receive an upload, dispatch to the matching reader by extension, and
    return the analyzed table.

    Supported: .csv, .xlsx, .mpk, .pdf (with page selection), .zip (SHP/GDB).

    Args:
        file: the uploaded file.
        page: PDF page selector (PDF only).
        sheet: worksheet name (XLSX only).
        fileDesc: free-text description forwarded to process_data.
    """
    fname = file.filename
    ext = os.path.splitext(fname)[1].lower()
    contents = await file.read()
    size_mb = len(contents) / (1024*1024)
    if size_mb > MAX_FILE_MB:
        # NOTE(review): `raise errorRes(...)` only works if errorRes is an
        # exception type; if it returns a Response object this raise fails.
        raise errorRes(status_code=413, message="Ukuran File Terlalu Besar")
    # Persist the upload to disk so path-based readers can open it.
    tmp_path = UPLOAD_FOLDER / fname
    with open(tmp_path, "wb") as f:
        f.write(contents)
    try:
        df = None
        print('ext', ext)

        if ext == ".csv":
            df = read_csv(str(tmp_path))
        elif ext == ".xlsx":
            # The CSV reader also handles XLSX when given a sheet name.
            df = read_csv(str(tmp_path), sheet)
        elif ext == ".mpk":
            df = read_mpk(str(tmp_path))
        elif ext == ".pdf":
            tbl = read_pdf(tmp_path, page)
            if len(tbl) == 0:
                # No table on the requested page: success response, empty payload.
                res = {
                    "message": "Tidak ditemukan tabel valid pada halaman yang dipilih",
                    "tables": {},
                    "file_type": ext
                }
                return successRes(message="Tidak ditemukan tabel valid pada halaman yang dipilih", data=res)
            elif len(tbl) > 1:
                # Several candidate tables: return them all and let the client pick.
                res = {
                    "message": "File berhasil dibaca dan dianalisis.",
                    "tables": tbl,
                    "file_type": ext
                }
                return successRes(data=res, message="File berhasil dibaca dan dianalisis.")
            else:
                df = convert_df(tbl[0])
        elif ext == ".zip":
            zip_type = detect_zip_type(str(tmp_path))

            if zip_type == "shp":
                print("[INFO] ZIP terdeteksi sebagai Shapefile.")
                df = read_shp(str(tmp_path))

            elif zip_type == "gdb":
                print("[INFO] ZIP terdeteksi sebagai Geodatabase (GDB).")
                df = read_gdb(str(tmp_path))

            else:
                return successRes(message="ZIP file tidak mengandung SHP / GDB valid.")
        else:
            # NOTE(review): same `raise errorRes(...)` concern as above.
            raise errorRes(status_code=400, message="Unsupported file type")

        if df is None or (hasattr(df, "empty") and df.empty):
            return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid")

        res = process_data(df, ext, fname, fileDesc)

        # NOTE(review): the temp file is only removed on this happy path;
        # early returns above leave it in UPLOAD_FOLDER.
        tmp_path.unlink(missing_ok=True)

        return successRes(data=res)

    except Exception as e:
        print(f"[ERROR] {e}")
        return errorRes(
            message="Internal Server Error",
            details=str(e),
            status_code=500
        )

    # finally:
    #     db_session.close()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class PdfRequest(BaseModel):
    """Payload for re-processing one table the client picked from a PDF."""
    title: str          # title of the selected table
    columns: List[str]  # column headers of the table
    rows: List[List]    # table body; one inner list per row
    fileName: str       # original uploaded PDF file name
    fileDesc: str       # user-supplied description, forwarded to the AI prompt
|
||||
|
||||
async def handle_process_pdf(payload: PdfRequest):
    """Convert a client-selected PDF table into a dataframe and analyze it."""
    try:
        frame = convert_df(payload.model_dump())

        # Nothing usable came back from the conversion.
        if frame is None or (hasattr(frame, "empty") and frame.empty):
            return errorRes(message="Tidak ada tabel")

        analysis = process_data(frame, '.pdf', payload.fileName, payload.fileDesc)
        return successRes(data=analysis)

    except Exception as e:
        print(f"[ERROR] {e}")
        return errorRes(message="Internal Server Error", details= str(e), status_code=500)
|
||||
|
||||
# finally:
|
||||
# db_session.close()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class UploadRequest(BaseModel):
    """Payload for pushing an analyzed table into PostGIS."""
    title: str              # dataset title; also the basis for the table name
    rows: List[dict]        # records including a GEOMETRY column (WKT strings)
    columns: List[str]      # column names of the dataset
    author: Dict[str, Any]  # author/metadata fields (abstract, keywords, crs, ...)
    style: str              # SLD XML for the layer style
|
||||
|
||||
# generate _2 if exist
|
||||
async def generate_unique_table_name(base_name: str):
    """Derive a PostgreSQL-safe table name from a title, appending _2, _3, ...
    until no relation with that name exists.

    Fix: the original only replaced spaces and hyphens, so characters such as
    '.', '(' or '/' survived into a name that is later interpolated into raw
    DDL (ALTER TABLE "..."). Every character outside [a-z0-9_] is now mapped
    to '_' (backward-compatible for previously valid titles), with a fallback
    when the sanitized name would be empty.

    Args:
        base_name: human-readable dataset title.

    Returns:
        A lowercase, sanitized table name not currently present in the DB.
    """
    # Lowercase, then neutralize anything that is not a safe identifier char.
    # No '+' in the pattern: each bad char maps 1:1 to '_' exactly as the old
    # space/hyphen replacement did, preserving existing generated names.
    base_name = re.sub(r"[^a-z0-9_]", "_", base_name.lower()) or "dataset"
    table_name = base_name
    counter = 2

    async with engine.connect() as conn:
        while True:
            # to_regclass returns NULL when no relation with this name exists.
            result = await conn.execute(
                text("SELECT to_regclass(:tname)"),
                {"tname": table_name}
            )
            exists = result.scalar()

            if not exists:
                return table_name

            table_name = f"{base_name}_{counter}"
            counter += 1
|
||||
|
||||
def str_to_date(raw_date: str):
    """Parse a 'YYYY-MM-DD' string into a date; None on empty or bad input."""
    # Guard clause: empty string / None means no date was supplied.
    if not raw_date:
        return None
    try:
        return datetime.strptime(raw_date, "%Y-%m-%d").date()
    except Exception as e:
        print("[WARNING] Tidak bisa parse dateCreated:", e)
        return None
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def generate_job_id(user_id: str) -> str:
    """Build a job identifier of the form '<user_id>_<YYYYMMDDHHMMSS>'."""
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    return "_".join((user_id, stamp))
|
||||
|
||||
|
||||
|
||||
def save_xml_to_sld(xml_string, filename):
    """Persist an SLD (XML) style document to style_temp/<filename>.sld.

    Fix: the previous version ignored the `filename` argument (the f-string
    had no placeholder), so every call overwrote the same file and styles
    from concurrent uploads clobbered each other.

    Args:
        xml_string: the SLD XML content to write.
        filename: base name (without extension) for the output file.

    Returns:
        The path of the written file.
    """
    folder_path = 'style_temp'
    os.makedirs(folder_path, exist_ok=True)

    file_path = os.path.join(folder_path, f"{filename}.sld")

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(xml_string)

    return file_path
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
    """End-to-end dataset ingest: write the payload rows to PostGIS, record
    author metadata, run cleansing, publish to GeoServer/GeoNetwork and
    register the mapset in the main application.

    Args:
        payload: rows + author metadata + SLD style (see UploadRequest).
        user_id: uploading user; defaults to 2 — presumably a dev placeholder,
            TODO confirm.

    Raises:
        HTTPException: 400 on missing/invalid geometry, 500 on any later failure.
    """
    try:
        table_name = await generate_unique_table_name(payload.title)
        # Build the working DataFrame; columns are upper-cased for matching.
        df = pd.DataFrame(payload.rows)
        df.columns = [col.upper() for col in df.columns]
        if "GEOMETRY" not in df.columns:
            raise HTTPException(400, "Kolom GEOMETRY tidak ditemukan")

        # =====================================================================
        # 1. LOAD WKT → SHAPELY
        # =====================================================================
        def safe_load_wkt(g):
            # Non-string cells (None, NaN, numbers) cannot carry WKT.
            if not isinstance(g, str):
                return None
            try:
                geom = wkt.loads(g)
                return geom
            except:
                # Corrupt WKT is treated as a missing geometry.
                return None

        df["GEOMETRY"] = df["GEOMETRY"].apply(safe_load_wkt)
        df = df.rename(columns={"GEOMETRY": "geom"})

        # =====================================================================
        # 2. DROP ROWS WITH NULL GEOMETRY
        # =====================================================================
        df = df[df["geom"].notnull()]
        if df.empty:
            raise HTTPException(400, "Semua geometry invalid atau NULL")

        # =====================================================================
        # 3. VALIDATE geometry (very important)
        # =====================================================================
        # buffer(0) is the standard GIS trick to repair light topology errors.
        df["geom"] = df["geom"].apply(lambda g: g if g.is_valid else g.buffer(0))

        # =====================================================================
        # 4. UNIFY GEOMETRY TYPES (Polygon→MultiPolygon, Line→MultiLine)
        # =====================================================================
        def unify_geometry_type(g):
            gtype = g.geom_type.upper()
            if gtype == "POLYGON":
                return MultiPolygon([g])
            if gtype == "LINESTRING":
                return MultiLineString([g])
            return g  # already MULTI or POINT
        df["geom"] = df["geom"].apply(unify_geometry_type)

        # =====================================================================
        # 5. DETECT CRS FROM METADATA / INPUT / DEFAULT
        # =====================================================================
        detected_crs = payload.author.get("crs")

        # NOTE(review): duplicate lookup; only used for the debug print below.
        detected = payload.author.get("crs")
        print('crs', detected)

        if not detected_crs:
            detected_crs = "EPSG:4326"

        # NOTE(review): this unconditionally overrides the detection above —
        # any client-supplied CRS is ignored. Confirm this is intentional.
        detected_crs = 'EPSG:4326'
        # Build the GeoDataFrame with the chosen CRS.
        gdf = gpd.GeoDataFrame(df, geometry="geom", crs=detected_crs)
        row_count = len(gdf)

        # =====================================================================
        # 6. VERIFY CRS (SRID) IS VALID IN PROJ / PostGIS
        # =====================================================================
        try:
            _ = gdf.to_crs(gdf.crs) # test CRS valid
        except:
            raise HTTPException(400, f"CRS {detected_crs} tidak valid")

        # =====================================================================
        # 7. SAVE TO POSTGIS (to_postgis is blocking → run in a worker thread)
        # =====================================================================
        await asyncio.to_thread(
            gdf.to_postgis,
            table_name,
            sync_engine,
            if_exists="replace",
            index=False
        )

        # =====================================================================
        # 8. ADD PRIMARY KEY (required for the QGIS API)
        # =====================================================================
        async with engine.begin() as conn:
            await conn.execute(text(
                f'ALTER TABLE "{table_name}" ADD COLUMN _ID SERIAL PRIMARY KEY;'
            ))

        # =====================================================================
        # 9. PERSIST METADATA (geom_type, author metadata)
        # =====================================================================
        unified_geom_type = list(gdf.geom_type.unique())
        author = payload.author
        async with engine.begin() as conn:
            await conn.execute(text("""
                INSERT INTO backend.author_metadata (
                    table_title,
                    dataset_title,
                    dataset_abstract,
                    keywords,
                    topic_category,
                    date_created,
                    dataset_status,
                    organization_name,
                    contact_person_name,
                    contact_email,
                    contact_phone,
                    geom_type,
                    user_id,
                    process,
                    geometry_count
                ) VALUES (
                    :table_title,
                    :dataset_title,
                    :dataset_abstract,
                    :keywords,
                    :topic_category,
                    :date_created,
                    :dataset_status,
                    :organization_name,
                    :contact_person_name,
                    :contact_email,
                    :contact_phone,
                    :geom_type,
                    :user_id,
                    :process,
                    :geometry_count
                )
            """), {
                "table_title": table_name,
                "dataset_title": payload.title,
                "dataset_abstract": author.get("abstract"),
                "keywords": author.get("keywords"),
                # "topic_category": author.get("topicCategory"),
                # NOTE(review): raises TypeError if topicCategory is absent (None).
                "topic_category": ", ".join(author.get("topicCategory")),
                "date_created": str_to_date(author.get("dateCreated")),
                "dataset_status": author.get("status"),
                "organization_name": author.get("organization"),
                "contact_person_name": author.get("contactName"),
                "contact_email": author.get("contactEmail"),
                "contact_phone": author.get("contactPhone"),
                "geom_type": json.dumps(unified_geom_type),
                "user_id": user_id,
                "process": 'CLEANSING',
                "geometry_count": row_count
            })


        # =====================================================================
        # 10. LOGGING
        # =====================================================================
        await log_activity(
            user_id=user_id,
            action_type="UPLOAD",
            action_title=f"Upload dataset {table_name}",
            details={"table_name": table_name, "rows": len(gdf)}
        )

        job_id = generate_job_id(str(user_id))
        result = {
            "job_id": job_id,
            "job_status": "wait",
            "table_name": table_name,
            "status": "success",
            "message": f"Tabel '{table_name}' berhasil dibuat.",
            "total_rows": len(gdf),
            "geometry_type": unified_geom_type,
            "crs": detected_crs,
            "metadata_uuid": ""
        }
        # Persist the uploaded SLD style keyed by job id.
        save_xml_to_sld(payload.style, job_id)

        # await report_progress(job_id, "upload", 20, "Upload selesai")
        # cleansing_data(table_name, job_id)

        # Run the stored-procedure cleansing step, then publish the layer.
        cleansing = await query_cleansing_data(table_name)
        result['job_status'] = cleansing

        # NOTE(review): publish_layer currently comments out its metadata
        # publication, so publish may not contain 'uuid' — verify.
        publish = await publish_layer(table_name, job_id)
        result['metadata_uuid'] = publish['uuid']

        # Mapset registration payload; several ids are hard-coded defaults
        # (projection, classification, producer, source, regional) — presumably
        # environment-specific constants, TODO confirm.
        mapset = {
            "name": payload.title,
            "description": author.get("abstract"),
            "scale": "1:25000",
            "projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7",
            "category_id": author.get("mapsetCategory"),
            "data_status": "sementara",
            "classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4",
            "producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187",
            "layer_type": unified_geom_type[0],
            "source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"],
            "layer_url": publish['geos_link'],
            "metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish['uuid']}",
            "coverage_level": "provinsi",
            "coverage_area": "kabupaten",
            "data_update_period": "Tahunan",
            "data_version": "2026",
            "is_popular": False,
            "is_active": True,
            "regional_id": "01968b53-a910-7a67-bd10-975b8923b92e",
            "notes": "Mapset baru dibuat",
            "status_validation": "on_verification",
        }

        print("mapset data",mapset)
        await upload_to_main(mapset)

        return successRes(data=result)

    except Exception as e:
        # Record the failure, then surface it as a 500.
        await log_activity(
            user_id=user_id,
            action_type="ERROR",
            action_title="Upload gagal",
            details={"error": str(e)}
        )
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
|
||||
|
||||
# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
||||
# try:
|
||||
# job_id = generate_job_id(str(user_id))
|
||||
# result = {
|
||||
# "job_id": job_id,
|
||||
# "job_status": "done",
|
||||
# "table_name": "just for test",
|
||||
# "status": "success",
|
||||
# "message": f"Tabel test berhasil dibuat.",
|
||||
# "total_rows": 10,
|
||||
# "geometry_type": "Polygon",
|
||||
# "crs": "EPSG 4326",
|
||||
# "metadata_uuid": "-"
|
||||
# }
|
||||
|
||||
# mapset = {
|
||||
# "name": "Resiko Letusan Gunung Arjuno",
|
||||
# "description": "Testing Automation Upload",
|
||||
# "scale": "1:25000",
|
||||
# "projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7",
|
||||
# "category_id": "0196c80c-855f-77f9-abd0-0c8a30b8c2f5",
|
||||
# "data_status": "sementara",
|
||||
# "classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4",
|
||||
# "producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187",
|
||||
# "layer_type": "polygon",
|
||||
# "source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"],
|
||||
# "layer_url": "http://192.168.60.24:8888/geoserver/wms?service=WMS&version=1.1.0&request=GetMap&layers=labai:risiko_letusan_gunung_arjuno_bromo&bbox=110.89528623700005,-8.780412043999945,116.26994997700001,-5.042971664999925&width=768&height=534&srs=EPSG:4326&styles=&format=application/openlayers",
|
||||
# "metadata_url": "http://192.168.60.24:7777/geonetwork/srv/eng/catalog.search#/metadata/9e5e2f09-13ef-49b5-bb49-1cb12136f63b",
|
||||
# "coverage_level": "provinsi",
|
||||
# "coverage_area": "kabupaten",
|
||||
# "data_update_period": "Tahunan",
|
||||
# "data_version": "2026",
|
||||
# "is_popular": False,
|
||||
# "is_active": True,
|
||||
# "regional_id": "01968b53-a910-7a67-bd10-975b8923b92e",
|
||||
# "notes": "Mapset baru dibuat",
|
||||
# "status_validation": "on_verification",
|
||||
# }
|
||||
|
||||
# await upload_to_main(mapset)
|
||||
|
||||
# return successRes(data=result)
|
||||
|
||||
# except Exception as e:
|
||||
# print("errot", e)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# ===================================
|
||||
# partition +VIEW
|
||||
# ===================================
|
||||
|
||||
|
||||
# Daftar prefix WKT yang valid
|
||||
# VALID_WKT_PREFIXES = ("POINT", "LINESTRING", "POLYGON", "MULTIPOLYGON", "MULTILINESTRING")
|
||||
|
||||
|
||||
def slugify(value: str) -> str:
    """Normalize a dataset title into a safe name for a VIEW."""
    # Lowercase first, then collapse every run of non-alphanumerics into a
    # single underscore and trim underscores from both ends.
    cleaned = re.sub(r'[^a-zA-Z0-9]+', '_', value.lower())
    return cleaned.strip('_')
|
||||
|
||||
|
||||
|
||||
# Partition + VIEW
|
||||
# async def create_dataset_view_from_metadata(conn, metadata_id: int, user_id: int, title: str):
|
||||
# norm_title = slugify(title)
|
||||
# view_name = f"v_user_{user_id}_{norm_title}"
|
||||
# base_table = f"test_partition_user_{user_id}"
|
||||
|
||||
# # Ambil daftar field
|
||||
# result = await conn.execute(text("SELECT fields FROM dataset_metadata WHERE id=:mid"), {"mid": metadata_id})
|
||||
# fields_json = result.scalar_one_or_none()
|
||||
|
||||
# base_columns = {"id", "user_id", "metadata_id", "geom"}
|
||||
# columns_sql = ""
|
||||
# field_list = []
|
||||
|
||||
# if fields_json:
|
||||
# fields = json.loads(fields_json) if isinstance(fields_json, str) else fields_json
|
||||
# field_list = fields
|
||||
|
||||
# for f in field_list:
|
||||
# safe_col = slugify(f)
|
||||
# alias_name = safe_col if safe_col not in base_columns else f"attr_{safe_col}"
|
||||
|
||||
# # CAST otomatis
|
||||
# if safe_col in ["longitude", "latitude", "lon", "lat"]:
|
||||
# columns_sql += f", (p.attributes->>'{f}')::float AS {alias_name}"
|
||||
# else:
|
||||
# columns_sql += f", p.attributes->>'{f}' AS {alias_name}"
|
||||
|
||||
# # Drop view lama
|
||||
# await conn.execute(text(f"DROP VIEW IF EXISTS {view_name} CASCADE;"))
|
||||
|
||||
# # 🔥 Buat VIEW baru yang punya FID unik
|
||||
# create_view_query = f"""
|
||||
# CREATE OR REPLACE VIEW {view_name} AS
|
||||
# SELECT
|
||||
# row_number() OVER() AS fid, -- FID unik untuk QGIS
|
||||
# p.id,
|
||||
# p.user_id,
|
||||
# p.metadata_id,
|
||||
# p.geom
|
||||
# {columns_sql},
|
||||
# m.title,
|
||||
# m.year,
|
||||
# m.description
|
||||
# FROM {base_table} p
|
||||
# JOIN dataset_metadata m ON m.id = p.metadata_id
|
||||
# WHERE p.metadata_id = {metadata_id};
|
||||
# """
|
||||
# await conn.execute(text(create_view_query))
|
||||
|
||||
# # Register geometry untuk QGIS
|
||||
# await conn.execute(text(f"DELETE FROM geometry_columns WHERE f_table_name = '{view_name}';"))
|
||||
# await conn.execute(text(f"""
|
||||
# INSERT INTO geometry_columns
|
||||
# (f_table_schema, f_table_name, f_geometry_column, coord_dimension, srid, type)
|
||||
# VALUES ('public', '{view_name}', 'geom', 2, 4326, 'GEOMETRY');
|
||||
# """))
|
||||
|
||||
# print(f"[INFO] VIEW {view_name} dibuat dengan FID unik dan kompatibel dengan QGIS.")
|
||||
|
||||
|
||||
# async def handle_to_postgis(payload, engine, user_id: int = 3):
|
||||
# """
|
||||
# Menangani upload data spasial ke PostGIS (dengan partition per user).
|
||||
# - Jika partisi belum ada, akan dibuat otomatis
|
||||
# - Metadata dataset disimpan di tabel dataset_metadata
|
||||
# - Data spasial dimasukkan ke tabel partisi (test_partition_user_{id})
|
||||
# - VIEW otomatis dibuat untuk QGIS
|
||||
# """
|
||||
|
||||
# try:
|
||||
# df = pd.DataFrame(payload.rows)
|
||||
# print(f"[INFO] Diterima {len(df)} baris data dari frontend.")
|
||||
|
||||
# # --- Validasi kolom geometry ---
|
||||
# if "geometry" not in df.columns:
|
||||
# raise errorRes(status_code=400, message="Kolom 'geometry' tidak ditemukan dalam data.")
|
||||
|
||||
# # --- Parsing geometry ke objek shapely ---
|
||||
# df["geometry"] = df["geometry"].apply(
|
||||
# lambda g: wkt.loads(g)
|
||||
# if isinstance(g, str) and g.strip().upper().startswith(VALID_WKT_PREFIXES)
|
||||
# else None
|
||||
# )
|
||||
|
||||
# # --- Buat GeoDataFrame ---
|
||||
# gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")
|
||||
|
||||
# # --- Metadata info dari payload ---
|
||||
# # dataset_title = getattr(payload, "dataset_title", None)
|
||||
# # dataset_year = getattr(payload, "dataset_year", None)
|
||||
# # dataset_desc = getattr(payload, "dataset_description", None)
|
||||
# dataset_title = "hujan 2045"
|
||||
# dataset_year = 2045
|
||||
# dataset_desc = "test metadata"
|
||||
|
||||
# if not dataset_title:
|
||||
# raise errorRes(status_code=400, detail="Field 'dataset_title' wajib ada untuk metadata.")
|
||||
|
||||
# async with engine.begin() as conn:
|
||||
# fields = [col for col in df.columns if col != "geometry"]
|
||||
# # 💾 1️⃣ Simpan Metadata Dataset
|
||||
# print("[INFO] Menyimpan metadata dataset...")
|
||||
# result = await conn.execute(
|
||||
# text("""
|
||||
# INSERT INTO dataset_metadata (user_id, title, year, description, fields, created_at)
|
||||
# VALUES (:user_id, :title, :year, :desc, :fields, :created_at)
|
||||
# RETURNING id;
|
||||
# """),
|
||||
# {
|
||||
# "user_id": user_id,
|
||||
# "title": dataset_title,
|
||||
# "year": dataset_year,
|
||||
# "desc": dataset_desc,
|
||||
# "fields": json.dumps(fields),
|
||||
# "created_at": datetime.utcnow(),
|
||||
# },
|
||||
# )
|
||||
# metadata_id = result.scalar_one()
|
||||
# print(f"[INFO] Metadata disimpan dengan ID {metadata_id}")
|
||||
|
||||
# # ⚙️ 2️⃣ Auto-create Partisi Jika Belum Ada
|
||||
# print(f"[INFO] Memastikan partisi test_partition_user_{user_id} tersedia...")
|
||||
# await conn.execute(
|
||||
# text(f"""
|
||||
# DO $$
|
||||
# BEGIN
|
||||
# IF NOT EXISTS (
|
||||
# SELECT 1 FROM pg_tables WHERE tablename = 'test_partition_user_{user_id}'
|
||||
# ) THEN
|
||||
# EXECUTE format('
|
||||
# CREATE TABLE test_partition_user_%s
|
||||
# PARTITION OF test_partition
|
||||
# FOR VALUES IN (%s);
|
||||
# ', {user_id}, {user_id});
|
||||
# EXECUTE format('CREATE INDEX IF NOT EXISTS idx_partition_user_%s_geom ON test_partition_user_%s USING GIST (geom);', {user_id}, {user_id});
|
||||
# EXECUTE format('CREATE INDEX IF NOT EXISTS idx_partition_user_%s_metadata ON test_partition_user_%s (metadata_id);', {user_id}, {user_id});
|
||||
# END IF;
|
||||
# END
|
||||
# $$;
|
||||
# """)
|
||||
# )
|
||||
|
||||
# # 🧩 3️⃣ Insert Data Spasial ke Partisi
|
||||
# print(f"[INFO] Memasukkan data ke test_partition_user_{user_id} ...")
|
||||
# insert_count = 0
|
||||
# for _, row in gdf.iterrows():
|
||||
# geom_wkt = row["geometry"].wkt if row["geometry"] is not None else None
|
||||
# attributes = row.drop(labels=["geometry"]).to_dict()
|
||||
|
||||
# await conn.execute(
|
||||
# text("""
|
||||
# INSERT INTO test_partition (user_id, metadata_id, geom, attributes, created_at)
|
||||
# VALUES (:user_id, :metadata_id, ST_Force2D(ST_GeomFromText(:geom, 4326)),
|
||||
# CAST(:attr AS jsonb), :created_at);
|
||||
# """),
|
||||
# {
|
||||
# "user_id": user_id,
|
||||
# "metadata_id": metadata_id,
|
||||
# "geom": geom_wkt,
|
||||
# "attr": json.dumps(attributes),
|
||||
# "created_at": datetime.utcnow(),
|
||||
# },
|
||||
# )
|
||||
# insert_count += 1
|
||||
|
||||
# # 🧩 4️⃣ Membuat VIEW untuk dataset baru di QGIS
|
||||
# await create_dataset_view_from_metadata(conn, metadata_id, user_id, dataset_title)
|
||||
|
||||
# print(f"[INFO] ✅ Berhasil memasukkan {insert_count} baris ke partisi user_id={user_id} (metadata_id={metadata_id}).")
|
||||
|
||||
# return {
|
||||
# "status": "success",
|
||||
# "user_id": user_id,
|
||||
# "metadata_id": metadata_id,
|
||||
# "dataset_title": dataset_title,
|
||||
# "inserted_rows": insert_count,
|
||||
# "geometry_type": list(gdf.geom_type.unique()),
|
||||
# }
|
||||
|
||||
# except Exception as e:
|
||||
# print(f"[ERROR] Gagal upload ke PostGIS partition: {e}")
|
||||
# raise errorRes(status_code=500, message="Gagal upload ke PostGIS partition", details=str(e))
|
||||
|
||||
70
services/upload_file/utils/df_validation.py
Normal file
70
services/upload_file/utils/df_validation.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
import pandas as pd
|
||||
import geopandas as gpd
|
||||
from shapely import wkt
|
||||
from shapely.errors import WKTReadingError
|
||||
|
||||
def process_dataframe_synchronous(df_input, tmp_file):
    """
    Validate, clean, and export a WKT-bearing DataFrame to Parquet.

    Intended to run in a separate thread (CPU bound), e.g. via
    ``asyncio.to_thread``. Parses the WKT strings in the ``geometry``
    column, drops rows with corrupt or empty geometries, repairs invalid
    topology, normalizes attribute column names, and writes the result
    to *tmp_file* as Parquet.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Input frame with a ``geometry`` column holding WKT strings.
        The caller's frame is not modified.
    tmp_file : str or path-like
        Destination path for the Parquet output.

    Returns
    -------
    int
        Number of rows written to the Parquet file.

    Raises
    ------
    ValueError
        If no valid spatial rows remain after WKT parsing.
    """
    # 1. Work on a copy so the caller's DataFrame is untouched.
    export_df = df_input.copy()

    # =========================================================================
    # STAGE 1: SAFE WKT LOADING
    # =========================================================================
    def safe_load_wkt(raw):
        """Parse one WKT string; return None for non-strings or corrupt WKT."""
        if not isinstance(raw, str):
            return None
        try:
            return wkt.loads(raw)
        except Exception:
            # Broad catch is intentional: any parse failure marks the row
            # for removal instead of aborting the whole upload. (The
            # original tuple `(WKTReadingError, Exception)` was redundant —
            # Exception already covers WKTReadingError, which is also
            # deprecated in Shapely 2.x.)
            return None

    export_df["geom"] = export_df["geometry"].apply(safe_load_wkt)

    # =========================================================================
    # STAGE 2: FILTER NULL & INVALID GEOMETRY
    # =========================================================================
    # Drop rows whose WKT conversion failed (None).
    export_df = export_df[export_df["geom"].notnull()]
    if export_df.empty:
        raise ValueError("Tidak ada data spasial valid yang ditemukan.")

    # Promote to a GeoDataFrame keyed on the parsed geometries.
    export_df = gpd.GeoDataFrame(export_df, geometry="geom")

    # =========================================================================
    # STAGE 3: FIX TOPOLOGY (IMPORTANT!)
    # =========================================================================
    # buffer(0) is the standard GIS trick to repair light topology errors
    # (e.g. a polygon whose boundary self-intersects).
    export_df["geom"] = export_df["geom"].apply(
        lambda g: g.buffer(0) if not g.is_valid else g
    )

    # Repair can occasionally collapse a geometry to empty — drop those too.
    export_df = export_df[~export_df["geom"].is_empty]

    # =========================================================================
    # STAGE 4: FINALIZATION (CRS & RENAME)
    # =========================================================================
    export_df = export_df.drop(columns=["geometry"])  # old WKT string column
    export_df = export_df.set_crs("EPSG:4326", allow_override=True)

    # Uppercase attribute columns, stripping stray whitespace first
    # (" ID " -> "ID"); keep the geometry column lowercase 'geom'.
    export_df = export_df.rename(
        columns=lambda c: str(c).strip().upper() if c != "geom" else c
    )

    # Persist to Parquet and report how many rows survived cleaning.
    export_df.to_parquet(tmp_file)
    return len(export_df)


# --- Usage from an async function ---
# await asyncio.to_thread(process_dataframe_synchronous, result, tmp_file)
|
||||
|
||||
# --- Cara Pemanggilan di Async Function ---
|
||||
# await asyncio.to_thread(process_dataframe_synchronous, result, tmp_file)
|
||||
Loading…
Reference in New Issue
Block a user