From 9c856c75dabe8b5a49e5b5f96fd1dfd9a726fe09 Mon Sep 17 00:00:00 2001 From: dmsanhrProject Date: Thu, 6 Nov 2025 14:23:24 +0700 Subject: [PATCH] refactor folder structure --- .gitignore | 3 + core/config.py | 56 ++- main.py | 414 +----------------- routes/auth_router.py | 15 + routes/router.py | 28 ++ routes/upload_file_router.py | 34 ++ services/auth/auth.py | 0 .../geom_detector}/geometry_detector.py | 8 +- .../{ => upload_file/read_csv}/reader_csv.py | 0 .../{ => upload_file/read_gdb}/reader_gdb.py | 0 .../read_pdf}/filter_column.py | 0 .../{ => upload_file/read_pdf}/reader_pdf.py | 2 +- .../{ => upload_file/read_shp}/reader_shp.py | 0 services/upload_file/upload.py | 298 +++++++++++++ 14 files changed, 446 insertions(+), 412 deletions(-) create mode 100644 routes/auth_router.py create mode 100644 routes/router.py create mode 100644 routes/upload_file_router.py create mode 100644 services/auth/auth.py rename services/{ => upload_file/geom_detector}/geometry_detector.py (99%) rename services/{ => upload_file/read_csv}/reader_csv.py (100%) rename services/{ => upload_file/read_gdb}/reader_gdb.py (100%) rename services/{ => upload_file/read_pdf}/filter_column.py (100%) rename services/{ => upload_file/read_pdf}/reader_pdf.py (99%) rename services/{ => upload_file/read_shp}/reader_shp.py (100%) create mode 100644 services/upload_file/upload.py diff --git a/.gitignore b/.gitignore index 4b0084a..65b9f02 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,14 @@ .env main_old.py +main_mess.py sijalinmaja.json venv/ pdf/ data_cache/ +service_tmp/ testing/ test-ai/ uploads/ +scrapp/ diff --git a/core/config.py b/core/config.py index 65999ba..4889546 100644 --- a/core/config.py +++ b/core/config.py @@ -4,16 +4,58 @@ import os load_dotenv() +API_VERSION = "2.1.3" + POSTGIS_URL = os.getenv("POSTGIS_URL") UPLOAD_FOLDER = Path(os.getenv("UPLOAD_FOLDER", "./uploads")) -MAX_FILE_MB = int(os.getenv("MAX_FILE_MB", 200)) +MAX_FILE_MB = int(os.getenv("MAX_FILE_MB", 30)) + +ALLOWED_ORIGINS = [ + "http://localhost:3000", + "http://127.0.0.1:3000", + "http://localhost:5173", + "http://127.0.0.1:5173", + + "192.168.60.24:5173", + "http://labai.polinema.ac.id:666", +] REFERENCE_DB_URL = os.getenv("REFERENCE_DB_URL") REFERENCE_SCHEMA = os.getenv("REFERENCE_SCHEMA", "batas_wilayah") -REF_COLUMN_MAP = { - 'desa': 'NAMOBJ', - 'kecamatan': 'NAMA_KECAMATAN', - 'kabupaten': 'NAMOBJ' -} +DESA_REF = "WADMKD" +KEC_REF = "WADMKC" +KAB_REF = "WADMKK" -CACHE_FOLDER = Path(os.getenv("CACHE_FOLDER", "./cache")) \ No newline at end of file +CACHE_FOLDER = Path(os.getenv("CACHE_FOLDER", "./cache")) + + +VALID_WKT_PREFIXES = ( + "POINT", + "POINT Z", + "POINT M", + "POINT ZM", + "MULTIPOINT", + "MULTIPOINT Z", + "MULTIPOINT M", + "MULTIPOINT ZM", + "LINESTRING", + "LINESTRING Z", + "LINESTRING M", + "LINESTRING ZM", + "MULTILINESTRING", + "MULTILINESTRING Z", + "MULTILINESTRING M", + "MULTILINESTRING ZM", + "POLYGON", + "POLYGON Z", + "POLYGON M", + "POLYGON ZM", + "MULTIPOLYGON", + "MULTIPOLYGON Z", + "MULTIPOLYGON M", + "MULTIPOLYGON ZM", + "GEOMETRYCOLLECTION", + "GEOMETRYCOLLECTION Z", + "GEOMETRYCOLLECTION M", + "GEOMETRYCOLLECTION ZM", +) \ No newline at end of file diff --git a/main.py b/main.py index 49477c1..799ee8b 100644 --- a/main.py +++ b/main.py @@ -1,415 +1,29 @@ -import os -import pandas as pd -import geopandas as gpd -import numpy as np -import zipfile -from shapely.geometry.base import BaseGeometry -from shapely.geometry import base as shapely_base -from fastapi import FastAPI, File, Form, UploadFile, HTTPException -from fastapi.responses import JSONResponse -from core.config import UPLOAD_FOLDER, MAX_FILE_MB -from services.reader_csv import read_csv -from services.reader_shp import read_shp -from services.reader_gdb import read_gdb -from services.reader_pdf import convert_df, read_pdf -from services.geometry_detector import detect_and_build_geometry -from services.geometry_detector import attach_polygon_geometry_auto +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from core.config import API_VERSION, ALLOWED_ORIGINS from database.connection import engine from database.models import Base -import time -from datetime import datetime, timedelta -import pathlib -from fastapi.middleware.cors import CORSMiddleware +from routes.router import router as system_router +from routes.upload_file_router import router as upload_router +from routes.auth_router import router as auth_router -from pydantic import BaseModel -from typing import List, Optional -from shapely import wkt -from sqlalchemy import text - - -UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True) - -apiVersion = "2.1.3" app = FastAPI( title="ETL Geo Upload Service", - version=apiVersion, + version=API_VERSION, description="Upload Automation API" ) - - -origins = [ - "http://localhost:3000", - "http://127.0.0.1:3000", - "http://localhost:5173", - "http://127.0.0.1:5173", - - "192.168.60.24:5173", - "http://labai.polinema.ac.id:666", -] - app.add_middleware( CORSMiddleware, - allow_origins=origins, + allow_origins=ALLOWED_ORIGINS, allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], + allow_methods=["*"], + allow_headers=["*"], ) - - -# Create upload_logs table if not exists Base.metadata.create_all(bind=engine) -def generate_table_name(filename: str, prefix: str = "data"): - name = pathlib.Path(filename).stem - ts = time.strftime("%Y%m%d%H%M%S") - safe = "".join([c if c.isalnum() or c=='_' else '_' for c in name]) - return f"{prefix}_{safe}_{ts}" - - -def is_geom_empty(g): - if g is None: - return True - if isinstance(g, float) and pd.isna(g): - return True - if isinstance(g, BaseGeometry): - return g.is_empty - return False - - -def safe_json(value): - """Konversi aman untuk semua tipe numpy/pandas/shapely ke tipe JSON-serializable""" - if isinstance(value, (np.int64, np.int32)): - return int(value) - if isinstance(value, (np.float64, np.float32)): - return float(value) - if isinstance(value, pd.Timestamp): - return value.isoformat() - if isinstance(value, shapely_base.BaseGeometry): - return str(value) # ubah ke WKT string - if pd.isna(value): - return None - return value - - -def detect_zip_type(zip_path: str) -> str: - with zipfile.ZipFile(zip_path, "r") as zip_ref: - files = zip_ref.namelist() - - if any(f.lower().endswith(".gdb/") or ".gdb/" in f.lower() for f in files): - return "gdb" - - if any(f.lower().endswith(ext) for ext in [".gdbtable", ".gdbtablx", ".gdbindexes", ".spx"] for f in files): - return "gdb" - - if any(f.lower().endswith(".shp") for f in files): - return "shp" - - return "unknown" - - - -def process_data(df: pd.DataFrame, ext: str): - result = detect_and_build_geometry(df, master_polygons=None) - - if not hasattr(result, "geometry") or result.geometry.isna().all(): - result = attach_polygon_geometry_auto(result) - - if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns: - geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \ - if not result.empty else "None" - - null_geom = result.geometry.isna().sum() - print(f"[INFO] Tipe Geometry: {geom_type}") - print(f"[INFO] Jumlah geometry kosong: {null_geom}") - else: - response = { - "message": "Tidak menemukan tabel yang relevan.", - "file_type": ext, - "rows": 0, - "columns": 0, - "geometry_valid": 0, - "geometry_empty": 0, - "geometry_valid_percent": 0, - "warnings": [], - "warning_examples": [], - "preview": [] - } - - return JSONResponse(content=response) - - result = result.replace([pd.NA, float('inf'), float('-inf')], None) - if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns: - result['geometry'] = result['geometry'].apply( - lambda g: g.wkt if g is not None else None - ) - - empty_count = result['geometry'].apply(is_geom_empty).sum() - valid_count = len(result) - empty_count - match_percentage = (valid_count / len(result)) * 100 - - warnings = [] - if empty_count > 0: - warnings.append( - f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid " - f"({100 - match_percentage:.2f}% data gagal cocok)." - ) - - if empty_count > 0: - examples = result[result['geometry'].apply(is_geom_empty)].head(500) - warning_examples = examples.to_dict(orient="records") - else: - warning_examples = [] - - preview_data = result.to_dict(orient="records") - - preview_safe = [ - {k: safe_json(v) for k, v in row.items()} for row in preview_data - ] - - warning_safe = [ - {k: safe_json(v) for k, v in row.items()} for row in warning_examples - ] - - response = { - "message": "File berhasil dibaca dan dianalisis.", - "file_type": ext, - "rows": int(len(result)), - "columns": list(map(str, result.columns)), - "geometry_valid": int(valid_count), - "geometry_empty": int(empty_count), - "geometry_valid_percent": float(round(match_percentage, 2)), - "warnings": warnings, - "warning_examples": warning_safe, - "preview": preview_safe - } - - # return JSONResponse(content=response) - return response - - - - - - - - - - - - - - - - - -from datetime import datetime -@app.get("/status", tags=["System"]) -async def server_status(): - utc_time = datetime.utcnow() - wib_time = utc_time + timedelta(hours=7) - formatted_time = wib_time.strftime("%d-%m-%Y %H:%M:%S") - response = { - "status": "success", - "message": "Server is running smoothly ✅", - "data": { - "service": "upload_automation", - "status_code": 200, - "timestamp": f"{formatted_time} WIB" - }, - "meta": { - "version": apiVersion, - "environment": "deployment" - } - } - return response - - - -@app.post("/upload") -async def upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form("")): - fname = file.filename - ext = os.path.splitext(fname)[1].lower() - contents = await file.read() - size_mb = len(contents) / (1024*1024) - if size_mb > MAX_FILE_MB: - raise HTTPException(status_code=413, detail="File too large") - tmp_path = UPLOAD_FOLDER / fname - with open(tmp_path, "wb") as f: - f.write(contents) - - try: - df = None - - print('ext', ext) - - if ext == ".csv": - df = read_csv(str(tmp_path)) - elif ext == ".xlsx": - df = read_csv(str(tmp_path), sheet) - elif ext == ".pdf": - tbl = read_pdf(tmp_path, page) - if len(tbl) == 0: - response = { - "message": "Tidak ditemukan tabel valid", - "tables": tbl, - "file_type": ext - } - return JSONResponse(content=response) - elif len(tbl) > 1: - response = { - "message": "File berhasil dibaca dan dianalisis.", - "tables": tbl, - "file_type": ext - } - return JSONResponse(content=response) - else: - df = convert_df(tbl[0]) - elif ext == ".zip": - zip_type = detect_zip_type(str(tmp_path)) - - if zip_type == "shp": - print("[INFO] ZIP terdeteksi sebagai Shapefile.") - df = read_shp(str(tmp_path)) - - elif zip_type == "gdb": - print("[INFO] ZIP terdeteksi sebagai Geodatabase (GDB).") - df = read_gdb(str(tmp_path)) - - else: - raise HTTPException( - status_code=400, - detail="ZIP file tidak mengandung SHP atau GDB yang valid." - ) - else: - raise HTTPException(status_code=400, detail="Unsupported file type") - - if df is None or (hasattr(df, "empty") and df.empty): - return JSONResponse({"error": "No valid table detected"}, status_code=400) - - res = process_data(df, ext) - - tmp_path.unlink(missing_ok=True) - - return JSONResponse(content=res) - - except Exception as e: - print(f"[ERROR] {e}") - return JSONResponse({"error": str(e)}, status_code=500) - - # finally: - # db_session.close() - - - - - -class PdfRequest(BaseModel): - title: str - columns: List[str] - rows: List[List] - -@app.post("/process-pdf") -async def upload_file(payload: PdfRequest): - try: - df = convert_df(payload.model_dump()) - if df is None or (hasattr(df, "empty") and df.empty): - return JSONResponse({"error": "No valid table detected"}, status_code=400) - - res = process_data(df, '.pdf') - - return JSONResponse(content=res) - - except Exception as e: - print(f"[ERROR] {e}") - - return JSONResponse({"error": str(e)}, status_code=500) - - # finally: - # db_session.close() - - - - - - - - -VALID_WKT_PREFIXES = ( - "POINT", - "POINT Z", - "POINT M", - "POINT ZM", - "MULTIPOINT", - "MULTIPOINT Z", - "MULTIPOINT M", - "MULTIPOINT ZM", - "LINESTRING", - "LINESTRING Z", - "LINESTRING M", - "LINESTRING ZM", - "MULTILINESTRING", - "MULTILINESTRING Z", - "MULTILINESTRING M", - "MULTILINESTRING ZM", - "POLYGON", - "POLYGON Z", - "POLYGON M", - "POLYGON ZM", - "MULTIPOLYGON", - "MULTIPOLYGON Z", - "MULTIPOLYGON M", - "MULTIPOLYGON ZM", - "GEOMETRYCOLLECTION", - "GEOMETRYCOLLECTION Z", - "GEOMETRYCOLLECTION M", - "GEOMETRYCOLLECTION ZM", -) - - -class UploadRequest(BaseModel): - title: str - rows: List[dict] - columns: List[str] - -@app.post("/upload_to_postgis") -def upload_to_postgis(payload: UploadRequest): - try: - table_name = payload.title.lower().replace(" ", "_").replace("-","_") - - df = pd.DataFrame(payload.rows) - print(f"[INFO] Diterima {len(df)} baris data dari frontend.") - - if "geometry" in df.columns: - df["geometry"] = df["geometry"].apply( - lambda g: wkt.loads(g) if isinstance(g, str) and g.strip().upper().startswith(VALID_WKT_PREFIXES) else None - ) - gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326") - else: - raise HTTPException(status_code=400, detail="Kolom geometry tidak ditemukan dalam data.") - - with engine.begin() as conn: - conn.execute(text(f"DROP TABLE IF EXISTS {table_name}")) - - gdf.to_postgis(table_name, engine, if_exists="replace", index=False) - - with engine.begin() as conn: - conn.execute(text(f'ALTER TABLE "{table_name}" ADD COLUMN _id SERIAL PRIMARY KEY;')) - - print(f"[INFO] Tabel '{table_name}' berhasil dibuat di PostGIS ({len(gdf)} baris).") - - return { - "table_name": table_name, - "status": "success", - "message": f"Tabel '{table_name}' berhasil diunggah ke PostGIS.", - "total_rows": len(gdf), - "geometry_type": list(gdf.geom_type.unique()) - } - - except Exception as e: - print(f"[ERROR] Gagal upload ke PostGIS: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - - +# Register routers +app.include_router(system_router, tags=["System"]) +app.include_router(auth_router, prefix="/auth", tags=["Auth"]) +app.include_router(upload_router, prefix="/upload", tags=["Upload"]) diff --git a/routes/auth_router.py b/routes/auth_router.py new file mode 100644 index 0000000..7896e26 --- /dev/null +++ b/routes/auth_router.py @@ -0,0 +1,15 @@ +from fastapi import APIRouter +from core.config import API_VERSION + +router = APIRouter() + +@router.get("/login") +async def login(): + return {"status": "success"} + + + + + + + diff --git a/routes/router.py b/routes/router.py new file mode 100644 index 0000000..27c272d --- /dev/null +++ b/routes/router.py @@ -0,0 +1,28 @@ +from fastapi import APIRouter +from datetime import datetime, timedelta +from core.config import API_VERSION + +router = APIRouter() + +@router.get("/status") +async def server_status(): + utc_time = datetime.utcnow() + wib_time = utc_time + timedelta(hours=7) + formatted_time = wib_time.strftime("%d-%m-%Y %H:%M:%S") + + return { + "status": "success", + "message": "Server is running smoothly ✅", + "data": { + "service": "upload_automation", + "timestamp": f"{formatted_time} WIB" + }, + "meta": {"version": API_VERSION, "environment": "deployment"} + } + + + + + + + diff --git a/routes/upload_file_router.py b/routes/upload_file_router.py new file mode 100644 index 0000000..9daadb6 --- /dev/null +++ b/routes/upload_file_router.py @@ -0,0 +1,34 @@ + +from fastapi import APIRouter, File, Form, UploadFile +from pydantic import BaseModel +from typing import List, Optional +from services.upload_file.upload import handle_upload_file, handle_process_pdf, handle_to_postgis + +router = APIRouter() + + +@router.post("/file") +async def upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form("")): + return await handle_upload_file(file, page, sheet) + + + +class PdfRequest(BaseModel): + title: str + columns: List[str] + rows: List[List] + +@router.post("/process-pdf") +async def upload_file(payload: PdfRequest): + return await handle_process_pdf(payload) + + + +class UploadRequest(BaseModel): + title: str + rows: List[dict] + columns: List[str] + +@router.post("/to-postgis") +def upload_to_postgis(payload: UploadRequest): + return handle_to_postgis(payload) \ No newline at end of file diff --git a/services/auth/auth.py b/services/auth/auth.py new file mode 100644 index 0000000..e69de29 diff --git a/services/geometry_detector.py b/services/upload_file/geom_detector/geometry_detector.py similarity index 99% rename from services/geometry_detector.py rename to services/upload_file/geom_detector/geometry_detector.py index f89d0f2..ac00b31 100644 --- a/services/geometry_detector.py +++ b/services/upload_file/geom_detector/geometry_detector.py @@ -8,7 +8,7 @@ from shapely import wkt from rapidfuzz import process, fuzz from sqlalchemy import create_engine from shapely.geometry.base import BaseGeometry -from core.config import REFERENCE_DB_URL, REFERENCE_SCHEMA +from core.config import REFERENCE_DB_URL, REFERENCE_SCHEMA, DESA_REF, KEC_REF, KAB_REF # ============================================================ # KONFIGURASI DAN KONSTANTA @@ -362,9 +362,9 @@ def attach_polygon_geometry_auto(df: pd.DataFrame): return df # kolom di referensi - desa_ref = "WADMKD" - kec_ref = "WADMKC" - kab_ref = "WADMKK" + desa_ref = DESA_REF + kec_ref = KEC_REF + kab_ref = KAB_REF if desa_col is not None: df[desa_col] = df[desa_col].astype(str).apply(lambda x: normalize_name(x, "desa")) diff --git a/services/reader_csv.py b/services/upload_file/read_csv/reader_csv.py similarity index 100% rename from services/reader_csv.py rename to services/upload_file/read_csv/reader_csv.py diff --git a/services/reader_gdb.py b/services/upload_file/read_gdb/reader_gdb.py similarity index 100% rename from services/reader_gdb.py rename to services/upload_file/read_gdb/reader_gdb.py diff --git a/services/filter_column.py b/services/upload_file/read_pdf/filter_column.py similarity index 100% rename from services/filter_column.py rename to services/upload_file/read_pdf/filter_column.py diff --git a/services/reader_pdf.py b/services/upload_file/read_pdf/reader_pdf.py similarity index 99% rename from services/reader_pdf.py rename to services/upload_file/read_pdf/reader_pdf.py index 0ed620e..20ff291 100644 --- a/services/reader_pdf.py +++ b/services/upload_file/read_pdf/reader_pdf.py @@ -1,7 +1,7 @@ import pdfplumber import re import pandas as pd -from services.filter_column import filter_geo_admin_column +from services.upload_file.read_pdf.filter_column import filter_geo_admin_column def is_number(s): if s is None: diff --git a/services/reader_shp.py b/services/upload_file/read_shp/reader_shp.py similarity index 100% rename from services/reader_shp.py rename to services/upload_file/read_shp/reader_shp.py diff --git a/services/upload_file/upload.py b/services/upload_file/upload.py new file mode 100644 index 0000000..1e60690 --- /dev/null +++ b/services/upload_file/upload.py @@ -0,0 +1,298 @@ +import os +import pandas as pd +import geopandas as gpd +import numpy as np +import zipfile +from shapely.geometry.base import BaseGeometry +from shapely.geometry import base as shapely_base +from fastapi import File, Form, UploadFile, HTTPException +from fastapi.responses import JSONResponse +from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES +from services.upload_file.read_csv.reader_csv import read_csv +from services.upload_file.read_shp.reader_shp import read_shp +from services.upload_file.read_gdb.reader_gdb import read_gdb +from services.upload_file.read_pdf.reader_pdf import convert_df, read_pdf +from services.upload_file.geom_detector.geometry_detector import detect_and_build_geometry +from services.upload_file.geom_detector.geometry_detector import attach_polygon_geometry_auto +from database.connection import engine +from database.models import Base +from pydantic import BaseModel +from typing import List, Optional +from shapely import wkt +from sqlalchemy import text +Base.metadata.create_all(bind=engine) + + +def is_geom_empty(g): + if g is None: + return True + if isinstance(g, float) and pd.isna(g): + return True + if isinstance(g, BaseGeometry): + return g.is_empty + return False + + +def safe_json(value): + """Konversi aman untuk semua tipe numpy/pandas/shapely ke tipe JSON-serializable""" + if isinstance(value, (np.int64, np.int32)): + return int(value) + if isinstance(value, (np.float64, np.float32)): + return float(value) + if isinstance(value, pd.Timestamp): + return value.isoformat() + if isinstance(value, shapely_base.BaseGeometry): + return str(value) # convert to WKT string + if pd.isna(value): + return None + return value + + +def detect_zip_type(zip_path: str) -> str: + with zipfile.ZipFile(zip_path, "r") as zip_ref: + files = zip_ref.namelist() + + if any(f.lower().endswith(".gdb/") or ".gdb/" in f.lower() for f in files): + return "gdb" + + if any(f.lower().endswith(ext) for ext in [".gdbtable", ".gdbtablx", ".gdbindexes", ".spx"] for f in files): + return "gdb" + + if any(f.lower().endswith(".shp") for f in files): + return "shp" + + return "unknown" + +def process_data(df: pd.DataFrame, ext: str): + result = detect_and_build_geometry(df, master_polygons=None) + + if not hasattr(result, "geometry") or result.geometry.isna().all(): + result = attach_polygon_geometry_auto(result) + + if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns: + geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \ + if not result.empty else "None" + + null_geom = result.geometry.isna().sum() + print(f"[INFO] Tipe Geometry: {geom_type}") + print(f"[INFO] Jumlah geometry kosong: {null_geom}") + else: + response = { + "message": "Tidak menemukan tabel yang relevan.", + "file_type": ext, + "rows": 0, + "columns": 0, + "geometry_valid": 0, + "geometry_empty": 0, + "geometry_valid_percent": 0, + "warnings": [], + "warning_examples": [], + "preview": [] + } + + return JSONResponse(content=response) + + result = result.replace([pd.NA, float('inf'), float('-inf')], None) + if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns: + result['geometry'] = result['geometry'].apply( + lambda g: g.wkt if g is not None else None + ) + + empty_count = result['geometry'].apply(is_geom_empty).sum() + valid_count = len(result) - empty_count + match_percentage = (valid_count / len(result)) * 100 + + warnings = [] + if empty_count > 0: + warnings.append( + f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid " + f"({100 - match_percentage:.2f}% data gagal cocok)." + ) + + if empty_count > 0: + examples = result[result['geometry'].apply(is_geom_empty)].head(500) + warning_examples = examples.to_dict(orient="records") + else: + warning_examples = [] + + preview_data = result.to_dict(orient="records") + + preview_safe = [ + {k: safe_json(v) for k, v in row.items()} for row in preview_data + ] + + warning_safe = [ + {k: safe_json(v) for k, v in row.items()} for row in warning_examples + ] + + response = { + "message": "File berhasil dibaca dan dianalisis.", + "file_type": ext, + "rows": int(len(result)), + "columns": list(map(str, result.columns)), + "geometry_valid": int(valid_count), + "geometry_empty": int(empty_count), + "geometry_valid_percent": float(round(match_percentage, 2)), + "warnings": warnings, + "warning_examples": warning_safe, + "preview": preview_safe + } + + # return JSONResponse(content=response) + return response + + + + + + + + + + +async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form("")): + fname = file.filename + ext = os.path.splitext(fname)[1].lower() + contents = await file.read() + size_mb = len(contents) / (1024*1024) + if size_mb > MAX_FILE_MB: + raise HTTPException(status_code=413, detail="File too large") + tmp_path = UPLOAD_FOLDER / fname + with open(tmp_path, "wb") as f: + f.write(contents) + try: + df = None + print('ext', ext) + + if ext == ".csv": + df = read_csv(str(tmp_path)) + elif ext == ".xlsx": + df = read_csv(str(tmp_path), sheet) + elif ext == ".pdf": + tbl = read_pdf(tmp_path, page) + if len(tbl) == 0: + response = { + "message": "Tidak ditemukan tabel valid", + "tables": tbl, + "file_type": ext + } + return JSONResponse(content=response) + elif len(tbl) > 1: + response = { + "message": "File berhasil dibaca dan dianalisis.", + "tables": tbl, + "file_type": ext + } + return JSONResponse(content=response) + else: + df = convert_df(tbl[0]) + elif ext == ".zip": + zip_type = detect_zip_type(str(tmp_path)) + + if zip_type == "shp": + print("[INFO] ZIP terdeteksi sebagai Shapefile.") + df = read_shp(str(tmp_path)) + + elif zip_type == "gdb": + print("[INFO] ZIP terdeteksi sebagai Geodatabase (GDB).") + df = read_gdb(str(tmp_path)) + + else: + raise HTTPException( + status_code=400, + detail="ZIP file tidak mengandung SHP atau GDB yang valid." + ) + else: + raise HTTPException(status_code=400, detail="Unsupported file type") + + if df is None or (hasattr(df, "empty") and df.empty): + return JSONResponse({"error": "No valid table detected"}, status_code=400) + + res = process_data(df, ext) + + tmp_path.unlink(missing_ok=True) + + return JSONResponse(content=res) + + except Exception as e: + print(f"[ERROR] {e}") + return JSONResponse({"error": str(e)}, status_code=500) + + # finally: + # db_session.close() + + + + + +class PdfRequest(BaseModel): + title: str + columns: List[str] + rows: List[List] + +async def handle_process_pdf(payload: PdfRequest): + try: + df = convert_df(payload.model_dump()) + if df is None or (hasattr(df, "empty") and df.empty): + return JSONResponse({"error": "No valid table detected"}, status_code=400) + + res = process_data(df, '.pdf') + + return JSONResponse(content=res) + + except Exception as e: + print(f"[ERROR] {e}") + + return JSONResponse({"error": str(e)}, status_code=500) + + # finally: + # db_session.close() + + + + + + + + +class UploadRequest(BaseModel): + title: str + rows: List[dict] + columns: List[str] + +def handle_to_postgis(payload: UploadRequest): + try: + table_name = payload.title.lower().replace(" ", "_").replace("-","_") + + df = pd.DataFrame(payload.rows) + print(f"[INFO] Diterima {len(df)} baris data dari frontend.") + + if "geometry" in df.columns: + df["geometry"] = df["geometry"].apply( + lambda g: wkt.loads(g) if isinstance(g, str) and g.strip().upper().startswith(VALID_WKT_PREFIXES) else None + ) + gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326") + else: + raise HTTPException(status_code=400, detail="Kolom geometry tidak ditemukan dalam data.") + + with engine.begin() as conn: + conn.execute(text(f"DROP TABLE IF EXISTS {table_name}")) + + gdf.to_postgis(table_name, engine, if_exists="replace", index=False) + + with engine.begin() as conn: + conn.execute(text(f'ALTER TABLE "{table_name}" ADD COLUMN _id SERIAL PRIMARY KEY;')) + + print(f"[INFO] Tabel '{table_name}' berhasil dibuat di PostGIS ({len(gdf)} baris).") + + return { + "table_name": table_name, + "status": "success", + "message": f"Tabel '{table_name}' berhasil diunggah ke PostGIS.", + "total_rows": len(gdf), + "geometry_type": list(gdf.geom_type.unique()) + } + + except Exception as e: + print(f"[ERROR] Gagal upload ke PostGIS: {e}") + raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file