def process_data(df: pd.DataFrame, ext: str) -> dict:
    """Build geometry for a parsed table and summarise the result.

    The table is passed to ``detect_and_build_geometry``; when the result has
    no ``geometry`` attribute, or every geometry value is missing,
    ``attach_polygon_geometry_auto`` is applied to it as a second attempt.

    Args:
        df: Table read from the uploaded file or request payload.
        ext: File-type label echoed back in the payload (e.g. ``".pdf"``).

    Returns:
        A plain ``dict`` on every path, so callers can always wrap it with
        ``JSONResponse(content=...)``. When no GeoDataFrame with a
        ``geometry`` column could be produced, the payload carries zeroed
        counters and an empty preview; otherwise it carries row/column
        information, geometry statistics, warnings, and the full preview.
    """
    result = detect_and_build_geometry(df, master_polygons=None)

    # Second attempt when the first pass produced no usable geometry.
    if not hasattr(result, "geometry") or result.geometry.isna().all():
        result = attach_polygon_geometry_auto(result)

    if not (isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns):
        # Return a dict here (not a JSONResponse): callers wrap the return
        # value in JSONResponse themselves, and a nested response object is
        # not JSON-serialisable.
        return {
            "message": "Tidak menemukan tabel yang relevan.",
            "file_type": ext,
            "rows": 0,
            "columns": 0,
            "geometry_valid": 0,
            "geometry_empty": 0,
            "geometry_valid_percent": 0,
            "warnings": [],
            "warning_examples": [],
            "preview": [],
        }

    if result.empty:
        geom_type = "None"
    else:
        geom_type = ", ".join(
            [g for g in result.geometry.geom_type.unique() if g]
        )
    null_geom = result.geometry.isna().sum()
    print(f"[INFO] Tipe Geometry: {geom_type}")
    print(f"[INFO] Jumlah geometry kosong: {null_geom}")

    # NOTE(review): the meaning of ``value=None`` in DataFrame.replace has
    # differed between pandas releases -- confirm against the pinned version.
    result = result.replace([pd.NA, float('inf'), float('-inf')], None)

    # Geometries are emitted as WKT text so the payload is JSON-friendly.
    if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
        result['geometry'] = result['geometry'].apply(
            lambda g: g.wkt if g is not None else None
        )

    total = len(result)
    # Evaluate the emptiness mask once and reuse it for counting and filtering.
    empty_mask = result['geometry'].apply(is_geom_empty)
    empty_count = int(empty_mask.sum())
    valid_count = total - empty_count
    # Guard against a zero-row result to avoid dividing by zero.
    match_percentage = (valid_count / total) * 100 if total else 0.0

    warnings = []
    warning_examples = []
    if empty_count > 0:
        warnings.append(
            f"{empty_count} dari {total} baris tidak memiliki geometry yang valid "
            f"({100 - match_percentage:.2f}% data gagal cocok)."
        )
        # Cap the examples so the payload stays bounded.
        warning_examples = result[empty_mask].head(500).to_dict(orient="records")

    preview_data = result.to_dict(orient="records")

    preview_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in preview_data
    ]
    warning_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
    ]

    return {
        "message": "File berhasil dibaca dan dianalisis.",
        "file_type": ext,
        "rows": int(total),
        "columns": list(map(str, result.columns)),
        "geometry_valid": int(valid_count),
        "geometry_empty": int(empty_count),
        "geometry_valid_percent": float(round(match_percentage, 2)),
        "warnings": warnings,
        "warning_examples": warning_safe,
        "preview": preview_safe,
    }
UploadFile = File(...)): +async def upload_file(file: UploadFile = File(...), page: Optional[str] = Form(None)): fname = file.filename ext = os.path.splitext(fname)[1].lower() contents = await file.read() @@ -152,7 +247,14 @@ async def upload_file(file: UploadFile = File(...)): df = read_csv(str(tmp_path)) elif ext == ".pdf": tbl = read_pdf(tmp_path) - if len(tbl) > 1: + if len(tbl) == 0: + response = { + "message": "Tidak ditemukan tabel valid", + "tables": tbl, + "file_type": ext + } + return JSONResponse(content=response) + elif len(tbl) > 1: response = { "message": "File berhasil dibaca dan dianalisis.", "tables": tbl, @@ -182,84 +284,12 @@ async def upload_file(file: UploadFile = File(...)): if df is None or (hasattr(df, "empty") and df.empty): return JSONResponse({"error": "No valid table detected"}, status_code=400) - - result = detect_and_build_geometry(df, master_polygons=None) - - if not hasattr(result, "geometry") or result.geometry.isna().all(): - result = attach_polygon_geometry_auto(result) - - if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns: - geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \ - if not result.empty else "None" - - null_geom = result.geometry.isna().sum() - print(f"[INFO] Tipe Geometry: {geom_type}") - print(f"[INFO] Jumlah geometry kosong: {null_geom}") - else: - response = { - "message": "Tidak menemukan tabel yang relevan.", - "file_type": ext, - "rows": 0, - "columns": 0, - "geometry_valid": 0, - "geometry_empty": 0, - "geometry_valid_percent": 0, - "warnings": [], - "warning_examples": [], - "preview": [] - } - - return JSONResponse(content=response) + + res = process_data(df, ext) tmp_path.unlink(missing_ok=True) - - result = result.replace([pd.NA, float('inf'), float('-inf')], None) - - if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns: - result['geometry'] = result['geometry'].apply( - lambda g: g.wkt if g is not None else None - ) - - empty_count = 
result['geometry'].apply(is_geom_empty).sum() - valid_count = len(result) - empty_count - match_percentage = (valid_count / len(result)) * 100 - - warnings = [] - if empty_count > 0: - warnings.append( - f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid " - f"({100 - match_percentage:.2f}% data gagal cocok)." - ) - - if empty_count > 0: - examples = result[result['geometry'].apply(is_geom_empty)].head(500) - warning_examples = examples.to_dict(orient="records") - else: - warning_examples = [] - - preview_data = result.to_dict(orient="records") - - preview_safe = [ - {k: safe_json(v) for k, v in row.items()} for row in preview_data - ] - - warning_safe = [ - {k: safe_json(v) for k, v in row.items()} for row in warning_examples - ] - - response = { - "message": "File berhasil dibaca dan dianalisis.", - "rows": int(len(result)), - "columns": list(map(str, result.columns)), - "geometry_valid": int(valid_count), - "geometry_empty": int(empty_count), - "geometry_valid_percent": float(round(match_percentage, 2)), - "warnings": warnings, - "warning_examples": warning_safe, - "preview": preview_safe - } - - return JSONResponse(content=response) + + return JSONResponse(content=res) except Exception as e: print(f"[ERROR] {e}") @@ -272,9 +302,6 @@ async def upload_file(file: UploadFile = File(...)): - - - class PdfRequest(BaseModel): title: str columns: List[str] @@ -287,84 +314,9 @@ async def upload_file(payload: PdfRequest): if df is None or (hasattr(df, "empty") and df.empty): return JSONResponse({"error": "No valid table detected"}, status_code=400) - result = detect_and_build_geometry(df, master_polygons=None) + res = process_data(df, '.pdf') - if not hasattr(result, "geometry") or result.geometry.isna().all(): - print("[INFO] Mencoba menambahkan geometry (MultiPolygon) berdasarkan nama wilayah...") - result = attach_polygon_geometry_auto(result) - - if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns: - geom_type = ", 
".join([g for g in result.geometry.geom_type.unique() if g]) \ - if not result.empty else "None" - - null_geom = result.geometry.isna().sum() - print(f"[INFO] Tipe Geometry: {geom_type}") - print(f"[INFO] Jumlah geometry kosong: {null_geom}") - else: - print("[WARN] Object bukan GeoDataFrame atau tidak punya kolom geometry.") - print(f"[DEBUG] Kolom saat ini: {list(result.columns)}") - response = { - "message": "Tidak menemukan tabel yang relevan.", - "file_type": ".pdf", - "rows": 0, - "columns": 0, - "geometry_valid": 0, - "geometry_empty": 0, - "geometry_valid_percent": 0, - "warnings": [], - "warning_examples": [], - "preview": [] - } - - return JSONResponse(content=response) - - result = result.replace([pd.NA, float('inf'), float('-inf')], None) - if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns: - result['geometry'] = result['geometry'].apply( - lambda g: g.wkt if g is not None else None - ) - - empty_count = result['geometry'].apply(is_geom_empty).sum() - valid_count = len(result) - empty_count - match_percentage = (valid_count / len(result)) * 100 - - warnings = [] - if empty_count > 0: - warnings.append( - f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid " - f"({100 - match_percentage:.2f}% data gagal cocok)." 
- ) - - if empty_count > 0: - examples = result[result['geometry'].apply(is_geom_empty)].head(500) - warning_examples = examples.to_dict(orient="records") - else: - warning_examples = [] - - # preview_data = result.head(5).to_dict(orient="records") - preview_data = result.to_dict(orient="records") - - preview_safe = [ - {k: safe_json(v) for k, v in row.items()} for row in preview_data - ] - - warning_safe = [ - {k: safe_json(v) for k, v in row.items()} for row in warning_examples - ] - - response = { - "message": "File berhasil dibaca dan dianalisis.", - "rows": int(len(result)), - "columns": list(map(str, result.columns)), - "geometry_valid": int(valid_count), - "geometry_empty": int(empty_count), - "geometry_valid_percent": float(round(match_percentage, 2)), - "warnings": warnings, - "warning_examples": warning_safe, - "preview": preview_safe - } - - return JSONResponse(content=response) + return JSONResponse(content=res) except Exception as e: print(f"[ERROR] {e}")