split main function

This commit is contained in:
dmsanhrProject 2025-11-04 13:34:06 +07:00
parent 62bd02d660
commit fb8a7b96e7

274
main.py
View File

@ -5,14 +5,13 @@ import numpy as np
import zipfile import zipfile
from shapely.geometry.base import BaseGeometry from shapely.geometry.base import BaseGeometry
from shapely.geometry import base as shapely_base from shapely.geometry import base as shapely_base
from fastapi import FastAPI, File, UploadFile, HTTPException from fastapi import FastAPI, File, Form, UploadFile, HTTPException
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from core.config import UPLOAD_FOLDER, MAX_FILE_MB from core.config import UPLOAD_FOLDER, MAX_FILE_MB
from services.reader_csv import read_csv from services.reader_csv import read_csv
from services.reader_shp import read_shp from services.reader_shp import read_shp
from services.reader_gdb import read_gdb from services.reader_gdb import read_gdb
# from services.reader_pdf import convert_df, read_pdf from services.reader_pdf import convert_df, read_pdf
from testing.test_pdf_multi import convert_df, read_pdf
from services.geometry_detector import detect_and_build_geometry from services.geometry_detector import detect_and_build_geometry
from services.geometry_detector import attach_polygon_geometry_auto from services.geometry_detector import attach_polygon_geometry_auto
from database.connection import engine from database.connection import engine
@ -22,14 +21,14 @@ import pathlib
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel from pydantic import BaseModel
from typing import List from typing import List, Optional
from shapely import wkt from shapely import wkt
from sqlalchemy import text from sqlalchemy import text
UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True) UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True)
apiVersion = "1.1.0" apiVersion = "2.1.0"
app = FastAPI( app = FastAPI(
title="ETL Geo Upload Service", title="ETL Geo Upload Service",
version=apiVersion, version=apiVersion,
@ -109,6 +108,102 @@ def detect_zip_type(zip_path: str) -> str:
return "unknown" return "unknown"
def process_data(df: pd.DataFrame, ext: str) -> dict:
    """Build geometry for a parsed table and summarise the outcome.

    The frame is passed through ``detect_and_build_geometry``; when that
    yields no usable geometry, ``attach_polygon_geometry_auto`` is tried as a
    fallback. Geometries are then converted to WKT text and a summary payload
    is assembled.

    Args:
        df: Table extracted from the uploaded file.
        ext: File extension of the upload (e.g. ``".pdf"``), echoed back in
            the ``file_type`` field of the payload.

    Returns:
        A plain ``dict`` in every case, so callers can wrap it themselves with
        ``JSONResponse(content=...)``. When no geometry column could be
        produced, the dict carries zeroed counters and an empty preview.
    """
    result = detect_and_build_geometry(df, master_polygons=None)

    # Fall back to the automatic polygon lookup when the first pass produced
    # no geometry at all.
    if not hasattr(result, "geometry") or result.geometry.isna().all():
        result = attach_polygon_geometry_auto(result)

    if not (isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns):
        # Return the dict itself, not a JSONResponse: the callers wrap the
        # return value in JSONResponse, and a response object nested inside
        # another response's content cannot be JSON-encoded.
        return {
            "message": "Tidak menemukan tabel yang relevan.",
            "file_type": ext,
            "rows": 0,
            "columns": 0,
            "geometry_valid": 0,
            "geometry_empty": 0,
            "geometry_valid_percent": 0,
            "warnings": [],
            "warning_examples": [],
            "preview": []
        }

    geom_type = (
        ", ".join([g for g in result.geometry.geom_type.unique() if g])
        if not result.empty else "None"
    )
    null_geom = result.geometry.isna().sum()
    print(f"[INFO] Tipe Geometry: {geom_type}")
    print(f"[INFO] Jumlah geometry kosong: {null_geom}")

    # Normalise values that cannot be represented in JSON.
    result = result.replace([pd.NA, float('inf'), float('-inf')], None)

    if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
        result['geometry'] = result['geometry'].apply(
            lambda g: g.wkt if g is not None else None
        )

    # Compute the emptiness mask once; it drives both the counters and the
    # selection of example rows below.
    empty_mask = result['geometry'].apply(is_geom_empty)
    total_rows = len(result)
    empty_count = int(empty_mask.sum())
    valid_count = total_rows - empty_count
    # Guard the zero-row case: dividing by zero would yield NaN (or raise),
    # which cannot be serialised in the JSON response.
    match_percentage = (valid_count / total_rows) * 100 if total_rows else 0.0

    warnings = []
    warning_examples = []
    if empty_count > 0:
        warnings.append(
            f"{empty_count} dari {total_rows} baris tidak memiliki geometry yang valid "
            f"({100 - match_percentage:.2f}% data gagal cocok)."
        )
        # Cap the number of example rows so the payload stays bounded.
        warning_examples = result[empty_mask].head(500).to_dict(orient="records")

    preview_data = result.to_dict(orient="records")

    preview_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in preview_data
    ]
    warning_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
    ]

    return {
        "message": "File berhasil dibaca dan dianalisis.",
        "file_type": ext,
        "rows": int(total_rows),
        "columns": list(map(str, result.columns)),
        "geometry_valid": int(valid_count),
        "geometry_empty": int(empty_count),
        "geometry_valid_percent": float(round(match_percentage, 2)),
        "warnings": warnings,
        "warning_examples": warning_safe,
        "preview": preview_safe
    }
from datetime import datetime from datetime import datetime
@app.get("/status", tags=["System"]) @app.get("/status", tags=["System"])
async def server_status(): async def server_status():
@ -130,7 +225,7 @@ async def server_status():
@app.post("/upload") @app.post("/upload")
async def upload_file(file: UploadFile = File(...)): async def upload_file(file: UploadFile = File(...), page: Optional[str] = Form(None)):
fname = file.filename fname = file.filename
ext = os.path.splitext(fname)[1].lower() ext = os.path.splitext(fname)[1].lower()
contents = await file.read() contents = await file.read()
@ -152,7 +247,14 @@ async def upload_file(file: UploadFile = File(...)):
df = read_csv(str(tmp_path)) df = read_csv(str(tmp_path))
elif ext == ".pdf": elif ext == ".pdf":
tbl = read_pdf(tmp_path) tbl = read_pdf(tmp_path)
if len(tbl) > 1: if len(tbl) == 0:
response = {
"message": "Tidak ditemukan tabel valid",
"tables": tbl,
"file_type": ext
}
return JSONResponse(content=response)
elif len(tbl) > 1:
response = { response = {
"message": "File berhasil dibaca dan dianalisis.", "message": "File berhasil dibaca dan dianalisis.",
"tables": tbl, "tables": tbl,
@ -183,83 +285,11 @@ async def upload_file(file: UploadFile = File(...)):
if df is None or (hasattr(df, "empty") and df.empty): if df is None or (hasattr(df, "empty") and df.empty):
return JSONResponse({"error": "No valid table detected"}, status_code=400) return JSONResponse({"error": "No valid table detected"}, status_code=400)
result = detect_and_build_geometry(df, master_polygons=None) res = process_data(df, ext)
if not hasattr(result, "geometry") or result.geometry.isna().all():
result = attach_polygon_geometry_auto(result)
if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
if not result.empty else "None"
null_geom = result.geometry.isna().sum()
print(f"[INFO] Tipe Geometry: {geom_type}")
print(f"[INFO] Jumlah geometry kosong: {null_geom}")
else:
response = {
"message": "Tidak menemukan tabel yang relevan.",
"file_type": ext,
"rows": 0,
"columns": 0,
"geometry_valid": 0,
"geometry_empty": 0,
"geometry_valid_percent": 0,
"warnings": [],
"warning_examples": [],
"preview": []
}
return JSONResponse(content=response)
tmp_path.unlink(missing_ok=True) tmp_path.unlink(missing_ok=True)
result = result.replace([pd.NA, float('inf'), float('-inf')], None) return JSONResponse(content=res)
if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
result['geometry'] = result['geometry'].apply(
lambda g: g.wkt if g is not None else None
)
empty_count = result['geometry'].apply(is_geom_empty).sum()
valid_count = len(result) - empty_count
match_percentage = (valid_count / len(result)) * 100
warnings = []
if empty_count > 0:
warnings.append(
f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
f"({100 - match_percentage:.2f}% data gagal cocok)."
)
if empty_count > 0:
examples = result[result['geometry'].apply(is_geom_empty)].head(500)
warning_examples = examples.to_dict(orient="records")
else:
warning_examples = []
preview_data = result.to_dict(orient="records")
preview_safe = [
{k: safe_json(v) for k, v in row.items()} for row in preview_data
]
warning_safe = [
{k: safe_json(v) for k, v in row.items()} for row in warning_examples
]
response = {
"message": "File berhasil dibaca dan dianalisis.",
"rows": int(len(result)),
"columns": list(map(str, result.columns)),
"geometry_valid": int(valid_count),
"geometry_empty": int(empty_count),
"geometry_valid_percent": float(round(match_percentage, 2)),
"warnings": warnings,
"warning_examples": warning_safe,
"preview": preview_safe
}
return JSONResponse(content=response)
except Exception as e: except Exception as e:
print(f"[ERROR] {e}") print(f"[ERROR] {e}")
@ -272,9 +302,6 @@ async def upload_file(file: UploadFile = File(...)):
class PdfRequest(BaseModel): class PdfRequest(BaseModel):
title: str title: str
columns: List[str] columns: List[str]
@ -287,84 +314,9 @@ async def upload_file(payload: PdfRequest):
if df is None or (hasattr(df, "empty") and df.empty): if df is None or (hasattr(df, "empty") and df.empty):
return JSONResponse({"error": "No valid table detected"}, status_code=400) return JSONResponse({"error": "No valid table detected"}, status_code=400)
result = detect_and_build_geometry(df, master_polygons=None) res = process_data(df, '.pdf')
if not hasattr(result, "geometry") or result.geometry.isna().all(): return JSONResponse(content=res)
print("[INFO] Mencoba menambahkan geometry (MultiPolygon) berdasarkan nama wilayah...")
result = attach_polygon_geometry_auto(result)
if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
if not result.empty else "None"
null_geom = result.geometry.isna().sum()
print(f"[INFO] Tipe Geometry: {geom_type}")
print(f"[INFO] Jumlah geometry kosong: {null_geom}")
else:
print("[WARN] Object bukan GeoDataFrame atau tidak punya kolom geometry.")
print(f"[DEBUG] Kolom saat ini: {list(result.columns)}")
response = {
"message": "Tidak menemukan tabel yang relevan.",
"file_type": ".pdf",
"rows": 0,
"columns": 0,
"geometry_valid": 0,
"geometry_empty": 0,
"geometry_valid_percent": 0,
"warnings": [],
"warning_examples": [],
"preview": []
}
return JSONResponse(content=response)
result = result.replace([pd.NA, float('inf'), float('-inf')], None)
if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
result['geometry'] = result['geometry'].apply(
lambda g: g.wkt if g is not None else None
)
empty_count = result['geometry'].apply(is_geom_empty).sum()
valid_count = len(result) - empty_count
match_percentage = (valid_count / len(result)) * 100
warnings = []
if empty_count > 0:
warnings.append(
f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
f"({100 - match_percentage:.2f}% data gagal cocok)."
)
if empty_count > 0:
examples = result[result['geometry'].apply(is_geom_empty)].head(500)
warning_examples = examples.to_dict(orient="records")
else:
warning_examples = []
# preview_data = result.head(5).to_dict(orient="records")
preview_data = result.to_dict(orient="records")
preview_safe = [
{k: safe_json(v) for k, v in row.items()} for row in preview_data
]
warning_safe = [
{k: safe_json(v) for k, v in row.items()} for row in warning_examples
]
response = {
"message": "File berhasil dibaca dan dianalisis.",
"rows": int(len(result)),
"columns": list(map(str, result.columns)),
"geometry_valid": int(valid_count),
"geometry_empty": int(empty_count),
"geometry_valid_percent": float(round(match_percentage, 2)),
"warnings": warnings,
"warning_examples": warning_safe,
"preview": preview_safe
}
return JSONResponse(content=response)
except Exception as e: except Exception as e:
print(f"[ERROR] {e}") print(f"[ERROR] {e}")