split main function

This commit is contained in:
dmsanhrProject 2025-11-04 13:34:06 +07:00
parent 62bd02d660
commit fb8a7b96e7

274
main.py
View File

@ -5,14 +5,13 @@ import numpy as np
import zipfile
from shapely.geometry.base import BaseGeometry
from shapely.geometry import base as shapely_base
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi import FastAPI, File, Form, UploadFile, HTTPException
from fastapi.responses import JSONResponse
from core.config import UPLOAD_FOLDER, MAX_FILE_MB
from services.reader_csv import read_csv
from services.reader_shp import read_shp
from services.reader_gdb import read_gdb
# from services.reader_pdf import convert_df, read_pdf
from testing.test_pdf_multi import convert_df, read_pdf
from services.reader_pdf import convert_df, read_pdf
from services.geometry_detector import detect_and_build_geometry
from services.geometry_detector import attach_polygon_geometry_auto
from database.connection import engine
@ -22,14 +21,14 @@ import pathlib
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List
from typing import List, Optional
from shapely import wkt
from sqlalchemy import text
UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True)
apiVersion = "1.1.0"
apiVersion = "2.1.0"
app = FastAPI(
title="ETL Geo Upload Service",
version=apiVersion,
@ -109,6 +108,102 @@ def detect_zip_type(zip_path: str) -> str:
return "unknown"
def process_data(df: pd.DataFrame, ext: str):
    """Build geometry for an uploaded table and summarize the outcome.

    The table is passed to ``detect_and_build_geometry``; when that yields no
    usable geometry, ``attach_polygon_geometry_auto`` is tried as a fallback.
    Geometries are then converted to WKT strings and every cell is passed
    through ``safe_json`` so the caller can serialize the payload.

    Args:
        df: Table read from the uploaded file.
        ext: Extension of the source file (e.g. ``".pdf"``); echoed back in
            the ``file_type`` field of the payload.

    Returns:
        A plain ``dict``. Callers are expected to wrap it themselves, e.g.
        ``JSONResponse(content=process_data(df, ext))``. When no geometry
        column could be produced, a zeroed summary is returned instead of
        the full analysis.
    """
    result = detect_and_build_geometry(df, master_polygons=None)

    # Fallback: no geometry at all (or only nulls) after the first pass.
    if not hasattr(result, "geometry") or result.geometry.isna().all():
        result = attach_polygon_geometry_auto(result)

    has_geometry = (
        isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns
    )
    if not has_geometry:
        # Return a dict here as well: every caller wraps the return value in
        # JSONResponse, and a nested JSONResponse cannot be JSON-encoded.
        return {
            "message": "Tidak menemukan tabel yang relevan.",
            "file_type": ext,
            "rows": 0,
            "columns": 0,
            "geometry_valid": 0,
            "geometry_empty": 0,
            "geometry_valid_percent": 0,
            "warnings": [],
            "warning_examples": [],
            "preview": []
        }

    geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
        if not result.empty else "None"
    null_geom = result.geometry.isna().sum()
    print(f"[INFO] Tipe Geometry: {geom_type}")
    print(f"[INFO] Jumlah geometry kosong: {null_geom}")

    result = result.replace([pd.NA, float('inf'), float('-inf')], None)
    if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
        # Shapely objects are not JSON-serializable; ship them as WKT text.
        result['geometry'] = result['geometry'].apply(
            lambda g: g.wkt if g is not None else None
        )

    # Evaluate the emptiness mask once and reuse it for both the count and
    # the example rows.
    empty_mask = result['geometry'].apply(is_geom_empty)
    total_rows = len(result)
    empty_count = empty_mask.sum()
    valid_count = total_rows - empty_count
    # Guard the zero-row case: 0/0 would produce NaN, which the JSON response
    # encoder rejects.
    match_percentage = (valid_count / total_rows) * 100 if total_rows else 0.0

    warnings = []
    warning_examples = []
    if empty_count > 0:
        warnings.append(
            f"{empty_count} dari {total_rows} baris tidak memiliki geometry yang valid "
            f"({100 - match_percentage:.2f}% data gagal cocok)."
        )
        # Cap the number of example rows so the payload stays bounded.
        warning_examples = result[empty_mask].head(500).to_dict(orient="records")

    preview_data = result.to_dict(orient="records")

    preview_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in preview_data
    ]
    warning_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
    ]

    return {
        "message": "File berhasil dibaca dan dianalisis.",
        "file_type": ext,
        "rows": int(total_rows),
        "columns": list(map(str, result.columns)),
        "geometry_valid": int(valid_count),
        "geometry_empty": int(empty_count),
        "geometry_valid_percent": float(round(match_percentage, 2)),
        "warnings": warnings,
        "warning_examples": warning_safe,
        "preview": preview_safe
    }
from datetime import datetime
@app.get("/status", tags=["System"])
async def server_status():
@ -130,7 +225,7 @@ async def server_status():
@app.post("/upload")
async def upload_file(file: UploadFile = File(...)):
async def upload_file(file: UploadFile = File(...), page: Optional[str] = Form(None)):
fname = file.filename
ext = os.path.splitext(fname)[1].lower()
contents = await file.read()
@ -152,7 +247,14 @@ async def upload_file(file: UploadFile = File(...)):
df = read_csv(str(tmp_path))
elif ext == ".pdf":
tbl = read_pdf(tmp_path)
if len(tbl) > 1:
if len(tbl) == 0:
response = {
"message": "Tidak ditemukan tabel valid",
"tables": tbl,
"file_type": ext
}
return JSONResponse(content=response)
elif len(tbl) > 1:
response = {
"message": "File berhasil dibaca dan dianalisis.",
"tables": tbl,
@ -183,83 +285,11 @@ async def upload_file(file: UploadFile = File(...)):
if df is None or (hasattr(df, "empty") and df.empty):
return JSONResponse({"error": "No valid table detected"}, status_code=400)
result = detect_and_build_geometry(df, master_polygons=None)
if not hasattr(result, "geometry") or result.geometry.isna().all():
result = attach_polygon_geometry_auto(result)
if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
if not result.empty else "None"
null_geom = result.geometry.isna().sum()
print(f"[INFO] Tipe Geometry: {geom_type}")
print(f"[INFO] Jumlah geometry kosong: {null_geom}")
else:
response = {
"message": "Tidak menemukan tabel yang relevan.",
"file_type": ext,
"rows": 0,
"columns": 0,
"geometry_valid": 0,
"geometry_empty": 0,
"geometry_valid_percent": 0,
"warnings": [],
"warning_examples": [],
"preview": []
}
return JSONResponse(content=response)
res = process_data(df, ext)
tmp_path.unlink(missing_ok=True)
result = result.replace([pd.NA, float('inf'), float('-inf')], None)
if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
result['geometry'] = result['geometry'].apply(
lambda g: g.wkt if g is not None else None
)
empty_count = result['geometry'].apply(is_geom_empty).sum()
valid_count = len(result) - empty_count
match_percentage = (valid_count / len(result)) * 100
warnings = []
if empty_count > 0:
warnings.append(
f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
f"({100 - match_percentage:.2f}% data gagal cocok)."
)
if empty_count > 0:
examples = result[result['geometry'].apply(is_geom_empty)].head(500)
warning_examples = examples.to_dict(orient="records")
else:
warning_examples = []
preview_data = result.to_dict(orient="records")
preview_safe = [
{k: safe_json(v) for k, v in row.items()} for row in preview_data
]
warning_safe = [
{k: safe_json(v) for k, v in row.items()} for row in warning_examples
]
response = {
"message": "File berhasil dibaca dan dianalisis.",
"rows": int(len(result)),
"columns": list(map(str, result.columns)),
"geometry_valid": int(valid_count),
"geometry_empty": int(empty_count),
"geometry_valid_percent": float(round(match_percentage, 2)),
"warnings": warnings,
"warning_examples": warning_safe,
"preview": preview_safe
}
return JSONResponse(content=response)
return JSONResponse(content=res)
except Exception as e:
print(f"[ERROR] {e}")
@ -272,9 +302,6 @@ async def upload_file(file: UploadFile = File(...)):
class PdfRequest(BaseModel):
title: str
columns: List[str]
@ -287,84 +314,9 @@ async def upload_file(payload: PdfRequest):
if df is None or (hasattr(df, "empty") and df.empty):
return JSONResponse({"error": "No valid table detected"}, status_code=400)
result = detect_and_build_geometry(df, master_polygons=None)
res = process_data(df, '.pdf')
if not hasattr(result, "geometry") or result.geometry.isna().all():
print("[INFO] Mencoba menambahkan geometry (MultiPolygon) berdasarkan nama wilayah...")
result = attach_polygon_geometry_auto(result)
if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
if not result.empty else "None"
null_geom = result.geometry.isna().sum()
print(f"[INFO] Tipe Geometry: {geom_type}")
print(f"[INFO] Jumlah geometry kosong: {null_geom}")
else:
print("[WARN] Object bukan GeoDataFrame atau tidak punya kolom geometry.")
print(f"[DEBUG] Kolom saat ini: {list(result.columns)}")
response = {
"message": "Tidak menemukan tabel yang relevan.",
"file_type": ".pdf",
"rows": 0,
"columns": 0,
"geometry_valid": 0,
"geometry_empty": 0,
"geometry_valid_percent": 0,
"warnings": [],
"warning_examples": [],
"preview": []
}
return JSONResponse(content=response)
result = result.replace([pd.NA, float('inf'), float('-inf')], None)
if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
result['geometry'] = result['geometry'].apply(
lambda g: g.wkt if g is not None else None
)
empty_count = result['geometry'].apply(is_geom_empty).sum()
valid_count = len(result) - empty_count
match_percentage = (valid_count / len(result)) * 100
warnings = []
if empty_count > 0:
warnings.append(
f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
f"({100 - match_percentage:.2f}% data gagal cocok)."
)
if empty_count > 0:
examples = result[result['geometry'].apply(is_geom_empty)].head(500)
warning_examples = examples.to_dict(orient="records")
else:
warning_examples = []
# preview_data = result.head(5).to_dict(orient="records")
preview_data = result.to_dict(orient="records")
preview_safe = [
{k: safe_json(v) for k, v in row.items()} for row in preview_data
]
warning_safe = [
{k: safe_json(v) for k, v in row.items()} for row in warning_examples
]
response = {
"message": "File berhasil dibaca dan dianalisis.",
"rows": int(len(result)),
"columns": list(map(str, result.columns)),
"geometry_valid": int(valid_count),
"geometry_empty": int(empty_count),
"geometry_valid_percent": float(round(match_percentage, 2)),
"warnings": warnings,
"warning_examples": warning_safe,
"preview": preview_safe
}
return JSONResponse(content=response)
return JSONResponse(content=res)
except Exception as e:
print(f"[ERROR] {e}")