split main function
This commit is contained in:
parent
62bd02d660
commit
fb8a7b96e7
274
main.py
274
main.py
|
|
@ -5,14 +5,13 @@ import numpy as np
|
|||
import zipfile
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
from shapely.geometry import base as shapely_base
|
||||
from fastapi import FastAPI, File, UploadFile, HTTPException
|
||||
from fastapi import FastAPI, File, Form, UploadFile, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from core.config import UPLOAD_FOLDER, MAX_FILE_MB
|
||||
from services.reader_csv import read_csv
|
||||
from services.reader_shp import read_shp
|
||||
from services.reader_gdb import read_gdb
|
||||
# from services.reader_pdf import convert_df, read_pdf
|
||||
from testing.test_pdf_multi import convert_df, read_pdf
|
||||
from services.reader_pdf import convert_df, read_pdf
|
||||
from services.geometry_detector import detect_and_build_geometry
|
||||
from services.geometry_detector import attach_polygon_geometry_auto
|
||||
from database.connection import engine
|
||||
|
|
@ -22,14 +21,14 @@ import pathlib
|
|||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
from shapely import wkt
|
||||
from sqlalchemy import text
|
||||
|
||||
|
||||
UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
apiVersion = "1.1.0"
|
||||
apiVersion = "2.1.0"
|
||||
app = FastAPI(
|
||||
title="ETL Geo Upload Service",
|
||||
version=apiVersion,
|
||||
|
|
@ -109,6 +108,102 @@ def detect_zip_type(zip_path: str) -> str:
|
|||
return "unknown"
|
||||
|
||||
|
||||
|
||||
def process_data(df: pd.DataFrame, ext: str) -> dict:
    """Attach geometry to a parsed table and summarise the result.

    The table is first passed to ``detect_and_build_geometry``; when that
    yields no usable geometry, ``attach_polygon_geometry_auto`` is tried as
    a fallback. Geometries are then converted to WKT strings and the rows
    are counted as valid or empty via ``is_geom_empty``.

    Args:
        df: Table read from the uploaded file.
        ext: File extension of the upload (e.g. ``".pdf"``); echoed back
            in the ``file_type`` field of the summary.

    Returns:
        A plain dict with the keys ``message``, ``file_type``, ``rows``,
        ``columns``, ``geometry_valid``, ``geometry_empty``,
        ``geometry_valid_percent``, ``warnings``, ``warning_examples`` and
        ``preview``. A dict is returned on every path so that callers can
        wrap it in ``JSONResponse(content=...)`` themselves.
    """
    result = detect_and_build_geometry(df, master_polygons=None)

    # Fall back to polygon matching when no geometry was produced at all.
    if not hasattr(result, "geometry") or result.geometry.isna().all():
        result = attach_polygon_geometry_auto(result)

    if not (isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns):
        # Return a dict, not a JSONResponse: callers wrap the return value
        # in JSONResponse(content=...), and a response object is not
        # JSON-serialisable.
        return {
            "message": "Tidak menemukan tabel yang relevan.",
            "file_type": ext,
            "rows": 0,
            "columns": 0,
            "geometry_valid": 0,
            "geometry_empty": 0,
            "geometry_valid_percent": 0,
            "warnings": [],
            "warning_examples": [],
            "preview": []
        }

    if result.empty:
        geom_type = "None"
    else:
        geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g])
    null_geom = result.geometry.isna().sum()
    print(f"[INFO] Tipe Geometry: {geom_type}")
    print(f"[INFO] Jumlah geometry kosong: {null_geom}")

    # Replace NA and infinite values with None before building the payload.
    result = result.replace([pd.NA, float('inf'), float('-inf')], None)
    if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
        # Convert each geometry object to its WKT text form.
        result['geometry'] = result['geometry'].apply(
            lambda g: g.wkt if g is not None else None
        )

    total_rows = len(result)
    # Compute the empty-geometry mask once; it drives both the counts and
    # the example rows below.
    empty_mask = result['geometry'].apply(is_geom_empty)
    empty_count = empty_mask.sum()
    valid_count = total_rows - empty_count
    # Guard against a zero-row frame: dividing by zero would raise or
    # produce NaN, which cannot be serialised to JSON.
    match_percentage = (valid_count / total_rows) * 100 if total_rows else 0.0

    warnings = []
    warning_examples = []
    if empty_count > 0:
        warnings.append(
            f"{empty_count} dari {total_rows} baris tidak memiliki geometry yang valid "
            f"({100 - match_percentage:.2f}% data gagal cocok)."
        )
        # Cap the number of example rows returned to the client.
        warning_examples = result[empty_mask].head(500).to_dict(orient="records")

    preview_data = result.to_dict(orient="records")

    preview_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in preview_data
    ]
    warning_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
    ]

    return {
        "message": "File berhasil dibaca dan dianalisis.",
        "file_type": ext,
        "rows": int(total_rows),
        "columns": list(map(str, result.columns)),
        "geometry_valid": int(valid_count),
        "geometry_empty": int(empty_count),
        "geometry_valid_percent": float(round(match_percentage, 2)),
        "warnings": warnings,
        "warning_examples": warning_safe,
        "preview": preview_safe
    }
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
from datetime import datetime
|
||||
@app.get("/status", tags=["System"])
|
||||
async def server_status():
|
||||
|
|
@ -130,7 +225,7 @@ async def server_status():
|
|||
|
||||
|
||||
@app.post("/upload")
|
||||
async def upload_file(file: UploadFile = File(...)):
|
||||
async def upload_file(file: UploadFile = File(...), page: Optional[str] = Form(None)):
|
||||
fname = file.filename
|
||||
ext = os.path.splitext(fname)[1].lower()
|
||||
contents = await file.read()
|
||||
|
|
@ -152,7 +247,14 @@ async def upload_file(file: UploadFile = File(...)):
|
|||
df = read_csv(str(tmp_path))
|
||||
elif ext == ".pdf":
|
||||
tbl = read_pdf(tmp_path)
|
||||
if len(tbl) > 1:
|
||||
if len(tbl) == 0:
|
||||
response = {
|
||||
"message": "Tidak ditemukan tabel valid",
|
||||
"tables": tbl,
|
||||
"file_type": ext
|
||||
}
|
||||
return JSONResponse(content=response)
|
||||
elif len(tbl) > 1:
|
||||
response = {
|
||||
"message": "File berhasil dibaca dan dianalisis.",
|
||||
"tables": tbl,
|
||||
|
|
@ -183,83 +285,11 @@ async def upload_file(file: UploadFile = File(...)):
|
|||
if df is None or (hasattr(df, "empty") and df.empty):
|
||||
return JSONResponse({"error": "No valid table detected"}, status_code=400)
|
||||
|
||||
result = detect_and_build_geometry(df, master_polygons=None)
|
||||
|
||||
if not hasattr(result, "geometry") or result.geometry.isna().all():
|
||||
result = attach_polygon_geometry_auto(result)
|
||||
|
||||
if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
|
||||
geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
|
||||
if not result.empty else "None"
|
||||
|
||||
null_geom = result.geometry.isna().sum()
|
||||
print(f"[INFO] Tipe Geometry: {geom_type}")
|
||||
print(f"[INFO] Jumlah geometry kosong: {null_geom}")
|
||||
else:
|
||||
response = {
|
||||
"message": "Tidak menemukan tabel yang relevan.",
|
||||
"file_type": ext,
|
||||
"rows": 0,
|
||||
"columns": 0,
|
||||
"geometry_valid": 0,
|
||||
"geometry_empty": 0,
|
||||
"geometry_valid_percent": 0,
|
||||
"warnings": [],
|
||||
"warning_examples": [],
|
||||
"preview": []
|
||||
}
|
||||
|
||||
return JSONResponse(content=response)
|
||||
res = process_data(df, ext)
|
||||
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
|
||||
result = result.replace([pd.NA, float('inf'), float('-inf')], None)
|
||||
|
||||
if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
|
||||
result['geometry'] = result['geometry'].apply(
|
||||
lambda g: g.wkt if g is not None else None
|
||||
)
|
||||
|
||||
empty_count = result['geometry'].apply(is_geom_empty).sum()
|
||||
valid_count = len(result) - empty_count
|
||||
match_percentage = (valid_count / len(result)) * 100
|
||||
|
||||
warnings = []
|
||||
if empty_count > 0:
|
||||
warnings.append(
|
||||
f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
|
||||
f"({100 - match_percentage:.2f}% data gagal cocok)."
|
||||
)
|
||||
|
||||
if empty_count > 0:
|
||||
examples = result[result['geometry'].apply(is_geom_empty)].head(500)
|
||||
warning_examples = examples.to_dict(orient="records")
|
||||
else:
|
||||
warning_examples = []
|
||||
|
||||
preview_data = result.to_dict(orient="records")
|
||||
|
||||
preview_safe = [
|
||||
{k: safe_json(v) for k, v in row.items()} for row in preview_data
|
||||
]
|
||||
|
||||
warning_safe = [
|
||||
{k: safe_json(v) for k, v in row.items()} for row in warning_examples
|
||||
]
|
||||
|
||||
response = {
|
||||
"message": "File berhasil dibaca dan dianalisis.",
|
||||
"rows": int(len(result)),
|
||||
"columns": list(map(str, result.columns)),
|
||||
"geometry_valid": int(valid_count),
|
||||
"geometry_empty": int(empty_count),
|
||||
"geometry_valid_percent": float(round(match_percentage, 2)),
|
||||
"warnings": warnings,
|
||||
"warning_examples": warning_safe,
|
||||
"preview": preview_safe
|
||||
}
|
||||
|
||||
return JSONResponse(content=response)
|
||||
return JSONResponse(content=res)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[ERROR] {e}")
|
||||
|
|
@ -272,9 +302,6 @@ async def upload_file(file: UploadFile = File(...)):
|
|||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class PdfRequest(BaseModel):
|
||||
title: str
|
||||
columns: List[str]
|
||||
|
|
@ -287,84 +314,9 @@ async def upload_file(payload: PdfRequest):
|
|||
if df is None or (hasattr(df, "empty") and df.empty):
|
||||
return JSONResponse({"error": "No valid table detected"}, status_code=400)
|
||||
|
||||
result = detect_and_build_geometry(df, master_polygons=None)
|
||||
res = process_data(df, '.pdf')
|
||||
|
||||
if not hasattr(result, "geometry") or result.geometry.isna().all():
|
||||
print("[INFO] Mencoba menambahkan geometry (MultiPolygon) berdasarkan nama wilayah...")
|
||||
result = attach_polygon_geometry_auto(result)
|
||||
|
||||
if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
|
||||
geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
|
||||
if not result.empty else "None"
|
||||
|
||||
null_geom = result.geometry.isna().sum()
|
||||
print(f"[INFO] Tipe Geometry: {geom_type}")
|
||||
print(f"[INFO] Jumlah geometry kosong: {null_geom}")
|
||||
else:
|
||||
print("[WARN] Object bukan GeoDataFrame atau tidak punya kolom geometry.")
|
||||
print(f"[DEBUG] Kolom saat ini: {list(result.columns)}")
|
||||
response = {
|
||||
"message": "Tidak menemukan tabel yang relevan.",
|
||||
"file_type": ".pdf",
|
||||
"rows": 0,
|
||||
"columns": 0,
|
||||
"geometry_valid": 0,
|
||||
"geometry_empty": 0,
|
||||
"geometry_valid_percent": 0,
|
||||
"warnings": [],
|
||||
"warning_examples": [],
|
||||
"preview": []
|
||||
}
|
||||
|
||||
return JSONResponse(content=response)
|
||||
|
||||
result = result.replace([pd.NA, float('inf'), float('-inf')], None)
|
||||
if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
|
||||
result['geometry'] = result['geometry'].apply(
|
||||
lambda g: g.wkt if g is not None else None
|
||||
)
|
||||
|
||||
empty_count = result['geometry'].apply(is_geom_empty).sum()
|
||||
valid_count = len(result) - empty_count
|
||||
match_percentage = (valid_count / len(result)) * 100
|
||||
|
||||
warnings = []
|
||||
if empty_count > 0:
|
||||
warnings.append(
|
||||
f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
|
||||
f"({100 - match_percentage:.2f}% data gagal cocok)."
|
||||
)
|
||||
|
||||
if empty_count > 0:
|
||||
examples = result[result['geometry'].apply(is_geom_empty)].head(500)
|
||||
warning_examples = examples.to_dict(orient="records")
|
||||
else:
|
||||
warning_examples = []
|
||||
|
||||
# preview_data = result.head(5).to_dict(orient="records")
|
||||
preview_data = result.to_dict(orient="records")
|
||||
|
||||
preview_safe = [
|
||||
{k: safe_json(v) for k, v in row.items()} for row in preview_data
|
||||
]
|
||||
|
||||
warning_safe = [
|
||||
{k: safe_json(v) for k, v in row.items()} for row in warning_examples
|
||||
]
|
||||
|
||||
response = {
|
||||
"message": "File berhasil dibaca dan dianalisis.",
|
||||
"rows": int(len(result)),
|
||||
"columns": list(map(str, result.columns)),
|
||||
"geometry_valid": int(valid_count),
|
||||
"geometry_empty": int(empty_count),
|
||||
"geometry_valid_percent": float(round(match_percentage, 2)),
|
||||
"warnings": warnings,
|
||||
"warning_examples": warning_safe,
|
||||
"preview": preview_safe
|
||||
}
|
||||
|
||||
return JSONResponse(content=response)
|
||||
return JSONResponse(content=res)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[ERROR] {e}")
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user