split main function
This commit is contained in:
parent
62bd02d660
commit
fb8a7b96e7
274
main.py
274
main.py
|
|
@ -5,14 +5,13 @@ import numpy as np
|
||||||
import zipfile
|
import zipfile
|
||||||
from shapely.geometry.base import BaseGeometry
|
from shapely.geometry.base import BaseGeometry
|
||||||
from shapely.geometry import base as shapely_base
|
from shapely.geometry import base as shapely_base
|
||||||
from fastapi import FastAPI, File, UploadFile, HTTPException
|
from fastapi import FastAPI, File, Form, UploadFile, HTTPException
|
||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
from core.config import UPLOAD_FOLDER, MAX_FILE_MB
|
from core.config import UPLOAD_FOLDER, MAX_FILE_MB
|
||||||
from services.reader_csv import read_csv
|
from services.reader_csv import read_csv
|
||||||
from services.reader_shp import read_shp
|
from services.reader_shp import read_shp
|
||||||
from services.reader_gdb import read_gdb
|
from services.reader_gdb import read_gdb
|
||||||
# from services.reader_pdf import convert_df, read_pdf
|
from services.reader_pdf import convert_df, read_pdf
|
||||||
from testing.test_pdf_multi import convert_df, read_pdf
|
|
||||||
from services.geometry_detector import detect_and_build_geometry
|
from services.geometry_detector import detect_and_build_geometry
|
||||||
from services.geometry_detector import attach_polygon_geometry_auto
|
from services.geometry_detector import attach_polygon_geometry_auto
|
||||||
from database.connection import engine
|
from database.connection import engine
|
||||||
|
|
@ -22,14 +21,14 @@ import pathlib
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from typing import List
|
from typing import List, Optional
|
||||||
from shapely import wkt
|
from shapely import wkt
|
||||||
from sqlalchemy import text
|
from sqlalchemy import text
|
||||||
|
|
||||||
|
|
||||||
UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True)
|
UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
apiVersion = "1.1.0"
|
apiVersion = "2.1.0"
|
||||||
app = FastAPI(
|
app = FastAPI(
|
||||||
title="ETL Geo Upload Service",
|
title="ETL Geo Upload Service",
|
||||||
version=apiVersion,
|
version=apiVersion,
|
||||||
|
|
@ -109,6 +108,102 @@ def detect_zip_type(zip_path: str) -> str:
|
||||||
return "unknown"
|
return "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _no_geometry_response(ext: str) -> dict:
    """Build the payload reported when no geometry-bearing table was produced."""
    return {
        "message": "Tidak menemukan tabel yang relevan.",
        "file_type": ext,
        "rows": 0,
        "columns": 0,
        "geometry_valid": 0,
        "geometry_empty": 0,
        "geometry_valid_percent": 0,
        "warnings": [],
        "warning_examples": [],
        "preview": []
    }


def process_data(df: pd.DataFrame, ext: str):
    """Attach geometry to ``df`` and summarise the outcome as a plain dict.

    The frame is passed through ``detect_and_build_geometry``; when that
    yields no ``geometry`` attribute or only null geometries,
    ``attach_polygon_geometry_auto`` is tried as a fallback.

    Args:
        df: Table read from the uploaded file.
        ext: File extension, echoed back as ``file_type`` in the payload.

    Returns:
        A dict in every case. The upload endpoints wrap the return value in
        ``JSONResponse(content=...)`` themselves, so returning a response
        object from here would make that outer serialisation fail.
    """
    result = detect_and_build_geometry(df, master_polygons=None)

    if not hasattr(result, "geometry") or result.geometry.isna().all():
        result = attach_polygon_geometry_auto(result)

    # Guard clause: without a GeoDataFrame carrying a geometry column there is
    # nothing to analyse. Return a dict (not a JSONResponse) so callers can
    # wrap it the same way as the success payload.
    if not (isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns):
        return _no_geometry_response(ext)

    geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
        if not result.empty else "None"
    null_geom = result.geometry.isna().sum()
    print(f"[INFO] Tipe Geometry: {geom_type}")
    print(f"[INFO] Jumlah geometry kosong: {null_geom}")

    result = result.replace([pd.NA, float('inf'), float('-inf')], None)
    # Re-checked on purpose: `replace` returns a new object, and the WKT
    # conversion should only run while it is still a GeoDataFrame.
    if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
        result['geometry'] = result['geometry'].apply(
            lambda g: g.wkt if g is not None else None
        )

    # Evaluate the emptiness mask once and reuse it for counting and sampling.
    empty_mask = result['geometry'].apply(is_geom_empty)
    total_rows = len(result)
    empty_count = int(empty_mask.sum())
    valid_count = total_rows - empty_count
    # An empty result would otherwise divide by zero and put NaN in the payload.
    match_percentage = (valid_count / total_rows) * 100 if total_rows else 0.0

    warnings = []
    warning_examples = []
    if empty_count > 0:
        warnings.append(
            f"{empty_count} dari {total_rows} baris tidak memiliki geometry yang valid "
            f"({100 - match_percentage:.2f}% data gagal cocok)."
        )
        # Cap the examples at 500 rows.
        warning_examples = result[empty_mask].head(500).to_dict(orient="records")

    preview_data = result.to_dict(orient="records")

    preview_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in preview_data
    ]

    warning_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
    ]

    response = {
        "message": "File berhasil dibaca dan dianalisis.",
        "file_type": ext,
        "rows": int(total_rows),
        "columns": list(map(str, result.columns)),
        "geometry_valid": int(valid_count),
        "geometry_empty": int(empty_count),
        "geometry_valid_percent": float(round(match_percentage, 2)),
        "warnings": warnings,
        "warning_examples": warning_safe,
        "preview": preview_safe
    }

    return response
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@app.get("/status", tags=["System"])
|
@app.get("/status", tags=["System"])
|
||||||
async def server_status():
|
async def server_status():
|
||||||
|
|
@ -130,7 +225,7 @@ async def server_status():
|
||||||
|
|
||||||
|
|
||||||
@app.post("/upload")
|
@app.post("/upload")
|
||||||
async def upload_file(file: UploadFile = File(...)):
|
async def upload_file(file: UploadFile = File(...), page: Optional[str] = Form(None)):
|
||||||
fname = file.filename
|
fname = file.filename
|
||||||
ext = os.path.splitext(fname)[1].lower()
|
ext = os.path.splitext(fname)[1].lower()
|
||||||
contents = await file.read()
|
contents = await file.read()
|
||||||
|
|
@ -152,7 +247,14 @@ async def upload_file(file: UploadFile = File(...)):
|
||||||
df = read_csv(str(tmp_path))
|
df = read_csv(str(tmp_path))
|
||||||
elif ext == ".pdf":
|
elif ext == ".pdf":
|
||||||
tbl = read_pdf(tmp_path)
|
tbl = read_pdf(tmp_path)
|
||||||
if len(tbl) > 1:
|
if len(tbl) == 0:
|
||||||
|
response = {
|
||||||
|
"message": "Tidak ditemukan tabel valid",
|
||||||
|
"tables": tbl,
|
||||||
|
"file_type": ext
|
||||||
|
}
|
||||||
|
return JSONResponse(content=response)
|
||||||
|
elif len(tbl) > 1:
|
||||||
response = {
|
response = {
|
||||||
"message": "File berhasil dibaca dan dianalisis.",
|
"message": "File berhasil dibaca dan dianalisis.",
|
||||||
"tables": tbl,
|
"tables": tbl,
|
||||||
|
|
@ -183,83 +285,11 @@ async def upload_file(file: UploadFile = File(...)):
|
||||||
if df is None or (hasattr(df, "empty") and df.empty):
|
if df is None or (hasattr(df, "empty") and df.empty):
|
||||||
return JSONResponse({"error": "No valid table detected"}, status_code=400)
|
return JSONResponse({"error": "No valid table detected"}, status_code=400)
|
||||||
|
|
||||||
result = detect_and_build_geometry(df, master_polygons=None)
|
res = process_data(df, ext)
|
||||||
|
|
||||||
if not hasattr(result, "geometry") or result.geometry.isna().all():
|
|
||||||
result = attach_polygon_geometry_auto(result)
|
|
||||||
|
|
||||||
if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
|
|
||||||
geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
|
|
||||||
if not result.empty else "None"
|
|
||||||
|
|
||||||
null_geom = result.geometry.isna().sum()
|
|
||||||
print(f"[INFO] Tipe Geometry: {geom_type}")
|
|
||||||
print(f"[INFO] Jumlah geometry kosong: {null_geom}")
|
|
||||||
else:
|
|
||||||
response = {
|
|
||||||
"message": "Tidak menemukan tabel yang relevan.",
|
|
||||||
"file_type": ext,
|
|
||||||
"rows": 0,
|
|
||||||
"columns": 0,
|
|
||||||
"geometry_valid": 0,
|
|
||||||
"geometry_empty": 0,
|
|
||||||
"geometry_valid_percent": 0,
|
|
||||||
"warnings": [],
|
|
||||||
"warning_examples": [],
|
|
||||||
"preview": []
|
|
||||||
}
|
|
||||||
|
|
||||||
return JSONResponse(content=response)
|
|
||||||
|
|
||||||
tmp_path.unlink(missing_ok=True)
|
tmp_path.unlink(missing_ok=True)
|
||||||
|
|
||||||
result = result.replace([pd.NA, float('inf'), float('-inf')], None)
|
return JSONResponse(content=res)
|
||||||
|
|
||||||
if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
|
|
||||||
result['geometry'] = result['geometry'].apply(
|
|
||||||
lambda g: g.wkt if g is not None else None
|
|
||||||
)
|
|
||||||
|
|
||||||
empty_count = result['geometry'].apply(is_geom_empty).sum()
|
|
||||||
valid_count = len(result) - empty_count
|
|
||||||
match_percentage = (valid_count / len(result)) * 100
|
|
||||||
|
|
||||||
warnings = []
|
|
||||||
if empty_count > 0:
|
|
||||||
warnings.append(
|
|
||||||
f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
|
|
||||||
f"({100 - match_percentage:.2f}% data gagal cocok)."
|
|
||||||
)
|
|
||||||
|
|
||||||
if empty_count > 0:
|
|
||||||
examples = result[result['geometry'].apply(is_geom_empty)].head(500)
|
|
||||||
warning_examples = examples.to_dict(orient="records")
|
|
||||||
else:
|
|
||||||
warning_examples = []
|
|
||||||
|
|
||||||
preview_data = result.to_dict(orient="records")
|
|
||||||
|
|
||||||
preview_safe = [
|
|
||||||
{k: safe_json(v) for k, v in row.items()} for row in preview_data
|
|
||||||
]
|
|
||||||
|
|
||||||
warning_safe = [
|
|
||||||
{k: safe_json(v) for k, v in row.items()} for row in warning_examples
|
|
||||||
]
|
|
||||||
|
|
||||||
response = {
|
|
||||||
"message": "File berhasil dibaca dan dianalisis.",
|
|
||||||
"rows": int(len(result)),
|
|
||||||
"columns": list(map(str, result.columns)),
|
|
||||||
"geometry_valid": int(valid_count),
|
|
||||||
"geometry_empty": int(empty_count),
|
|
||||||
"geometry_valid_percent": float(round(match_percentage, 2)),
|
|
||||||
"warnings": warnings,
|
|
||||||
"warning_examples": warning_safe,
|
|
||||||
"preview": preview_safe
|
|
||||||
}
|
|
||||||
|
|
||||||
return JSONResponse(content=response)
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[ERROR] {e}")
|
print(f"[ERROR] {e}")
|
||||||
|
|
@ -272,9 +302,6 @@ async def upload_file(file: UploadFile = File(...)):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class PdfRequest(BaseModel):
|
class PdfRequest(BaseModel):
|
||||||
title: str
|
title: str
|
||||||
columns: List[str]
|
columns: List[str]
|
||||||
|
|
@ -287,84 +314,9 @@ async def upload_file(payload: PdfRequest):
|
||||||
if df is None or (hasattr(df, "empty") and df.empty):
|
if df is None or (hasattr(df, "empty") and df.empty):
|
||||||
return JSONResponse({"error": "No valid table detected"}, status_code=400)
|
return JSONResponse({"error": "No valid table detected"}, status_code=400)
|
||||||
|
|
||||||
result = detect_and_build_geometry(df, master_polygons=None)
|
res = process_data(df, '.pdf')
|
||||||
|
|
||||||
if not hasattr(result, "geometry") or result.geometry.isna().all():
|
return JSONResponse(content=res)
|
||||||
print("[INFO] Mencoba menambahkan geometry (MultiPolygon) berdasarkan nama wilayah...")
|
|
||||||
result = attach_polygon_geometry_auto(result)
|
|
||||||
|
|
||||||
if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
|
|
||||||
geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
|
|
||||||
if not result.empty else "None"
|
|
||||||
|
|
||||||
null_geom = result.geometry.isna().sum()
|
|
||||||
print(f"[INFO] Tipe Geometry: {geom_type}")
|
|
||||||
print(f"[INFO] Jumlah geometry kosong: {null_geom}")
|
|
||||||
else:
|
|
||||||
print("[WARN] Object bukan GeoDataFrame atau tidak punya kolom geometry.")
|
|
||||||
print(f"[DEBUG] Kolom saat ini: {list(result.columns)}")
|
|
||||||
response = {
|
|
||||||
"message": "Tidak menemukan tabel yang relevan.",
|
|
||||||
"file_type": ".pdf",
|
|
||||||
"rows": 0,
|
|
||||||
"columns": 0,
|
|
||||||
"geometry_valid": 0,
|
|
||||||
"geometry_empty": 0,
|
|
||||||
"geometry_valid_percent": 0,
|
|
||||||
"warnings": [],
|
|
||||||
"warning_examples": [],
|
|
||||||
"preview": []
|
|
||||||
}
|
|
||||||
|
|
||||||
return JSONResponse(content=response)
|
|
||||||
|
|
||||||
result = result.replace([pd.NA, float('inf'), float('-inf')], None)
|
|
||||||
if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
|
|
||||||
result['geometry'] = result['geometry'].apply(
|
|
||||||
lambda g: g.wkt if g is not None else None
|
|
||||||
)
|
|
||||||
|
|
||||||
empty_count = result['geometry'].apply(is_geom_empty).sum()
|
|
||||||
valid_count = len(result) - empty_count
|
|
||||||
match_percentage = (valid_count / len(result)) * 100
|
|
||||||
|
|
||||||
warnings = []
|
|
||||||
if empty_count > 0:
|
|
||||||
warnings.append(
|
|
||||||
f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
|
|
||||||
f"({100 - match_percentage:.2f}% data gagal cocok)."
|
|
||||||
)
|
|
||||||
|
|
||||||
if empty_count > 0:
|
|
||||||
examples = result[result['geometry'].apply(is_geom_empty)].head(500)
|
|
||||||
warning_examples = examples.to_dict(orient="records")
|
|
||||||
else:
|
|
||||||
warning_examples = []
|
|
||||||
|
|
||||||
# preview_data = result.head(5).to_dict(orient="records")
|
|
||||||
preview_data = result.to_dict(orient="records")
|
|
||||||
|
|
||||||
preview_safe = [
|
|
||||||
{k: safe_json(v) for k, v in row.items()} for row in preview_data
|
|
||||||
]
|
|
||||||
|
|
||||||
warning_safe = [
|
|
||||||
{k: safe_json(v) for k, v in row.items()} for row in warning_examples
|
|
||||||
]
|
|
||||||
|
|
||||||
response = {
|
|
||||||
"message": "File berhasil dibaca dan dianalisis.",
|
|
||||||
"rows": int(len(result)),
|
|
||||||
"columns": list(map(str, result.columns)),
|
|
||||||
"geometry_valid": int(valid_count),
|
|
||||||
"geometry_empty": int(empty_count),
|
|
||||||
"geometry_valid_percent": float(round(match_percentage, 2)),
|
|
||||||
"warnings": warnings,
|
|
||||||
"warning_examples": warning_safe,
|
|
||||||
"preview": preview_safe
|
|
||||||
}
|
|
||||||
|
|
||||||
return JSONResponse(content=response)
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[ERROR] {e}")
|
print(f"[ERROR] {e}")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user