From e8c50aa4602a92eac4baf3b19ff726bd5e28314c Mon Sep 17 00:00:00 2001 From: DmsAnhr Date: Mon, 22 Dec 2025 15:19:20 +0700 Subject: [PATCH] add ai generate suggestion --- services/upload_file/ai_generate.py | 49 ++++ services/upload_file/upload.py | 351 ++++++++++++++++++---------- 2 files changed, 278 insertions(+), 122 deletions(-) create mode 100644 services/upload_file/ai_generate.py diff --git a/services/upload_file/ai_generate.py b/services/upload_file/ai_generate.py new file mode 100644 index 0000000..2467bfa --- /dev/null +++ b/services/upload_file/ai_generate.py @@ -0,0 +1,49 @@ +import requests +from typing import Dict, Any +from core.config import GEN_AI_URL + +URL = GEN_AI_URL + + +def send_metadata(payload: Dict[str, Any]) -> Dict[str, Any]: + headers = { + "Content-Type": "application/json", + "API_KEY": "testsatupeta" + } + + try: + response = requests.post( + f"{URL}", + json=payload, + headers=headers, + ) + + # response.raise_for_status() + return response.json() + + except requests.exceptions.RequestException as e: + return { + "success": False, + "error": str(e) + } + + +if __name__ == "__main__": + # Contoh payload + payload = { + "nama_file_peta": "peta bencana.pdf", + "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD)", + "tipe_data_spasial": "Multipolygon", + "struktur_atribut_data": {}, + "metadata": { + "judul": "", + "abstrak": "", + "tujuan": "", + "keyword": [], + "kategori": [], + "kategori_mapset": "" + } + } + + result = send_metadata(payload) + print(result) diff --git a/services/upload_file/upload.py b/services/upload_file/upload.py index ccc5089..f781327 100644 --- a/services/upload_file/upload.py +++ b/services/upload_file/upload.py @@ -5,10 +5,15 @@ import geopandas as gpd import numpy as np import re import zipfile +import tempfile +import asyncio +from pyproj import CRS from shapely.geometry.base import BaseGeometry from shapely.geometry import base as shapely_base from fastapi import File, Form, UploadFile, HTTPException +from api.routers.datasets_router import cleansing_data from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES +from services.upload_file.ai_generate import send_metadata from services.upload_file.readers.reader_csv import read_csv from services.upload_file.readers.reader_shp import read_shp from services.upload_file.readers.reader_gdb import read_gdb @@ -16,6 +21,7 @@ from services.upload_file.readers.reader_mpk import read_mpk from services.upload_file.readers.reader_pdf import convert_df, read_pdf from services.upload_file.utils.geometry_detector import detect_and_build_geometry from services.upload_file.utils.geometry_detector import attach_polygon_geometry_auto +from services.upload_file.upload_ws import report_progress from database.connection import engine, sync_engine from database.models import Base from pydantic import BaseModel @@ -68,17 +74,172 @@ def detect_zip_type(zip_path: str) -> str: return "unknown" -def process_data(df: pd.DataFrame, ext: str): +# def detect_zip_type(zip_path: str) -> str: +# with zipfile.ZipFile(zip_path, "r") as zip_ref: +# files = zip_ref.namelist() + +# # ------------------------------------------------------------- +# # 1) DETECT FileGDB +# # ------------------------------------------------------------- +# is_gdb = ( +# any(".gdb/" in f.lower() for f in files) +# or any(f.lower().endswith(ext) for ext in +# [".gdbtable", ".gdbtablx", ".gdbindexes", ".spx"] for f in files) +# ) + +# if is_gdb: +# print("\n[INFO] ZIP terdeteksi berisi FileGDB.") + +# with tempfile.TemporaryDirectory() as temp_dir: +# # extract ZIP +# with zipfile.ZipFile(zip_path, "r") as zip_ref: +# zip_ref.extractall(temp_dir) + +# # find folder *.gdb +# gdb_path = None +# for root, dirs, _ in os.walk(temp_dir): +# for d in dirs: +# if d.lower().endswith(".gdb"): +# gdb_path = os.path.join(root, d) +# break + +# if not gdb_path: +# print("[ERROR] Folder .gdb tidak ditemukan.") +# return "gdb" + +# print(f"[INFO] GDB Path: {gdb_path}") + +# # Cari seluruh file .gdbtable +# table_files = [ +# os.path.join(gdb_path, f) +# for f in os.listdir(gdb_path) +# if f.lower().endswith(".gdbtable") +# ] + +# if not table_files: +# print("[ERROR] Tidak ada file .gdbtable ditemukan.") +# return "gdb" + +# # Scan semua table file untuk mencari SpatialReference +# found_crs = False + +# for table_file in table_files: +# try: +# with open(table_file, "rb") as f: +# raw = f.read(15000) # baca awal file, cukup untuk header JSON + +# text = raw.decode("utf-8", errors="ignore") + +# start = text.find("{") +# end = text.rfind("}") + 1 + +# if start == -1 or end == -1: +# continue + +# json_str = text[start:end] +# meta = json.loads(json_str) + +# spatial_ref = meta.get("SpatialReference") +# if not spatial_ref: +# continue + +# wkt = spatial_ref.get("WKT") +# if not wkt: +# continue + +# print(f"[FOUND] CRS metadata pada: {os.path.basename(table_file)}") +# print(f"[CRS WKT] {wkt[:200]}...") + +# # Convert to EPSG +# try: +# epsg = CRS.from_wkt(wkt).to_epsg() +# print(f"[EPSG] {epsg}") +# except: +# print("[EPSG] Tidak ditemukan EPSG.") + +# found_crs = True +# break + +# except Exception: +# continue + +# if not found_crs: +# print("[WARNING] Tidak ditemukan CRS di file .gdbtable manapun.") + +# return "gdb" + +# # ----------------------------------------------------- +# # 2. DETEKSI SHP +# # ----------------------------------------------------- +# if any(f.lower().endswith(".shp") for f in files): +# print("\n[INFO] ZIP terdeteksi berisi SHP.") + +# # cari file .prj +# prj_files = [f for f in files if f.lower().endswith(".prj")] + +# if not prj_files: +# print("[WARNING] Tidak ada file .prj → CRS tidak diketahui.") +# return "shp" + +# with zipfile.ZipFile(zip_path, "r") as zip_ref: +# with tempfile.TemporaryDirectory() as temp_dir: +# prj_path = os.path.join(temp_dir, os.path.basename(prj_files[0])) +# zip_ref.extract(prj_files[0], temp_dir) + +# # baca isi prj +# with open(prj_path, "r") as f: +# prj_text = f.read() + +# try: +# crs = CRS.from_wkt(prj_text) +# print(f"[CRS WKT] {crs.to_wkt()[:200]}...") + +# epsg = crs.to_epsg() +# if epsg: +# print(f"[EPSG] {epsg}") +# else: +# print("[EPSG] Tidak ditemukan dalam database EPSG.") + +# except Exception as e: +# print("[ERROR] Gagal membaca CRS dari file PRJ:", e) + +# return "shp" + +# # ----------------------------------------------------- +# # 3. UNKNOWN +# # ----------------------------------------------------- +# return "unknown" + + +def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str): result = detect_and_build_geometry(df, master_polygons=None) if not hasattr(result, "geometry") or result.geometry.isna().all(): result = attach_polygon_geometry_auto(result) - if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns: - geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \ - if not result.empty else "None" + # if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns: + # geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \ + # if not result.empty else "None" + # null_geom = result.geometry.isna().sum() + + def normalize_geom_type(geom_type): + if geom_type.startswith("Multi"): + return geom_type.replace("Multi", "") + return geom_type + + if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns: + geom_types = ( + result.geometry + .dropna() + .geom_type + .apply(normalize_geom_type) + .unique() + ) + + geom_type = geom_types[0] if len(geom_types) > 0 else "None" null_geom = result.geometry.isna().sum() + print(f"[INFO] Tipe Geometry: {geom_type}") print(f"[INFO] Jumlah geometry kosong: {null_geom}") else: @@ -130,20 +291,42 @@ def process_data(df: pd.DataFrame, ext: str): warning_safe = [ {k: safe_json(v) for k, v in row.items()} for row in warning_examples ] + + ai_context = { + "nama_file_peta": filename, + "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD) Provinsi Jatim", + "tipe_data_spasial": geom_type, + "deskripsi_singkat": fileDesc, + "struktur_atribut_data": {}, + # "metadata": { + # "judul": "", + # "abstrak": "", + # "tujuan": "", + # "keyword": [], + # "kategori": [], + # "kategori_mapset": "" + # } + } + ai_suggest = send_metadata(ai_context) + # ai_suggest = {'judul': 'Peta Risiko Letusan Gunung Arjuna di Provinsi Jawa Timur', 'abstrak': 'Peta ini menggambarkan wilayah berisiko letusan Gunung Arjuna yang berada di Provinsi Jawa Timur. Data disajikan dalam bentuk poligon yang menunjukkan zona risiko berdasarkan analisis potensi aktivitas vulkanik.', 'tujuan': 'Data dapat digunakan untuk perencanaan mitigasi bencana dan pengambilan keputusan di wilayah Jawa Timur.', 'keyword': ['Risiko letusan', 'Gunung Arjuna', 'Bencana alam', 'Provinsi Jawa Timur', 'Geologi'], 'kategori': ['Geoscientific information', 'Environment'], 'kategori_mapset': 'Lingkungan Hidup'} + print(ai_suggest) response = { "message": "File berhasil dibaca dan dianalisis.", + "file_name": filename, "file_type": ext, "rows": int(len(result)), "columns": list(map(str, result.columns)), "geometry_valid": int(valid_count), "geometry_empty": int(empty_count), "geometry_valid_percent": float(round(match_percentage, 2)), + "geometry_type": geom_type, "warnings": warnings, - "warning_examples": warning_safe, - "preview": preview_safe + "warning_rows": warning_safe, + "preview": preview_safe, + "metadata_suggest": ai_suggest } - + # return successRes(content=response) return response @@ -156,7 +339,7 @@ def process_data(df: pd.DataFrame, ext: str): -async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form("")): +async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form(""), fileDesc: Optional[str] = Form("")): fname = file.filename ext = os.path.splitext(fname)[1].lower() contents = await file.read() @@ -216,7 +399,7 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = if df is None or (hasattr(df, "empty") and df.empty): return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid") - res = process_data(df, ext) + res = process_data(df, ext, fname, fileDesc) tmp_path.unlink(missing_ok=True) @@ -241,6 +424,8 @@ class PdfRequest(BaseModel): title: str columns: List[str] rows: List[List] + fileName: str + fileDesc: str async def handle_process_pdf(payload: PdfRequest): try: @@ -248,7 +433,7 @@ async def handle_process_pdf(payload: PdfRequest): if df is None or (hasattr(df, "empty") and df.empty): return errorRes(message="Tidak ada tabel") - res = process_data(df, '.pdf') + res = process_data(df, '.pdf', payload.fileName, payload.fileDesc) return successRes(data=res) except Exception as e: @@ -271,6 +456,7 @@ class UploadRequest(BaseModel): rows: List[dict] columns: List[str] author: Dict[str, Any] + style: str # generate _2 if exist async def generate_unique_table_name(base_name: str): @@ -300,125 +486,32 @@ def str_to_date(raw_date: str): print("[WARNING] Tidak bisa parse dateCreated:", e) return None -import asyncio -# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): -# try: -# table_name = await generate_unique_table_name(payload.title) -# df = pd.DataFrame(payload.rows) -# df.columns = [col.upper() for col in df.columns] -# if "GEOMETRY" not in df.columns: -# raise HTTPException(400, "Kolom GEOMETRY tidak ditemukan") -# df["GEOMETRY"] = df["GEOMETRY"].apply( -# lambda g: wkt.loads(g) -# if isinstance(g, str) else None -# ) +def generate_job_id(user_id: str) -> str: + timestamp = datetime.now().strftime("%Y%m%d%H%M%S") + return f"{user_id}_{timestamp}" -# # ====================================================== -# # RENAME kolom GEOMETRY → geom (WAJIB) -# # ====================================================== -# df = df.rename(columns={"GEOMETRY": "geom"}) -# gdf = gpd.GeoDataFrame(df, geometry="geom", crs="EPSG:4326") -# # --- Wajib: gunakan engine sync TANPA asyncpg -# loop = asyncio.get_running_loop() -# await loop.run_in_executor( -# None, -# lambda: gdf.to_postgis( -# table_name, -# sync_engine, -# if_exists="replace", -# index=False -# ) -# ) +def save_xml_to_sld(xml_string, filename): + folder_path = 'style_temp' + os.makedirs(folder_path, exist_ok=True) + + file_path = os.path.join(folder_path, f"{filename}.sld") + + with open(file_path, "w", encoding="utf-8") as f: + f.write(xml_string) + + return file_path -# # === STEP 4: add ID column === -# async with engine.begin() as conn: -# await conn.execute(text( -# f'ALTER TABLE "{table_name}" ADD COLUMN _ID SERIAL PRIMARY KEY;' -# )) -# # === STEP 5: save author metadata === -# author = payload.author -# async with engine.begin() as conn: -# await conn.execute(text(""" -# INSERT INTO backend.author_metadata ( -# table_title, -# dataset_title, -# dataset_abstract, -# keywords, -# topic_category, -# date_created, -# dataset_status, -# organization_name, -# contact_person_name, -# contact_email, -# contact_phone, -# geom_type, -# user_id -# ) VALUES ( -# :table_title, -# :dataset_title, -# :dataset_abstract, -# :keywords, -# :topic_category, -# :date_created, -# :dataset_status, -# :organization_name, -# :contact_person_name, -# :contact_email, -# :contact_phone, -# :geom_type, -# :user_id -# ) -# """), { -# "table_title": table_name, -# "dataset_title": author.get("title") or payload.title, -# "dataset_abstract": author.get("abstract"), -# "keywords": author.get("keywords"), -# "topic_category": author.get("topicCategory"), -# "date_created": str_to_date(author.get("dateCreated")), -# "dataset_status": author.get("status"), -# "organization_name": author.get("organization"), -# "contact_person_name": author.get("contactName"), -# "contact_email": author.get("contactEmail"), -# "contact_phone": author.get("contactPhone"), -# "geom_type": json.dumps(list(gdf.geom_type.unique())), -# "user_id": user_id -# }) -# # === STEP 6: log success === -# await log_activity( -# user_id=user_id, -# action_type="UPLOAD", -# action_title=f"Upload dataset {table_name}", -# details={"table_name": table_name, "rows": len(gdf)} -# ) -# res = { -# "table_name": table_name, -# "status": "success", -# "message": f"Tabel '{table_name}' berhasil dibuat.", -# "total_rows": len(gdf), -# "geometry_type": list(gdf.geom_type.unique()), -# } - -# return successRes(data=res) -# except Exception as e: -# await log_activity( -# user_id=user_id, -# action_type="ERROR", -# action_title="Upload gagal", -# details={"error": str(e)} -# ) -# print(f"error : {str(e)}") -# raise HTTPException(status_code=500, detail=str(e)) async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): try: @@ -479,9 +572,10 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): if not detected_crs: detected_crs = "EPSG:4326" - + detected_crs = 'EPSG:4326' # Buat GeoDataFrame gdf = gpd.GeoDataFrame(df, geometry="geom", crs=detected_crs) + row_count = len(gdf) # ===================================================================== # 6. VERIFY CRS (SRID) VALID di PROJ / PostGIS @@ -533,7 +627,9 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): contact_email, contact_phone, geom_type, - user_id + user_id, + process, + geometry_count ) VALUES ( :table_title, :dataset_title, @@ -547,14 +643,17 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): :contact_email, :contact_phone, :geom_type, - :user_id + :user_id, + :process, + :geometry_count ) """), { "table_title": table_name, - "dataset_title": author.get("title") or payload.title, + "dataset_title": payload.title, "dataset_abstract": author.get("abstract"), "keywords": author.get("keywords"), - "topic_category": author.get("topicCategory"), + # "topic_category": author.get("topicCategory"), + "topic_category": ", ".join(author.get("topicCategory")), "date_created": str_to_date(author.get("dateCreated")), "dataset_status": author.get("status"), "organization_name": author.get("organization"), @@ -562,7 +661,9 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): "contact_email": author.get("contactEmail"), "contact_phone": author.get("contactPhone"), "geom_type": json.dumps(unified_geom_type), - "user_id": user_id + "user_id": user_id, + "process": 'CLEANSING', + "geometry_count": row_count }) @@ -576,7 +677,9 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): details={"table_name": table_name, "rows": len(gdf)} ) + job_id = generate_job_id(str(user_id)) result = { + "job_id": job_id, "table_name": table_name, "status": "success", "message": f"Tabel '{table_name}' berhasil dibuat.", @@ -584,6 +687,10 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): "geometry_type": unified_geom_type, "crs": detected_crs, } + save_xml_to_sld(payload.style, job_id) + + await report_progress(job_id, "upload", 20, "Upload selesai") + cleansing_data(table_name, job_id) return successRes(data=result) except Exception as e: