add ai generate suggestion

2025-12-22 15:19:20 +07:00 · 2025-12-22 15:19:20 +07:00 · e8c50aa460
commit e8c50aa460
parent b4cd6a8a5d
2 changed files with 278 additions and 122 deletions
--- a/services/upload_file/ai_generate.py
+++ b/services/upload_file/ai_generate.py
@ -0,0 +1,49 @@
 import requests
 from typing import Dict, Any
 from core.config import GEN_AI_URL
 URL = GEN_AI_URL
 def send_metadata(payload: Dict[str, Any]) -> Dict[str, Any]:
    headers = {
        "Content-Type": "application/json",
        "API_KEY": "testsatupeta"
    }
    try:
        response = requests.post(
            f"{URL}",
            json=payload,
            headers=headers,
        )
        # response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        return {
            "success": False,
            "error": str(e)
        }
 if __name__ == "__main__":
    # Contoh payload
    payload = {
        "nama_file_peta": "peta bencana.pdf",
        "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD)",
        "tipe_data_spasial": "Multipolygon",
        "struktur_atribut_data": {},
        "metadata": {
            "judul": "",
            "abstrak": "",
            "tujuan": "",
            "keyword": [],
            "kategori": [],
            "kategori_mapset": ""
        }
    }
    result = send_metadata(payload)
    print(result)
--- a/services/upload_file/upload.py
+++ b/services/upload_file/upload.py
@ -5,10 +5,15 @@ import geopandas as gpd
 import numpy as np
 import re
 import zipfile
 import tempfile
 import asyncio
 from pyproj import CRS
 from shapely.geometry.base import BaseGeometry
 from shapely.geometry import base as shapely_base
 from fastapi import File, Form, UploadFile, HTTPException
 from api.routers.datasets_router import cleansing_data
 from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES
 from services.upload_file.ai_generate import send_metadata
 from services.upload_file.readers.reader_csv import read_csv
 from services.upload_file.readers.reader_shp import read_shp
 from services.upload_file.readers.reader_gdb import read_gdb
@ -16,6 +21,7 @@ from services.upload_file.readers.reader_mpk import read_mpk
 from services.upload_file.readers.reader_pdf import convert_df, read_pdf
 from services.upload_file.utils.geometry_detector import detect_and_build_geometry
 from services.upload_file.utils.geometry_detector import attach_polygon_geometry_auto
 from services.upload_file.upload_ws import report_progress
 from database.connection import engine, sync_engine
 from database.models import Base
 from pydantic import BaseModel
@ -68,17 +74,172 @@ def detect_zip_type(zip_path: str) -> str:
    return "unknown"
-def process_data(df: pd.DataFrame, ext: str):
+# def detect_zip_type(zip_path: str) -> str:
 #     with zipfile.ZipFile(zip_path, "r") as zip_ref:
 #         files = zip_ref.namelist()
 #      # -------------------------------------------------------------
 #     # 1) DETECT FileGDB
 #     # -------------------------------------------------------------
 #     is_gdb = (
 #         any(".gdb/" in f.lower() for f in files)
 #         or any(f.lower().endswith(ext) for ext in 
 #                [".gdbtable", ".gdbtablx", ".gdbindexes", ".spx"] for f in files)
 #     )
 #     if is_gdb:
 #         print("\n[INFO] ZIP terdeteksi berisi FileGDB.")
 #         with tempfile.TemporaryDirectory() as temp_dir:
 #             # extract ZIP
 #             with zipfile.ZipFile(zip_path, "r") as zip_ref:
 #                 zip_ref.extractall(temp_dir)
 #             # find folder *.gdb
 #             gdb_path = None
 #             for root, dirs, _ in os.walk(temp_dir):
 #                 for d in dirs:
 #                     if d.lower().endswith(".gdb"):
 #                         gdb_path = os.path.join(root, d)
 #                         break
 #             if not gdb_path:
 #                 print("[ERROR] Folder .gdb tidak ditemukan.")
 #                 return "gdb"
 #             print(f"[INFO] GDB Path: {gdb_path}")
 #             # Cari seluruh file .gdbtable
 #             table_files = [
 #                 os.path.join(gdb_path, f)
 #                 for f in os.listdir(gdb_path)
 #                 if f.lower().endswith(".gdbtable")
 #             ]
 #             if not table_files:
 #                 print("[ERROR] Tidak ada file .gdbtable ditemukan.")
 #                 return "gdb"
 #             # Scan semua table file untuk mencari SpatialReference
 #             found_crs = False
 #             for table_file in table_files:
 #                 try:
 #                     with open(table_file, "rb") as f:
 #                         raw = f.read(15000)  # baca awal file, cukup untuk header JSON
 #                     text = raw.decode("utf-8", errors="ignore")
 #                     start = text.find("{")
 #                     end = text.rfind("}") + 1
 #                     if start == -1 or end == -1:
 #                         continue
 #                     json_str = text[start:end]
 #                     meta = json.loads(json_str)
 #                     spatial_ref = meta.get("SpatialReference")
 #                     if not spatial_ref:
 #                         continue
 #                     wkt = spatial_ref.get("WKT")
 #                     if not wkt:
 #                         continue
 #                     print(f"[FOUND] CRS metadata pada: {os.path.basename(table_file)}")
 #                     print(f"[CRS WKT] {wkt[:200]}...")
 #                     # Convert to EPSG
 #                     try:
 #                         epsg = CRS.from_wkt(wkt).to_epsg()
 #                         print(f"[EPSG] {epsg}")
 #                     except:
 #                         print("[EPSG] Tidak ditemukan EPSG.")
 #                     found_crs = True
 #                     break
 #                 except Exception:
 #                     continue
 #             if not found_crs:
 #                 print("[WARNING] Tidak ditemukan CRS di file .gdbtable manapun.")
 #         return "gdb"
 #     # -----------------------------------------------------
 #     # 2. DETEKSI SHP
 #     # -----------------------------------------------------
 #     if any(f.lower().endswith(".shp") for f in files):
 #         print("\n[INFO] ZIP terdeteksi berisi SHP.")
 #         # cari file .prj
 #         prj_files = [f for f in files if f.lower().endswith(".prj")]
 #         if not prj_files:
 #             print("[WARNING] Tidak ada file .prj → CRS tidak diketahui.")
 #             return "shp"
 #         with zipfile.ZipFile(zip_path, "r") as zip_ref:
 #             with tempfile.TemporaryDirectory() as temp_dir:
 #                 prj_path = os.path.join(temp_dir, os.path.basename(prj_files[0]))
 #                 zip_ref.extract(prj_files[0], temp_dir)
 #                 # baca isi prj
 #                 with open(prj_path, "r") as f:
 #                     prj_text = f.read()
 #                 try:
 #                     crs = CRS.from_wkt(prj_text)
 #                     print(f"[CRS WKT] {crs.to_wkt()[:200]}...")
 #                     epsg = crs.to_epsg()
 #                     if epsg:
 #                         print(f"[EPSG] {epsg}")
 #                     else:
 #                         print("[EPSG] Tidak ditemukan dalam database EPSG.")
 #                 except Exception as e:
 #                     print("[ERROR] Gagal membaca CRS dari file PRJ:", e)
 #         return "shp"
 #     # -----------------------------------------------------
 #     # 3. UNKNOWN
 #     # -----------------------------------------------------
 #     return "unknown"
 def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
    result = detect_and_build_geometry(df, master_polygons=None)
    if not hasattr(result, "geometry") or result.geometry.isna().all():
        result = attach_polygon_geometry_auto(result)
-    if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
+    # if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
-        geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
+    #     geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
-            if not result.empty else "None"
+    #         if not result.empty else "None"
    #     null_geom = result.geometry.isna().sum()
    def normalize_geom_type(geom_type):
        if geom_type.startswith("Multi"):
            return geom_type.replace("Multi", "")
        return geom_type
    if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
        geom_types = (
            result.geometry
            .dropna()
            .geom_type
            .apply(normalize_geom_type)
            .unique()
        )
        geom_type = geom_types[0] if len(geom_types) > 0 else "None"
        null_geom = result.geometry.isna().sum()
        print(f"[INFO] Tipe Geometry: {geom_type}")
        print(f"[INFO] Jumlah geometry kosong: {null_geom}")
    else:
@ -131,17 +292,39 @@ def process_data(df: pd.DataFrame, ext: str):
        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
    ]
    ai_context = {
        "nama_file_peta": filename,
        "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD) Provinsi Jatim",
        "tipe_data_spasial": geom_type,
        "deskripsi_singkat": fileDesc,
        "struktur_atribut_data": {},
        # "metadata": {
        #     "judul": "",
        #     "abstrak": "",
        #     "tujuan": "",
        #     "keyword": [],
        #     "kategori": [],
        #     "kategori_mapset": ""
        # }
    }
    ai_suggest = send_metadata(ai_context)
    # ai_suggest = {'judul': 'Peta Risiko Letusan Gunung Arjuna di Provinsi Jawa Timur', 'abstrak': 'Peta ini menggambarkan wilayah berisiko letusan Gunung Arjuna yang berada di Provinsi Jawa Timur. Data disajikan dalam bentuk poligon yang menunjukkan zona risiko berdasarkan analisis potensi aktivitas vulkanik.', 'tujuan': 'Data dapat digunakan untuk perencanaan mitigasi bencana dan pengambilan keputusan di wilayah Jawa Timur.', 'keyword': ['Risiko letusan', 'Gunung Arjuna', 'Bencana alam', 'Provinsi Jawa Timur', 'Geologi'], 'kategori': ['Geoscientific information', 'Environment'], 'kategori_mapset': 'Lingkungan Hidup'}
    print(ai_suggest)
    response = {
        "message": "File berhasil dibaca dan dianalisis.",
        "file_name": filename,
        "file_type": ext,
        "rows": int(len(result)),
        "columns": list(map(str, result.columns)),
        "geometry_valid": int(valid_count),
        "geometry_empty": int(empty_count),
        "geometry_valid_percent": float(round(match_percentage, 2)),
        "geometry_type": geom_type,
        "warnings": warnings,
-        "warning_examples": warning_safe,
+        "warning_rows": warning_safe,
-        "preview": preview_safe
+        "preview": preview_safe,
        "metadata_suggest": ai_suggest
    }
    # return successRes(content=response)
@ -156,7 +339,7 @@ def process_data(df: pd.DataFrame, ext: str):
-async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form("")):
+async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form(""), fileDesc: Optional[str] = Form("")):
    fname = file.filename
    ext = os.path.splitext(fname)[1].lower()
    contents = await file.read()
@ -216,7 +399,7 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
        if df is None or (hasattr(df, "empty") and df.empty):
            return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid")
-        res = process_data(df, ext)
+        res = process_data(df, ext, fname, fileDesc)
        tmp_path.unlink(missing_ok=True)
@ -241,6 +424,8 @@ class PdfRequest(BaseModel):
    title: str
    columns: List[str]
    rows: List[List]
    fileName: str
    fileDesc: str
 async def handle_process_pdf(payload: PdfRequest):
    try:
@ -248,7 +433,7 @@ async def handle_process_pdf(payload: PdfRequest):
        if df is None or (hasattr(df, "empty") and df.empty):
            return errorRes(message="Tidak ada tabel")
-        res = process_data(df, '.pdf')        
+        res = process_data(df, '.pdf', payload.fileName, payload.fileDesc)        
        return successRes(data=res)
    except Exception as e:
@ -271,6 +456,7 @@ class UploadRequest(BaseModel):
    rows: List[dict]
    columns: List[str]
    author: Dict[str, Any]
    style: str
 # generate _2 if exist
 async def generate_unique_table_name(base_name: str):
@ -300,125 +486,32 @@ def str_to_date(raw_date: str):
            print("[WARNING] Tidak bisa parse dateCreated:", e)
            return None
 import asyncio
 # async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
 #     try:
 #         table_name = await generate_unique_table_name(payload.title)
 #         df = pd.DataFrame(payload.rows)
 #         df.columns = [col.upper() for col in df.columns]
 #         if "GEOMETRY" not in df.columns:
 #             raise HTTPException(400, "Kolom GEOMETRY tidak ditemukan")
-#         df["GEOMETRY"] = df["GEOMETRY"].apply(
+def generate_job_id(user_id: str) -> str:
-#             lambda g: wkt.loads(g)
+    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-#             if isinstance(g, str) else None
+    return f"{user_id}_{timestamp}"
 #         )
 #         # ======================================================
 #         # RENAME kolom GEOMETRY → geom   (WAJIB)
 #         # ======================================================
 #         df = df.rename(columns={"GEOMETRY": "geom"})
 #         gdf = gpd.GeoDataFrame(df, geometry="geom", crs="EPSG:4326")
-#         # --- Wajib: gunakan engine sync TANPA asyncpg
+def save_xml_to_sld(xml_string, filename):
-#         loop = asyncio.get_running_loop()
+    folder_path = 'style_temp'
-#         await loop.run_in_executor(
+    os.makedirs(folder_path, exist_ok=True)
 #             None,
 #             lambda: gdf.to_postgis(
 #                 table_name,
 #                 sync_engine,      
 #                 if_exists="replace",
 #                 index=False
 #             )
 #         )
-#         # === STEP 4: add ID column ===
+    file_path = os.path.join(folder_path, f"{filename}.sld")
-#         async with engine.begin() as conn:
+
-#             await conn.execute(text(
+    with open(file_path, "w", encoding="utf-8") as f:
-#                 f'ALTER TABLE "{table_name}" ADD COLUMN _ID SERIAL PRIMARY KEY;'
+        f.write(xml_string)
-#             ))
+
    return file_path
 #         # === STEP 5: save author metadata ===
 #         author = payload.author
 #         async with engine.begin() as conn:
 #             await conn.execute(text("""
 #                 INSERT INTO backend.author_metadata (
 #                     table_title,
 #                     dataset_title,
 #                     dataset_abstract,
 #                     keywords,
 #                     topic_category,
 #                     date_created,
 #                     dataset_status,
 #                     organization_name,
 #                     contact_person_name,
 #                     contact_email,
 #                     contact_phone,
 #                     geom_type,
 #                     user_id
 #                 ) VALUES (
 #                     :table_title,
 #                     :dataset_title,
 #                     :dataset_abstract,
 #                     :keywords,
 #                     :topic_category,
 #                     :date_created,
 #                     :dataset_status,
 #                     :organization_name,
 #                     :contact_person_name,
 #                     :contact_email,
 #                     :contact_phone,
 #                     :geom_type,
 #                     :user_id
 #                 )
 #             """), {
 #                 "table_title": table_name,
 #                 "dataset_title": author.get("title") or payload.title,
 #                 "dataset_abstract": author.get("abstract"),
 #                 "keywords": author.get("keywords"),
 #                 "topic_category": author.get("topicCategory"),
 #                 "date_created": str_to_date(author.get("dateCreated")),
 #                 "dataset_status": author.get("status"),
 #                 "organization_name": author.get("organization"),
 #                 "contact_person_name": author.get("contactName"),
 #                 "contact_email": author.get("contactEmail"),
 #                 "contact_phone": author.get("contactPhone"),
 #                 "geom_type": json.dumps(list(gdf.geom_type.unique())),
 #                 "user_id": user_id
 #             })
 #         # === STEP 6: log success ===
 #         await log_activity(
 #             user_id=user_id,
 #             action_type="UPLOAD",
 #             action_title=f"Upload dataset {table_name}",
 #             details={"table_name": table_name, "rows": len(gdf)}
 #         )
 #         res = {
 #             "table_name": table_name,
 #             "status": "success",
 #             "message": f"Tabel '{table_name}' berhasil dibuat.",
 #             "total_rows": len(gdf),
 #             "geometry_type": list(gdf.geom_type.unique()),
 #         }
 #         return successRes(data=res)
 #     except Exception as e:
 #         await log_activity(
 #             user_id=user_id,
 #             action_type="ERROR",
 #             action_title="Upload gagal",
 #             details={"error": str(e)}
 #         )
 #         print(f"error : {str(e)}")
 #         raise HTTPException(status_code=500, detail=str(e))
 async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
    try:
@ -479,9 +572,10 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
        if not detected_crs:
            detected_crs = "EPSG:4326"
-
+        detected_crs = 'EPSG:4326'
        # Buat GeoDataFrame
        gdf = gpd.GeoDataFrame(df, geometry="geom", crs=detected_crs)
        row_count = len(gdf)
        # =====================================================================
        # 6. VERIFY CRS (SRID) VALID di PROJ / PostGIS
@ -533,7 +627,9 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
                    contact_email,
                    contact_phone,
                    geom_type,
-                    user_id
+                    user_id,
                    process,
                    geometry_count
                ) VALUES (
                    :table_title,
                    :dataset_title,
@ -547,14 +643,17 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
                    :contact_email,
                    :contact_phone,
                    :geom_type,
-                    :user_id
+                    :user_id,
                    :process,
                    :geometry_count
                )
            """), {
                "table_title": table_name,
-                "dataset_title": author.get("title") or payload.title,
+                "dataset_title": payload.title,
                "dataset_abstract": author.get("abstract"),
                "keywords": author.get("keywords"),
-                "topic_category": author.get("topicCategory"),
+                # "topic_category": author.get("topicCategory"),
                "topic_category": ", ".join(author.get("topicCategory")),
                "date_created": str_to_date(author.get("dateCreated")),
                "dataset_status": author.get("status"),
                "organization_name": author.get("organization"),
@ -562,7 +661,9 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
                "contact_email": author.get("contactEmail"),
                "contact_phone": author.get("contactPhone"),
                "geom_type": json.dumps(unified_geom_type),
-                "user_id": user_id
+                "user_id": user_id,
                "process": 'CLEANSING',
                "geometry_count": row_count
            })
@ -576,7 +677,9 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
            details={"table_name": table_name, "rows": len(gdf)}
        )
        job_id = generate_job_id(str(user_id))
        result = {
            "job_id": job_id,
            "table_name": table_name,
            "status": "success",
            "message": f"Tabel '{table_name}' berhasil dibuat.",
@ -584,6 +687,10 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
            "geometry_type": unified_geom_type,
            "crs": detected_crs,
        }
        save_xml_to_sld(payload.style, job_id)
        await report_progress(job_id, "upload", 20, "Upload selesai")
        cleansing_data(table_name, job_id)
        return successRes(data=result)
    except Exception as e: