Add AI-generated metadata suggestions

This commit is contained in:
DmsAnhr 2025-12-22 15:19:20 +07:00
parent b4cd6a8a5d
commit e8c50aa460
2 changed files with 278 additions and 122 deletions

View File

@ -0,0 +1,49 @@
import requests
from typing import Dict, Any
from core.config import GEN_AI_URL
URL = GEN_AI_URL
def send_metadata(payload: Dict[str, Any], timeout: float = 30.0) -> Dict[str, Any]:
    """POST *payload* to the generative-AI metadata service and return its JSON reply.

    Parameters:
        payload: JSON-serializable request body (map-metadata context).
        timeout: seconds to wait for connect/response before aborting the call.

    Returns:
        The decoded JSON response on success, otherwise a dict of the form
        ``{"success": False, "error": "<message>"}`` when the HTTP request
        or the JSON decoding fails.
    """
    headers = {
        "Content-Type": "application/json",
        # NOTE(review): hardcoded credential — should move to config/env
        # alongside GEN_AI_URL rather than live in source.
        "API_KEY": "testsatupeta"
    }
    try:
        response = requests.post(
            URL,
            json=payload,
            headers=headers,
            # requests has no default timeout; without one this call can hang forever.
            timeout=timeout,
        )
        # response.raise_for_status()  # left disabled: non-2xx bodies are still parsed below
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError also covers json.JSONDecodeError raised by response.json()
        # when the service replies with a non-JSON body.
        return {
            "success": False,
            "error": str(e)
        }
if __name__ == "__main__":
    # Example request body mirroring what the upload pipeline sends.
    sample_payload = {
        "nama_file_peta": "peta bencana.pdf",
        "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD)",
        "tipe_data_spasial": "Multipolygon",
        "struktur_atribut_data": {},
        "metadata": {
            "judul": "",
            "abstrak": "",
            "tujuan": "",
            "keyword": [],
            "kategori": [],
            "kategori_mapset": ""
        },
    }
    # Print whatever the service (or the error fallback) returns.
    print(send_metadata(sample_payload))

View File

@ -5,10 +5,15 @@ import geopandas as gpd
import numpy as np
import re
import zipfile
import tempfile
import asyncio
from pyproj import CRS
from shapely.geometry.base import BaseGeometry
from shapely.geometry import base as shapely_base
from fastapi import File, Form, UploadFile, HTTPException
from api.routers.datasets_router import cleansing_data
from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES
from services.upload_file.ai_generate import send_metadata
from services.upload_file.readers.reader_csv import read_csv
from services.upload_file.readers.reader_shp import read_shp
from services.upload_file.readers.reader_gdb import read_gdb
@ -16,6 +21,7 @@ from services.upload_file.readers.reader_mpk import read_mpk
from services.upload_file.readers.reader_pdf import convert_df, read_pdf
from services.upload_file.utils.geometry_detector import detect_and_build_geometry
from services.upload_file.utils.geometry_detector import attach_polygon_geometry_auto
from services.upload_file.upload_ws import report_progress
from database.connection import engine, sync_engine
from database.models import Base
from pydantic import BaseModel
@ -68,17 +74,172 @@ def detect_zip_type(zip_path: str) -> str:
return "unknown"
def process_data(df: pd.DataFrame, ext: str):
# def detect_zip_type(zip_path: str) -> str:
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# files = zip_ref.namelist()
# # -------------------------------------------------------------
# # 1) DETECT FileGDB
# # -------------------------------------------------------------
# is_gdb = (
# any(".gdb/" in f.lower() for f in files)
# or any(f.lower().endswith(ext) for ext in
# [".gdbtable", ".gdbtablx", ".gdbindexes", ".spx"] for f in files)
# )
# if is_gdb:
# print("\n[INFO] ZIP terdeteksi berisi FileGDB.")
# with tempfile.TemporaryDirectory() as temp_dir:
# # extract ZIP
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# zip_ref.extractall(temp_dir)
# # find folder *.gdb
# gdb_path = None
# for root, dirs, _ in os.walk(temp_dir):
# for d in dirs:
# if d.lower().endswith(".gdb"):
# gdb_path = os.path.join(root, d)
# break
# if not gdb_path:
# print("[ERROR] Folder .gdb tidak ditemukan.")
# return "gdb"
# print(f"[INFO] GDB Path: {gdb_path}")
# # Cari seluruh file .gdbtable
# table_files = [
# os.path.join(gdb_path, f)
# for f in os.listdir(gdb_path)
# if f.lower().endswith(".gdbtable")
# ]
# if not table_files:
# print("[ERROR] Tidak ada file .gdbtable ditemukan.")
# return "gdb"
# # Scan semua table file untuk mencari SpatialReference
# found_crs = False
# for table_file in table_files:
# try:
# with open(table_file, "rb") as f:
# raw = f.read(15000) # baca awal file, cukup untuk header JSON
# text = raw.decode("utf-8", errors="ignore")
# start = text.find("{")
# end = text.rfind("}") + 1
# if start == -1 or end == -1:
# continue
# json_str = text[start:end]
# meta = json.loads(json_str)
# spatial_ref = meta.get("SpatialReference")
# if not spatial_ref:
# continue
# wkt = spatial_ref.get("WKT")
# if not wkt:
# continue
# print(f"[FOUND] CRS metadata pada: {os.path.basename(table_file)}")
# print(f"[CRS WKT] {wkt[:200]}...")
# # Convert to EPSG
# try:
# epsg = CRS.from_wkt(wkt).to_epsg()
# print(f"[EPSG] {epsg}")
# except:
# print("[EPSG] Tidak ditemukan EPSG.")
# found_crs = True
# break
# except Exception:
# continue
# if not found_crs:
# print("[WARNING] Tidak ditemukan CRS di file .gdbtable manapun.")
# return "gdb"
# # -----------------------------------------------------
# # 2. DETEKSI SHP
# # -----------------------------------------------------
# if any(f.lower().endswith(".shp") for f in files):
# print("\n[INFO] ZIP terdeteksi berisi SHP.")
# # cari file .prj
# prj_files = [f for f in files if f.lower().endswith(".prj")]
# if not prj_files:
# print("[WARNING] Tidak ada file .prj → CRS tidak diketahui.")
# return "shp"
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# with tempfile.TemporaryDirectory() as temp_dir:
# prj_path = os.path.join(temp_dir, os.path.basename(prj_files[0]))
# zip_ref.extract(prj_files[0], temp_dir)
# # baca isi prj
# with open(prj_path, "r") as f:
# prj_text = f.read()
# try:
# crs = CRS.from_wkt(prj_text)
# print(f"[CRS WKT] {crs.to_wkt()[:200]}...")
# epsg = crs.to_epsg()
# if epsg:
# print(f"[EPSG] {epsg}")
# else:
# print("[EPSG] Tidak ditemukan dalam database EPSG.")
# except Exception as e:
# print("[ERROR] Gagal membaca CRS dari file PRJ:", e)
# return "shp"
# # -----------------------------------------------------
# # 3. UNKNOWN
# # -----------------------------------------------------
# return "unknown"
def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
result = detect_and_build_geometry(df, master_polygons=None)
if not hasattr(result, "geometry") or result.geometry.isna().all():
result = attach_polygon_geometry_auto(result)
if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
if not result.empty else "None"
# if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
# geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
# if not result.empty else "None"
# null_geom = result.geometry.isna().sum()
def normalize_geom_type(geom_type):
    """Collapse a Multi* geometry type to its singular form.

    E.g. 'MultiPolygon' -> 'Polygon'; non-Multi types pass through unchanged.
    """
    if geom_type.startswith("Multi"):
        # removeprefix strips only the leading 'Multi'; str.replace would
        # rewrite every occurrence of the substring, not just the prefix.
        return geom_type.removeprefix("Multi")
    return geom_type
if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
geom_types = (
result.geometry
.dropna()
.geom_type
.apply(normalize_geom_type)
.unique()
)
geom_type = geom_types[0] if len(geom_types) > 0 else "None"
null_geom = result.geometry.isna().sum()
print(f"[INFO] Tipe Geometry: {geom_type}")
print(f"[INFO] Jumlah geometry kosong: {null_geom}")
else:
@ -130,20 +291,42 @@ def process_data(df: pd.DataFrame, ext: str):
warning_safe = [
{k: safe_json(v) for k, v in row.items()} for row in warning_examples
]
ai_context = {
"nama_file_peta": filename,
"nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD) Provinsi Jatim",
"tipe_data_spasial": geom_type,
"deskripsi_singkat": fileDesc,
"struktur_atribut_data": {},
# "metadata": {
# "judul": "",
# "abstrak": "",
# "tujuan": "",
# "keyword": [],
# "kategori": [],
# "kategori_mapset": ""
# }
}
ai_suggest = send_metadata(ai_context)
# ai_suggest = {'judul': 'Peta Risiko Letusan Gunung Arjuna di Provinsi Jawa Timur', 'abstrak': 'Peta ini menggambarkan wilayah berisiko letusan Gunung Arjuna yang berada di Provinsi Jawa Timur. Data disajikan dalam bentuk poligon yang menunjukkan zona risiko berdasarkan analisis potensi aktivitas vulkanik.', 'tujuan': 'Data dapat digunakan untuk perencanaan mitigasi bencana dan pengambilan keputusan di wilayah Jawa Timur.', 'keyword': ['Risiko letusan', 'Gunung Arjuna', 'Bencana alam', 'Provinsi Jawa Timur', 'Geologi'], 'kategori': ['Geoscientific information', 'Environment'], 'kategori_mapset': 'Lingkungan Hidup'}
print(ai_suggest)
response = {
"message": "File berhasil dibaca dan dianalisis.",
"file_name": filename,
"file_type": ext,
"rows": int(len(result)),
"columns": list(map(str, result.columns)),
"geometry_valid": int(valid_count),
"geometry_empty": int(empty_count),
"geometry_valid_percent": float(round(match_percentage, 2)),
"geometry_type": geom_type,
"warnings": warnings,
"warning_examples": warning_safe,
"preview": preview_safe
"warning_rows": warning_safe,
"preview": preview_safe,
"metadata_suggest": ai_suggest
}
# return successRes(content=response)
return response
@ -156,7 +339,7 @@ def process_data(df: pd.DataFrame, ext: str):
async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form("")):
async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form(""), fileDesc: Optional[str] = Form("")):
fname = file.filename
ext = os.path.splitext(fname)[1].lower()
contents = await file.read()
@ -216,7 +399,7 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
if df is None or (hasattr(df, "empty") and df.empty):
return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid")
res = process_data(df, ext)
res = process_data(df, ext, fname, fileDesc)
tmp_path.unlink(missing_ok=True)
@ -241,6 +424,8 @@ class PdfRequest(BaseModel):
title: str
columns: List[str]
rows: List[List]
fileName: str
fileDesc: str
async def handle_process_pdf(payload: PdfRequest):
try:
@ -248,7 +433,7 @@ async def handle_process_pdf(payload: PdfRequest):
if df is None or (hasattr(df, "empty") and df.empty):
return errorRes(message="Tidak ada tabel")
res = process_data(df, '.pdf')
res = process_data(df, '.pdf', payload.fileName, payload.fileDesc)
return successRes(data=res)
except Exception as e:
@ -271,6 +456,7 @@ class UploadRequest(BaseModel):
rows: List[dict]
columns: List[str]
author: Dict[str, Any]
style: str
# generate _2 if exist
async def generate_unique_table_name(base_name: str):
@ -300,125 +486,32 @@ def str_to_date(raw_date: str):
print("[WARNING] Tidak bisa parse dateCreated:", e)
return None
import asyncio
# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
# try:
# table_name = await generate_unique_table_name(payload.title)
# df = pd.DataFrame(payload.rows)
# df.columns = [col.upper() for col in df.columns]
# if "GEOMETRY" not in df.columns:
# raise HTTPException(400, "Kolom GEOMETRY tidak ditemukan")
# df["GEOMETRY"] = df["GEOMETRY"].apply(
# lambda g: wkt.loads(g)
# if isinstance(g, str) else None
# )
def generate_job_id(user_id: str) -> str:
    """Build a job identifier of the form '<user_id>_<YYYYMMDDHHMMSS>'.

    NOTE(review): uses naive local time to the second — two jobs for the
    same user within one second would collide; confirm this is acceptable.
    """
    stamp = f"{datetime.now():%Y%m%d%H%M%S}"
    return f"{user_id}_{stamp}"
# # ======================================================
# # RENAME kolom GEOMETRY → geom (WAJIB)
# # ======================================================
# df = df.rename(columns={"GEOMETRY": "geom"})
# gdf = gpd.GeoDataFrame(df, geometry="geom", crs="EPSG:4326")
# # --- Wajib: gunakan engine sync TANPA asyncpg
# loop = asyncio.get_running_loop()
# await loop.run_in_executor(
# None,
# lambda: gdf.to_postgis(
# table_name,
# sync_engine,
# if_exists="replace",
# index=False
# )
# )
def save_xml_to_sld(xml_string, filename):
    """Write an SLD (XML) style document to style_temp/<filename>.sld.

    Parameters:
        xml_string: the SLD/XML content to persist.
        filename: base name for the output file (the caller passes a job_id).

    Returns:
        The path of the written file.
    """
    folder_path = 'style_temp'
    os.makedirs(folder_path, exist_ok=True)
    # Bug fix: the original f-string had no placeholder, so every job
    # overwrote the same literal file regardless of `filename`.
    file_path = os.path.join(folder_path, f"{filename}.sld")
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(xml_string)
    return file_path
# # === STEP 4: add ID column ===
# async with engine.begin() as conn:
# await conn.execute(text(
# f'ALTER TABLE "{table_name}" ADD COLUMN _ID SERIAL PRIMARY KEY;'
# ))
# # === STEP 5: save author metadata ===
# author = payload.author
# async with engine.begin() as conn:
# await conn.execute(text("""
# INSERT INTO backend.author_metadata (
# table_title,
# dataset_title,
# dataset_abstract,
# keywords,
# topic_category,
# date_created,
# dataset_status,
# organization_name,
# contact_person_name,
# contact_email,
# contact_phone,
# geom_type,
# user_id
# ) VALUES (
# :table_title,
# :dataset_title,
# :dataset_abstract,
# :keywords,
# :topic_category,
# :date_created,
# :dataset_status,
# :organization_name,
# :contact_person_name,
# :contact_email,
# :contact_phone,
# :geom_type,
# :user_id
# )
# """), {
# "table_title": table_name,
# "dataset_title": author.get("title") or payload.title,
# "dataset_abstract": author.get("abstract"),
# "keywords": author.get("keywords"),
# "topic_category": author.get("topicCategory"),
# "date_created": str_to_date(author.get("dateCreated")),
# "dataset_status": author.get("status"),
# "organization_name": author.get("organization"),
# "contact_person_name": author.get("contactName"),
# "contact_email": author.get("contactEmail"),
# "contact_phone": author.get("contactPhone"),
# "geom_type": json.dumps(list(gdf.geom_type.unique())),
# "user_id": user_id
# })
# # === STEP 6: log success ===
# await log_activity(
# user_id=user_id,
# action_type="UPLOAD",
# action_title=f"Upload dataset {table_name}",
# details={"table_name": table_name, "rows": len(gdf)}
# )
# res = {
# "table_name": table_name,
# "status": "success",
# "message": f"Tabel '{table_name}' berhasil dibuat.",
# "total_rows": len(gdf),
# "geometry_type": list(gdf.geom_type.unique()),
# }
# return successRes(data=res)
# except Exception as e:
# await log_activity(
# user_id=user_id,
# action_type="ERROR",
# action_title="Upload gagal",
# details={"error": str(e)}
# )
# print(f"error : {str(e)}")
# raise HTTPException(status_code=500, detail=str(e))
async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
try:
@ -479,9 +572,10 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
if not detected_crs:
detected_crs = "EPSG:4326"
detected_crs = 'EPSG:4326'
# Buat GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry="geom", crs=detected_crs)
row_count = len(gdf)
# =====================================================================
# 6. VERIFY CRS (SRID) VALID di PROJ / PostGIS
@ -533,7 +627,9 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
contact_email,
contact_phone,
geom_type,
user_id
user_id,
process,
geometry_count
) VALUES (
:table_title,
:dataset_title,
@ -547,14 +643,17 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
:contact_email,
:contact_phone,
:geom_type,
:user_id
:user_id,
:process,
:geometry_count
)
"""), {
"table_title": table_name,
"dataset_title": author.get("title") or payload.title,
"dataset_title": payload.title,
"dataset_abstract": author.get("abstract"),
"keywords": author.get("keywords"),
"topic_category": author.get("topicCategory"),
# "topic_category": author.get("topicCategory"),
"topic_category": ", ".join(author.get("topicCategory")),
"date_created": str_to_date(author.get("dateCreated")),
"dataset_status": author.get("status"),
"organization_name": author.get("organization"),
@ -562,7 +661,9 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
"contact_email": author.get("contactEmail"),
"contact_phone": author.get("contactPhone"),
"geom_type": json.dumps(unified_geom_type),
"user_id": user_id
"user_id": user_id,
"process": 'CLEANSING',
"geometry_count": row_count
})
@ -576,7 +677,9 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
details={"table_name": table_name, "rows": len(gdf)}
)
job_id = generate_job_id(str(user_id))
result = {
"job_id": job_id,
"table_name": table_name,
"status": "success",
"message": f"Tabel '{table_name}' berhasil dibuat.",
@ -584,6 +687,10 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
"geometry_type": unified_geom_type,
"crs": detected_crs,
}
save_xml_to_sld(payload.style, job_id)
await report_progress(job_id, "upload", 20, "Upload selesai")
cleansing_data(table_name, job_id)
return successRes(data=result)
except Exception as e: