add ai generate suggestion

This commit is contained in:
DmsAnhr 2025-12-22 15:19:20 +07:00
parent b4cd6a8a5d
commit e8c50aa460
2 changed files with 278 additions and 122 deletions

View File

@ -0,0 +1,49 @@
import requests
from typing import Dict, Any
from core.config import GEN_AI_URL
URL = GEN_AI_URL
def send_metadata(payload: Dict[str, Any]) -> Dict[str, Any]:
headers = {
"Content-Type": "application/json",
"API_KEY": "testsatupeta"
}
try:
response = requests.post(
f"{URL}",
json=payload,
headers=headers,
)
# response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
return {
"success": False,
"error": str(e)
}
if __name__ == "__main__":
# Contoh payload
payload = {
"nama_file_peta": "peta bencana.pdf",
"nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD)",
"tipe_data_spasial": "Multipolygon",
"struktur_atribut_data": {},
"metadata": {
"judul": "",
"abstrak": "",
"tujuan": "",
"keyword": [],
"kategori": [],
"kategori_mapset": ""
}
}
result = send_metadata(payload)
print(result)

View File

@ -5,10 +5,15 @@ import geopandas as gpd
import numpy as np import numpy as np
import re import re
import zipfile import zipfile
import tempfile
import asyncio
from pyproj import CRS
from shapely.geometry.base import BaseGeometry from shapely.geometry.base import BaseGeometry
from shapely.geometry import base as shapely_base from shapely.geometry import base as shapely_base
from fastapi import File, Form, UploadFile, HTTPException from fastapi import File, Form, UploadFile, HTTPException
from api.routers.datasets_router import cleansing_data
from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES
from services.upload_file.ai_generate import send_metadata
from services.upload_file.readers.reader_csv import read_csv from services.upload_file.readers.reader_csv import read_csv
from services.upload_file.readers.reader_shp import read_shp from services.upload_file.readers.reader_shp import read_shp
from services.upload_file.readers.reader_gdb import read_gdb from services.upload_file.readers.reader_gdb import read_gdb
@ -16,6 +21,7 @@ from services.upload_file.readers.reader_mpk import read_mpk
from services.upload_file.readers.reader_pdf import convert_df, read_pdf from services.upload_file.readers.reader_pdf import convert_df, read_pdf
from services.upload_file.utils.geometry_detector import detect_and_build_geometry from services.upload_file.utils.geometry_detector import detect_and_build_geometry
from services.upload_file.utils.geometry_detector import attach_polygon_geometry_auto from services.upload_file.utils.geometry_detector import attach_polygon_geometry_auto
from services.upload_file.upload_ws import report_progress
from database.connection import engine, sync_engine from database.connection import engine, sync_engine
from database.models import Base from database.models import Base
from pydantic import BaseModel from pydantic import BaseModel
@ -68,17 +74,172 @@ def detect_zip_type(zip_path: str) -> str:
return "unknown" return "unknown"
def process_data(df: pd.DataFrame, ext: str): # def detect_zip_type(zip_path: str) -> str:
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# files = zip_ref.namelist()
# # -------------------------------------------------------------
# # 1) DETECT FileGDB
# # -------------------------------------------------------------
# is_gdb = (
# any(".gdb/" in f.lower() for f in files)
# or any(f.lower().endswith(ext) for ext in
# [".gdbtable", ".gdbtablx", ".gdbindexes", ".spx"] for f in files)
# )
# if is_gdb:
# print("\n[INFO] ZIP terdeteksi berisi FileGDB.")
# with tempfile.TemporaryDirectory() as temp_dir:
# # extract ZIP
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# zip_ref.extractall(temp_dir)
# # find folder *.gdb
# gdb_path = None
# for root, dirs, _ in os.walk(temp_dir):
# for d in dirs:
# if d.lower().endswith(".gdb"):
# gdb_path = os.path.join(root, d)
# break
# if not gdb_path:
# print("[ERROR] Folder .gdb tidak ditemukan.")
# return "gdb"
# print(f"[INFO] GDB Path: {gdb_path}")
# # Cari seluruh file .gdbtable
# table_files = [
# os.path.join(gdb_path, f)
# for f in os.listdir(gdb_path)
# if f.lower().endswith(".gdbtable")
# ]
# if not table_files:
# print("[ERROR] Tidak ada file .gdbtable ditemukan.")
# return "gdb"
# # Scan semua table file untuk mencari SpatialReference
# found_crs = False
# for table_file in table_files:
# try:
# with open(table_file, "rb") as f:
# raw = f.read(15000) # baca awal file, cukup untuk header JSON
# text = raw.decode("utf-8", errors="ignore")
# start = text.find("{")
# end = text.rfind("}") + 1
# if start == -1 or end == -1:
# continue
# json_str = text[start:end]
# meta = json.loads(json_str)
# spatial_ref = meta.get("SpatialReference")
# if not spatial_ref:
# continue
# wkt = spatial_ref.get("WKT")
# if not wkt:
# continue
# print(f"[FOUND] CRS metadata pada: {os.path.basename(table_file)}")
# print(f"[CRS WKT] {wkt[:200]}...")
# # Convert to EPSG
# try:
# epsg = CRS.from_wkt(wkt).to_epsg()
# print(f"[EPSG] {epsg}")
# except:
# print("[EPSG] Tidak ditemukan EPSG.")
# found_crs = True
# break
# except Exception:
# continue
# if not found_crs:
# print("[WARNING] Tidak ditemukan CRS di file .gdbtable manapun.")
# return "gdb"
# # -----------------------------------------------------
# # 2. DETEKSI SHP
# # -----------------------------------------------------
# if any(f.lower().endswith(".shp") for f in files):
# print("\n[INFO] ZIP terdeteksi berisi SHP.")
# # cari file .prj
# prj_files = [f for f in files if f.lower().endswith(".prj")]
# if not prj_files:
# print("[WARNING] Tidak ada file .prj → CRS tidak diketahui.")
# return "shp"
# with zipfile.ZipFile(zip_path, "r") as zip_ref:
# with tempfile.TemporaryDirectory() as temp_dir:
# prj_path = os.path.join(temp_dir, os.path.basename(prj_files[0]))
# zip_ref.extract(prj_files[0], temp_dir)
# # baca isi prj
# with open(prj_path, "r") as f:
# prj_text = f.read()
# try:
# crs = CRS.from_wkt(prj_text)
# print(f"[CRS WKT] {crs.to_wkt()[:200]}...")
# epsg = crs.to_epsg()
# if epsg:
# print(f"[EPSG] {epsg}")
# else:
# print("[EPSG] Tidak ditemukan dalam database EPSG.")
# except Exception as e:
# print("[ERROR] Gagal membaca CRS dari file PRJ:", e)
# return "shp"
# # -----------------------------------------------------
# # 3. UNKNOWN
# # -----------------------------------------------------
# return "unknown"
def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
result = detect_and_build_geometry(df, master_polygons=None) result = detect_and_build_geometry(df, master_polygons=None)
if not hasattr(result, "geometry") or result.geometry.isna().all(): if not hasattr(result, "geometry") or result.geometry.isna().all():
result = attach_polygon_geometry_auto(result) result = attach_polygon_geometry_auto(result)
if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns: # if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \ # geom_type = ", ".join([g for g in result.geometry.geom_type.unique() if g]) \
if not result.empty else "None" # if not result.empty else "None"
# null_geom = result.geometry.isna().sum()
def normalize_geom_type(geom_type):
if geom_type.startswith("Multi"):
return geom_type.replace("Multi", "")
return geom_type
if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
geom_types = (
result.geometry
.dropna()
.geom_type
.apply(normalize_geom_type)
.unique()
)
geom_type = geom_types[0] if len(geom_types) > 0 else "None"
null_geom = result.geometry.isna().sum() null_geom = result.geometry.isna().sum()
print(f"[INFO] Tipe Geometry: {geom_type}") print(f"[INFO] Tipe Geometry: {geom_type}")
print(f"[INFO] Jumlah geometry kosong: {null_geom}") print(f"[INFO] Jumlah geometry kosong: {null_geom}")
else: else:
@ -131,17 +292,39 @@ def process_data(df: pd.DataFrame, ext: str):
{k: safe_json(v) for k, v in row.items()} for row in warning_examples {k: safe_json(v) for k, v in row.items()} for row in warning_examples
] ]
ai_context = {
"nama_file_peta": filename,
"nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD) Provinsi Jatim",
"tipe_data_spasial": geom_type,
"deskripsi_singkat": fileDesc,
"struktur_atribut_data": {},
# "metadata": {
# "judul": "",
# "abstrak": "",
# "tujuan": "",
# "keyword": [],
# "kategori": [],
# "kategori_mapset": ""
# }
}
ai_suggest = send_metadata(ai_context)
# ai_suggest = {'judul': 'Peta Risiko Letusan Gunung Arjuna di Provinsi Jawa Timur', 'abstrak': 'Peta ini menggambarkan wilayah berisiko letusan Gunung Arjuna yang berada di Provinsi Jawa Timur. Data disajikan dalam bentuk poligon yang menunjukkan zona risiko berdasarkan analisis potensi aktivitas vulkanik.', 'tujuan': 'Data dapat digunakan untuk perencanaan mitigasi bencana dan pengambilan keputusan di wilayah Jawa Timur.', 'keyword': ['Risiko letusan', 'Gunung Arjuna', 'Bencana alam', 'Provinsi Jawa Timur', 'Geologi'], 'kategori': ['Geoscientific information', 'Environment'], 'kategori_mapset': 'Lingkungan Hidup'}
print(ai_suggest)
response = { response = {
"message": "File berhasil dibaca dan dianalisis.", "message": "File berhasil dibaca dan dianalisis.",
"file_name": filename,
"file_type": ext, "file_type": ext,
"rows": int(len(result)), "rows": int(len(result)),
"columns": list(map(str, result.columns)), "columns": list(map(str, result.columns)),
"geometry_valid": int(valid_count), "geometry_valid": int(valid_count),
"geometry_empty": int(empty_count), "geometry_empty": int(empty_count),
"geometry_valid_percent": float(round(match_percentage, 2)), "geometry_valid_percent": float(round(match_percentage, 2)),
"geometry_type": geom_type,
"warnings": warnings, "warnings": warnings,
"warning_examples": warning_safe, "warning_rows": warning_safe,
"preview": preview_safe "preview": preview_safe,
"metadata_suggest": ai_suggest
} }
# return successRes(content=response) # return successRes(content=response)
@ -156,7 +339,7 @@ def process_data(df: pd.DataFrame, ext: str):
async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form("")): async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = Form(""), sheet: Optional[str] = Form(""), fileDesc: Optional[str] = Form("")):
fname = file.filename fname = file.filename
ext = os.path.splitext(fname)[1].lower() ext = os.path.splitext(fname)[1].lower()
contents = await file.read() contents = await file.read()
@ -216,7 +399,7 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
if df is None or (hasattr(df, "empty") and df.empty): if df is None or (hasattr(df, "empty") and df.empty):
return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid") return successRes(message="File berhasil dibaca, Tetapi tidak ditemukan tabel valid")
res = process_data(df, ext) res = process_data(df, ext, fname, fileDesc)
tmp_path.unlink(missing_ok=True) tmp_path.unlink(missing_ok=True)
@ -241,6 +424,8 @@ class PdfRequest(BaseModel):
title: str title: str
columns: List[str] columns: List[str]
rows: List[List] rows: List[List]
fileName: str
fileDesc: str
async def handle_process_pdf(payload: PdfRequest): async def handle_process_pdf(payload: PdfRequest):
try: try:
@ -248,7 +433,7 @@ async def handle_process_pdf(payload: PdfRequest):
if df is None or (hasattr(df, "empty") and df.empty): if df is None or (hasattr(df, "empty") and df.empty):
return errorRes(message="Tidak ada tabel") return errorRes(message="Tidak ada tabel")
res = process_data(df, '.pdf') res = process_data(df, '.pdf', payload.fileName, payload.fileDesc)
return successRes(data=res) return successRes(data=res)
except Exception as e: except Exception as e:
@ -271,6 +456,7 @@ class UploadRequest(BaseModel):
rows: List[dict] rows: List[dict]
columns: List[str] columns: List[str]
author: Dict[str, Any] author: Dict[str, Any]
style: str
# generate _2 if exist # generate _2 if exist
async def generate_unique_table_name(base_name: str): async def generate_unique_table_name(base_name: str):
@ -300,125 +486,32 @@ def str_to_date(raw_date: str):
print("[WARNING] Tidak bisa parse dateCreated:", e) print("[WARNING] Tidak bisa parse dateCreated:", e)
return None return None
import asyncio
# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
# try:
# table_name = await generate_unique_table_name(payload.title)
# df = pd.DataFrame(payload.rows)
# df.columns = [col.upper() for col in df.columns]
# if "GEOMETRY" not in df.columns:
# raise HTTPException(400, "Kolom GEOMETRY tidak ditemukan")
# df["GEOMETRY"] = df["GEOMETRY"].apply( def generate_job_id(user_id: str) -> str:
# lambda g: wkt.loads(g) timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
# if isinstance(g, str) else None return f"{user_id}_{timestamp}"
# )
# # ======================================================
# # RENAME kolom GEOMETRY → geom (WAJIB)
# # ======================================================
# df = df.rename(columns={"GEOMETRY": "geom"})
# gdf = gpd.GeoDataFrame(df, geometry="geom", crs="EPSG:4326")
# # --- Wajib: gunakan engine sync TANPA asyncpg def save_xml_to_sld(xml_string, filename):
# loop = asyncio.get_running_loop() folder_path = 'style_temp'
# await loop.run_in_executor( os.makedirs(folder_path, exist_ok=True)
# None,
# lambda: gdf.to_postgis(
# table_name,
# sync_engine,
# if_exists="replace",
# index=False
# )
# )
# # === STEP 4: add ID column === file_path = os.path.join(folder_path, f"{filename}.sld")
# async with engine.begin() as conn:
# await conn.execute(text( with open(file_path, "w", encoding="utf-8") as f:
# f'ALTER TABLE "{table_name}" ADD COLUMN _ID SERIAL PRIMARY KEY;' f.write(xml_string)
# ))
return file_path
# # === STEP 5: save author metadata ===
# author = payload.author
# async with engine.begin() as conn:
# await conn.execute(text("""
# INSERT INTO backend.author_metadata (
# table_title,
# dataset_title,
# dataset_abstract,
# keywords,
# topic_category,
# date_created,
# dataset_status,
# organization_name,
# contact_person_name,
# contact_email,
# contact_phone,
# geom_type,
# user_id
# ) VALUES (
# :table_title,
# :dataset_title,
# :dataset_abstract,
# :keywords,
# :topic_category,
# :date_created,
# :dataset_status,
# :organization_name,
# :contact_person_name,
# :contact_email,
# :contact_phone,
# :geom_type,
# :user_id
# )
# """), {
# "table_title": table_name,
# "dataset_title": author.get("title") or payload.title,
# "dataset_abstract": author.get("abstract"),
# "keywords": author.get("keywords"),
# "topic_category": author.get("topicCategory"),
# "date_created": str_to_date(author.get("dateCreated")),
# "dataset_status": author.get("status"),
# "organization_name": author.get("organization"),
# "contact_person_name": author.get("contactName"),
# "contact_email": author.get("contactEmail"),
# "contact_phone": author.get("contactPhone"),
# "geom_type": json.dumps(list(gdf.geom_type.unique())),
# "user_id": user_id
# })
# # === STEP 6: log success ===
# await log_activity(
# user_id=user_id,
# action_type="UPLOAD",
# action_title=f"Upload dataset {table_name}",
# details={"table_name": table_name, "rows": len(gdf)}
# )
# res = {
# "table_name": table_name,
# "status": "success",
# "message": f"Tabel '{table_name}' berhasil dibuat.",
# "total_rows": len(gdf),
# "geometry_type": list(gdf.geom_type.unique()),
# }
# return successRes(data=res)
# except Exception as e:
# await log_activity(
# user_id=user_id,
# action_type="ERROR",
# action_title="Upload gagal",
# details={"error": str(e)}
# )
# print(f"error : {str(e)}")
# raise HTTPException(status_code=500, detail=str(e))
async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
try: try:
@ -479,9 +572,10 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
if not detected_crs: if not detected_crs:
detected_crs = "EPSG:4326" detected_crs = "EPSG:4326"
detected_crs = 'EPSG:4326'
# Buat GeoDataFrame # Buat GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry="geom", crs=detected_crs) gdf = gpd.GeoDataFrame(df, geometry="geom", crs=detected_crs)
row_count = len(gdf)
# ===================================================================== # =====================================================================
# 6. VERIFY CRS (SRID) VALID di PROJ / PostGIS # 6. VERIFY CRS (SRID) VALID di PROJ / PostGIS
@ -533,7 +627,9 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
contact_email, contact_email,
contact_phone, contact_phone,
geom_type, geom_type,
user_id user_id,
process,
geometry_count
) VALUES ( ) VALUES (
:table_title, :table_title,
:dataset_title, :dataset_title,
@ -547,14 +643,17 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
:contact_email, :contact_email,
:contact_phone, :contact_phone,
:geom_type, :geom_type,
:user_id :user_id,
:process,
:geometry_count
) )
"""), { """), {
"table_title": table_name, "table_title": table_name,
"dataset_title": author.get("title") or payload.title, "dataset_title": payload.title,
"dataset_abstract": author.get("abstract"), "dataset_abstract": author.get("abstract"),
"keywords": author.get("keywords"), "keywords": author.get("keywords"),
"topic_category": author.get("topicCategory"), # "topic_category": author.get("topicCategory"),
"topic_category": ", ".join(author.get("topicCategory")),
"date_created": str_to_date(author.get("dateCreated")), "date_created": str_to_date(author.get("dateCreated")),
"dataset_status": author.get("status"), "dataset_status": author.get("status"),
"organization_name": author.get("organization"), "organization_name": author.get("organization"),
@ -562,7 +661,9 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
"contact_email": author.get("contactEmail"), "contact_email": author.get("contactEmail"),
"contact_phone": author.get("contactPhone"), "contact_phone": author.get("contactPhone"),
"geom_type": json.dumps(unified_geom_type), "geom_type": json.dumps(unified_geom_type),
"user_id": user_id "user_id": user_id,
"process": 'CLEANSING',
"geometry_count": row_count
}) })
@ -576,7 +677,9 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
details={"table_name": table_name, "rows": len(gdf)} details={"table_name": table_name, "rows": len(gdf)}
) )
job_id = generate_job_id(str(user_id))
result = { result = {
"job_id": job_id,
"table_name": table_name, "table_name": table_name,
"status": "success", "status": "success",
"message": f"Tabel '{table_name}' berhasil dibuat.", "message": f"Tabel '{table_name}' berhasil dibuat.",
@ -584,6 +687,10 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
"geometry_type": unified_geom_type, "geometry_type": unified_geom_type,
"crs": detected_crs, "crs": detected_crs,
} }
save_xml_to_sld(payload.style, job_id)
await report_progress(job_id, "upload", 20, "Upload selesai")
cleansing_data(table_name, job_id)
return successRes(data=result) return successRes(data=result)
except Exception as e: except Exception as e: