diff --git a/.gitignore b/.gitignore index b4f33eb..7955214 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,7 @@ test-ai/ uploads/ scrapp/ logs/ -style_temp/ \ No newline at end of file +style_temp/ +services/styles/ + +cleansing_func.sql \ No newline at end of file diff --git a/api/routers/datasets_router.py b/api/routers/datasets_router.py index a86fa57..72219b9 100644 --- a/api/routers/datasets_router.py +++ b/api/routers/datasets_router.py @@ -1,8 +1,10 @@ import asyncio from uuid import uuid4 from fastapi import APIRouter, HTTPException +import httpx import requests from sqlalchemy import text +from sqlalchemy.exc import SQLAlchemyError from database.connection import engine from services.datasets.delete import delete_dataset_from_partition # import fungsi di atas from response import successRes, errorRes @@ -10,7 +12,7 @@ from services.datasets.publish_geonetwork import publish_metadata from services.datasets.publish_geoserver import publish_layer_to_geoserver from services.datasets.metadata import update_job_status from services.upload_file.upload_ws import report_progress -from core.config import GEOSERVER_URL, GEOSERVER_USER, GEOSERVER_PASS, QGIS_URL +from core.config import GEOSERVER_URL, GEOSERVER_USER, GEOSERVER_PASS, QGIS_URL, MAIN_API_URL, SERVICE_KEY router = APIRouter() @@ -206,4 +208,111 @@ def get_style(style_name: str, workspace: str = None): raise HTTPException(status_code=500, detail=f"GeoServer error: {response.text}") except requests.exceptions.RequestException as e: - raise HTTPException(status_code=500, detail=f"Request error: {str(e)}") \ No newline at end of file + raise HTTPException(status_code=500, detail=f"Request error: {str(e)}") + + +# ============================================================= +# cleansing query +# ============================================================= + +# async def query_cleansing_data_fn( +# table_name: str +# ): +# try: +# async with engine.begin() as conn: +# await conn.execute( +# text("SELECT public.fn_cleansing_satupeta_polygon(:table_name)"), +# {"table_name": table_name} +# ) +# return "done" + +# except SQLAlchemyError as e: +# raise RuntimeError(f"Fix geometry failed: {str(e)}") + +async def query_cleansing_data( + table_name: str +): + try: + async with engine.begin() as conn: + await conn.execute( + text("CALL pr_cleansing_satupeta_polygon(:table_name, NULL);"), + {"table_name": table_name} + ) + return "done" + + except SQLAlchemyError as e: + raise RuntimeError(f"Fix geometry failed: {str(e)}") + + + +async def publish_layer(table_name: str, job_id: str): + # await asyncio.sleep(10) + try: + await report_progress(job_id, "cleansing", 50, "Cleansing data selesai") + # await asyncio.sleep(5) + + geos_link = publish_layer_to_geoserver(table_name, job_id) + await report_progress(job_id, "publish_geoserver", 80, "Publish GeoServer selesai") + # await asyncio.sleep(3) + + uuid = publish_metadata( + table_name=table_name, + geoserver_links=geos_link + ) + await report_progress(job_id, "done", 100, "Publish GeoNetwork selesai") + + update_job_status(table_name, "FINISHED", job_id) + # return uuid + return { + "geos_link": geos_link["layer_url"], + "uuid": uuid + } + + except Exception as e: + update_job_status(table_name, "FAILED", job_id) + raise RuntimeError(f"Publish layer gagal: {e}") from e + + + + + + + +async def upload_to_main(payload: dict): + try: + async with httpx.AsyncClient(timeout=10) as client: + response = await client.post( + MAIN_API_URL+"/api/internal/mapsets", + json=payload, + headers={ + "X-SERVICE-KEY": SERVICE_KEY + } + ) + response.raise_for_status() + + print("GOOOOO") + return { + "status": "success", + "data": response.json() + } + + except httpx.RequestError as e: + # error koneksi, DNS, timeout, dll + raise HTTPException( + status_code=500, + detail=f"Gagal connect ke MAIN_API: {str(e)}" + ) + + except httpx.HTTPStatusError as e: + # API tujuan balas tapi error (4xx / 5xx) + raise HTTPException( + status_code=e.response.status_code, + detail=f"MAIN_API error: {e.response.text}" + ) + + except Exception as e: + # fallback kalau ada error lain + raise HTTPException( + status_code=500, + detail=f"Unexpected error: {str(e)}" + ) \ No newline at end of file diff --git a/core/config.py b/core/config.py index fe2e1db..315b3d3 100644 --- a/core/config.py +++ b/core/config.py @@ -6,6 +6,9 @@ load_dotenv() API_VERSION = "2.1.3" +MAIN_API_URL = os.getenv("MAIN_API_URL") +SERVICE_KEY = os.getenv("SERVICE_KEY") + POSTGIS_URL = os.getenv("POSTGIS_URL") POSTGIS_SYNC_URL = os.getenv("SYNC_URL") @@ -26,6 +29,7 @@ UPLOAD_FOLDER = Path(os.getenv("UPLOAD_FOLDER", "./uploads")) MAX_FILE_MB = int(os.getenv("MAX_FILE_MB", 30)) ALLOWED_ORIGINS = [ + "http://localhost:4000", "http://localhost:3000", "http://127.0.0.1:3000", "http://localhost:5173", diff --git a/services/auth/login.py b/services/auth/login.py index f0af30c..1201545 100644 --- a/services/auth/login.py +++ b/services/auth/login.py @@ -24,9 +24,9 @@ async def loginService(username: str, password: str, db: AsyncSession): if not pwd_context.verify(password, user.password_hash): raise errorRes(status_code=401, message="Invalid username or password") - # Validation for institution user - if user.role != "admin" and not user.institution_id: - raise errorRes(status_code=403, message="User must belong to an institution") + # Validation for organization user + if user.role != "admin" and not user.organization_id: + raise errorRes(status_code=403, message="User must belong to an organization") # Generate single active token token = str(uuid4()) @@ -41,7 +41,7 @@ async def loginService(username: str, password: str, db: AsyncSession): "status": "success", "username": user.username, "role": user.role, - "institution_id": user.institution_id, + "organization_id": user.organization_id, "token": token, "token_expired_at": expiry.isoformat() } diff --git a/services/datasets/metadata.py b/services/datasets/metadata.py index c01802a..5d345cc 100644 --- a/services/datasets/metadata.py +++ b/services/datasets/metadata.py @@ -4,7 +4,6 @@ from database.connection import sync_engine from utils.logger_config import log_activity def update_job_status(table_name: str, status: str, job_id: str = None): - print("update status") query = text(""" UPDATE backend.author_metadata SET process = :status, diff --git a/services/datasets/publish_geonetwork.py b/services/datasets/publish_geonetwork.py index 92f0327..7b781b5 100644 --- a/services/datasets/publish_geonetwork.py +++ b/services/datasets/publish_geonetwork.py @@ -7,6 +7,22 @@ from datetime import datetime from uuid import uuid4 import re + + +def create_gn_session(): + session = requests.Session() + session.auth = (GEONETWORK_USER, GEONETWORK_PASS) + + session.get(f"{GEONETWORK_URL}/srv/eng/info?type=me") + xsrf_token = session.cookies.get("XSRF-TOKEN") + + if not xsrf_token: + raise Exception("XSRF token missing") + + return session, xsrf_token + + + def escape_url_params(url: str) -> str: """ Escape karakter berbahaya di dalam URL agar valid dalam XML. @@ -73,30 +89,6 @@ def get_extent(table_name: str): "ymax": -5.4819 # north } - -# def get_author_metadata(table_name: str): - -# sql = """ -# SELECT table_title, dataset_title, dataset_abstract, keywords, date_created, -# organization_name, contact_person_name, -# contact_email, contact_phone, geom_type -# FROM backend.author_metadata -# WHERE table_title = :table -# LIMIT 1 -# """ - -# conn = engine.connect() -# try: -# row = conn.execute(text(sql), {"table": table_name}).fetchone() -# finally: -# conn.close() - -# if not row: -# raise Exception(f"Tidak ada metadata untuk tabel: {table_name}") - -# # FIX: SQLAlchemy Row → dict -# return dict(row._mapping) - def get_author_metadata(table_name: str): sql = """ @@ -356,10 +348,10 @@ def generate_metadata_xml(table_name, meta, extent, geoserver_links): - Dinas Tenaga Kerja dan Transmigrasi Provinsi Jawa Timur + Lab AI Polinema - Dinas Tenaga Kerja dan Transmigrasi Provinsi Jawa Timur + Lab AI Polinema @@ -510,9 +502,7 @@ def generate_metadata_xml(table_name, meta, extent, geoserver_links): - - https://geoserver.jatimprov.go.id/wms?service=WMS&version=1.1.0&request=GetMap&layers=satupeta:fngsl_trw_1_4_2020_2023&bbox=110.89527893066406,-8.78022289276123,116.27019500732422,-5.042964935302734&width=768&height=534&srs=EPSG:4326&styles=&format=application/openlayers - + {geoserver_links["wms_url"]} WWW:LINK-1.0-http--link @@ -578,21 +568,23 @@ def generate_metadata_xml(table_name, meta, extent, geoserver_links): """ +# Geonetwork version 4.4.9.0 def upload_metadata_to_geonetwork(xml_metadata: str): - session = requests.Session() - session.auth = (GEONETWORK_USER, GEONETWORK_PASS) + # session = requests.Session() + # session.auth = (GEONETWORK_USER, GEONETWORK_PASS) - # 1. Get XSRF token - try: - info_url = f"{GEONETWORK_URL}/srv/eng/info?type=me" - session.get(info_url) - except requests.exceptions.RequestException as e: - raise HTTPException(status_code=503, detail=f"Failed to connect to GeoNetwork: {e}") + # # 1. Get XSRF token + # try: + # info_url = f"{GEONETWORK_URL}/srv/eng/info?type=me" + # session.get(info_url) + # except requests.exceptions.RequestException as e: + # raise HTTPException(status_code=503, detail=f"Failed to connect to GeoNetwork: {e}") - xsrf_token = session.cookies.get('XSRF-TOKEN') - if not xsrf_token: - raise HTTPException(status_code=500, detail="Could not retrieve XSRF-TOKEN from GeoNetwork.") + # xsrf_token = session.cookies.get('XSRF-TOKEN') + # if not xsrf_token: + # raise HTTPException(status_code=500, detail="Could not retrieve XSRF-TOKEN from GeoNetwork.") + session, xsrf_token = create_gn_session() headers = { 'X-XSRF-TOKEN': xsrf_token, 'Accept': 'application/json' @@ -604,16 +596,33 @@ def upload_metadata_to_geonetwork(xml_metadata: str): files = { 'file': ('metadata.xml', xml_metadata, 'application/xml') } + + params = { + "ownerGroup": 1, # all + "ownerUser": 1 # admin + } response = session.post( GN_API_RECORDS_URL, + params=params, files=files, headers=headers, cookies=session.cookies.get_dict() ) + + metadata_infos = response.json().get("metadataInfos", {}) + uuid = None + for records in metadata_infos.values(): + if records and isinstance(records, list): + uuid = records[0].get("uuid") + break + if not uuid: + raise ValueError("UUID not found in GeoNetwork response") - # print("response", response.json()) - return response.json() + publish_record(session, uuid) + + # print("response", response.json()) + return uuid @@ -629,11 +638,59 @@ def publish_metadata(table_name: str, geoserver_links: dict): ) xml_clean = fix_xml_urls(xml) - response = upload_metadata_to_geonetwork(xml_clean) - - uuid = response.get("uuid") + uuid = upload_metadata_to_geonetwork(xml_clean) + print(f"[GeoNetwork] Metadata uploaded. UUID = {uuid}") return uuid + +def publish_record(session, uuid): + print('[uuid]', uuid) + xsrf_token = session.cookies.get('XSRF-TOKEN') + + headers = { + "X-XSRF-TOKEN": xsrf_token, + "Accept": "application/json", + "Content-Type": "application/json" + } + + url = f"{GEONETWORK_URL}/srv/api/records/{uuid}/sharing" + + payload = { + "clear": True, + "privileges": [ + { + "group": 1, + "operations": { + "view": True + } + } + ] + } + + response = session.put(url, json=payload, headers=headers) + response.raise_for_status() + + +# single stand func +# def publish_record(uuid): +# session, xsrf_token = create_gn_session() + +# headers = { +# "X-XSRF-TOKEN": xsrf_token, +# "Content-Type": "application/json" +# } + +# url = f"{GEONETWORK_URL}/srv/api/records/{uuid}/sharing" + +# payload = { +# "clear": True, +# "privileges": [ +# {"group": 1, "operations": {"view": True}} +# ] +# } + +# resp = session.put(url, json=payload, headers=headers) +# resp.raise_for_status() diff --git a/services/datasets/publish_geoserver.py b/services/datasets/publish_geoserver.py index abe9704..3058c42 100644 --- a/services/datasets/publish_geoserver.py +++ b/services/datasets/publish_geoserver.py @@ -52,18 +52,52 @@ def publish_layer_to_geoserver(table: str, job_id: str): print(f"[WARNING] SLD file tidak ditemukan: {sld_file}") else: print(f"[GeoServer] Upload SLD {sld_file}") - style_url = f"{GEOSERVER_URL}/rest/styles" - with open(sld_file, "rb") as sld: - requests.post( - f"{style_url}?name={style_name}&workspace={GEOSERVER_WORKSPACE}", - auth=(GEOSERVER_USER, GEOSERVER_PASS), - headers={"Content-Type": "application/vnd.ogc.sld+xml"}, - data=sld.read() + #old + # style_url = f"{GEOSERVER_URL}/rest/styles" + + # with open(sld_file, "rb") as sld: + # requests.post( + # f"{style_url}?name={style_name}&workspace={GEOSERVER_WORKSPACE}", + # auth=(GEOSERVER_USER, GEOSERVER_PASS), + # headers={"Content-Type": "application/vnd.ogc.sld+xml"}, + # data=sld.read() + # ) + + # print(f"[GeoServer] SLD uploaded: {style_name}") + + + + #new + style_url = ( + f"{GEOSERVER_URL}/rest/workspaces/" + f"{GEOSERVER_WORKSPACE}/styles" + ) + + with open(sld_file, "r", encoding="utf-8") as f: + sld_content = f.read() + + # 🔥 INI BARIS PENTINGNYA + sld_content = sld_content.lstrip("\ufeff \t\r\n") + + resp = requests.post( + f"{style_url}?name={style_name}", + auth=(GEOSERVER_USER, GEOSERVER_PASS), + headers={"Content-Type": "application/vnd.ogc.sld+xml"}, + data=sld_content.encode("utf-8") + ) + + + if resp.status_code not in (200, 201): + raise Exception( + f"Upload SLD gagal ({resp.status_code}): {resp.text}" ) print(f"[GeoServer] SLD uploaded: {style_name}") + + + # ========================================== # 3. Apply SLD to the layer # ========================================== @@ -116,14 +150,29 @@ def publish_layer_to_geoserver(table: str, job_id: str): f"{GEOSERVER_URL}/{GEOSERVER_WORKSPACE}/wfs?" f"service=WFS&request=GetFeature&typeName={GEOSERVER_WORKSPACE}:{table}" ) - print(f"[GeoServer] WMS URL: {wms_link}") - print(f"[GeoServer] WFS URL: {wfs_link}") - print(f"[GeoServer] Reload completed. Layer {table} ready.") + # print(f"[GeoServer] WMS URL: {wms_link}") + # print(f"[GeoServer] WFS URL: {wfs_link}") + # print(f"[GeoServer] Reload completed. Layer {table} ready.") + openlayer_url = ( + f"{GEOSERVER_URL}/{GEOSERVER_WORKSPACE}/wms?" + f"service=WMS" + f"&version=1.1.0" + f"&request=GetMap" + f"&layers={GEOSERVER_WORKSPACE}:{table}" + f"&styles=" + f"&bbox=110.89528623700005%2C-8.780412043999945%2C116.26994997700001%2C-5.042971664999925" + f"&width=768" + f"&height=384" + f"&srs=EPSG:4326" + f"&format=application/openlayers" + ) + return { "table": table, "style": style_name, "wms_url": wms_link, - "wfs_url": wfs_link + "wfs_url": wfs_link, + "layer_url": openlayer_url } diff --git a/services/upload_file/readers/reader_pdf.py b/services/upload_file/readers/reader_pdf.py index 972927f..03c4df9 100644 --- a/services/upload_file/readers/reader_pdf.py +++ b/services/upload_file/readers/reader_pdf.py @@ -1,7 +1,7 @@ import re import pdfplumber import pandas as pd -from services.upload_file.utils.pdf_cleaner import row_ratio, has_mixed_text_and_numbers, is_short_text_row, parse_page_selection, filter_geo_admin_column, cleaning_column +from services.upload_file.utils.pdf_cleaner import get_number_column_index, get_start_end_number, normalize_number_column, row_ratio, has_mixed_text_and_numbers, is_short_text_row, parse_page_selection, filter_geo_admin_column, cleaning_column from services.upload_file.upload_exceptions import PDFReadError from utils.logger_config import setup_logger @@ -56,6 +56,41 @@ def merge_multiline_header(header_rows): final_header = [v for v in final_header if v not in ['', None]] return final_header +def merge_parsed_table(tables): + roots = [] + fragments = [] + + # STEP 1: klasifikasi + for table in tables: + num_idx = get_number_column_index(table["columns"]) + if num_idx is None: + roots.append(table) + continue + + start_no, _ = get_start_end_number(table["rows"], num_idx) + if start_no == 1: + roots.append(table) + else: + fragments.append(table) + + # STEP 2: merge fragment ke root + for frag in fragments: + frag_idx = get_number_column_index(frag["columns"]) + f_start, _ = get_start_end_number(frag["rows"], frag_idx) + + for root in roots: + if root["columns"] != frag["columns"]: + continue + + root_idx = get_number_column_index(root["columns"]) + _, r_end = get_start_end_number(root["rows"], root_idx) + + if f_start == r_end + 1: + root["rows"].extend(frag["rows"]) + break # fragment hanya boleh nempel ke 1 root + + return roots + def read_pdf(path: str, page: str): """ @@ -90,6 +125,74 @@ def read_pdf(path: str, page: str): Raises: PDFReadError: Jika terjadi kesalahan saat membaca atau parsing PDF. """ + # try: + # pdf_path = path + # selectedPage = page if page else "1" + # tables_data = [] + + # with pdfplumber.open(pdf_path) as pdf: + # total_pages = len(pdf.pages) + # selected_pages = parse_page_selection(selectedPage, total_pages) + + # logger.info(f"[INFO] Total Halaman PDF: {total_pages}") + # logger.info(f"[INFO] Total Halaman yang dipilih: {len(selected_pages)}") + # logger.info(f"[INFO] Halaman yang dipilih untuk dibaca: {selected_pages}") + + # for page_num in selected_pages: + # pdf_page = pdf.pages[page_num - 1] + # tables = pdf_page.find_tables() + # logger.info(f"\n\n[INFO] Halaman {page_num}: {len(tables)} tabel terdeteksi") + + # # pembacaan title ini tidak valid untuk halaman lanscape + # # for line in pdf_page.extract_text_lines(): + # # if line['top'] > tables[0].bbox[1]: + # # break + # # previous_line = line + # # print('[TITLE]', previous_line['text']) + + # for i, t in enumerate(tables, start=1): + # table = t.extract() + # if len(table) > 2: + # print(f"[TBL] tabel : {i} - halaman {page_num}") + # tables_data.append(table) + + # logger.info(f"\nTotal tabel terbaca: {len(tables_data)}\n") + + # header_only, body_only = [], [] + # for tbl in tables_data: + # head, body = detect_header_rows(tbl) + # header_only.append(head) + # body_only.append(body) + + # clean_header = [merge_multiline_header(h) for h in header_only] + # clean_body = [] + + # for i, raw_body in enumerate(body_only): + # con_body = [[cell for cell in row if cell not in (None, '')] for row in raw_body] + # cleaned = cleaning_column(clean_header[i], [con_body]) + # clean_body.append(cleaned[0]) + + # parsed = [] + # for i, (cols, rows) in enumerate(zip(clean_header, clean_body), start=1): + # parsed.append({ + # "title": str(i), + # "columns": cols, + # "rows": rows + # }) + + # # ================================================================= + + # clean_parsed = filter_geo_admin_column(parsed) + # merge_parsed = merge_parsed_table(clean_parsed) + + # logger.info(f"\nTotal tabel valid: {len(merge_parsed)}\n") + + # ordered_tables = [normalize_number_column(t) for t in merge_parsed] + # return ordered_tables + + # except Exception as e: + # raise PDFReadError(f"Gagal membaca PDF: {e}", code=422) + try: pdf_path = path selectedPage = page if page else "1" @@ -99,26 +202,36 @@ def read_pdf(path: str, page: str): total_pages = len(pdf.pages) selected_pages = parse_page_selection(selectedPage, total_pages) - logger.info(f"[INFO] Total halaman PDF: {total_pages}") + logger.info(f"[INFO] Total Halaman PDF: {total_pages}") + logger.info(f"[INFO] Total Halaman yang dipilih: {len(selected_pages)}") logger.info(f"[INFO] Halaman yang dipilih untuk dibaca: {selected_pages}") for page_num in selected_pages: pdf_page = pdf.pages[page_num - 1] tables = pdf_page.find_tables() - logger.info(f"[INFO] Halaman {page_num}: {len(tables)} tabel terdeteksi") + logger.info(f"\n\n[INFO] Halaman {page_num}: {len(tables)} tabel terdeteksi") - for t in tables: + # pembacaan title ini tidak valid untuk halaman lanscape + # for line in pdf_page.extract_text_lines(): + # if line['top'] > tables[0].bbox[1]: + # break + # previous_line = line + # print('[TITLE]', previous_line['text']) + + for i, t in enumerate(tables, start=1): table = t.extract() if len(table) > 2: - tables_data.append(table) + print(f"[TBL] tabel : {i} - halaman {page_num}") + tables_data.append({"page": f"halaman {page_num} - {i}", "table": table}) - logger.info(f"\nTotal tabel valid: {len(tables_data)}\n") + logger.info(f"\nTotal tabel terbaca: {len(tables_data)}\n") - header_only, body_only = [], [] + header_only, body_only, page_info = [], [], [] for tbl in tables_data: - head, body = detect_header_rows(tbl) + head, body = detect_header_rows(tbl["table"]) header_only.append(head) body_only.append(body) + page_info.append(tbl["page"]) clean_header = [merge_multiline_header(h) for h in header_only] clean_body = [] @@ -129,15 +242,22 @@ def read_pdf(path: str, page: str): clean_body.append(cleaned[0]) parsed = [] - for i, (cols, rows) in enumerate(zip(clean_header, clean_body), start=1): + for i, (cols, rows, page) in enumerate(zip(clean_header, clean_body, page_info), start=1): parsed.append({ - "title": str(i), + "title": page, "columns": cols, "rows": rows }) + # ================================================================= + clean_parsed = filter_geo_admin_column(parsed) - return clean_parsed + merge_parsed = merge_parsed_table(clean_parsed) + + logger.info(f"\nTotal tabel valid: {len(merge_parsed)}\n") + + ordered_tables = [normalize_number_column(t) for t in merge_parsed] + return ordered_tables except Exception as e: raise PDFReadError(f"Gagal membaca PDF: {e}", code=422) diff --git a/services/upload_file/upload.py b/services/upload_file/upload.py index f781327..3c2c05c 100644 --- a/services/upload_file/upload.py +++ b/services/upload_file/upload.py @@ -10,17 +10,16 @@ import asyncio from pyproj import CRS from shapely.geometry.base import BaseGeometry from shapely.geometry import base as shapely_base -from fastapi import File, Form, UploadFile, HTTPException -from api.routers.datasets_router import cleansing_data -from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES +from fastapi import Depends, File, Form, UploadFile, HTTPException +from api.routers.datasets_router import cleansing_data, publish_layer, query_cleansing_data, upload_to_main +from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES, GEONETWORK_URL from services.upload_file.ai_generate import send_metadata from services.upload_file.readers.reader_csv import read_csv from services.upload_file.readers.reader_shp import read_shp from services.upload_file.readers.reader_gdb import read_gdb from services.upload_file.readers.reader_mpk import read_mpk from services.upload_file.readers.reader_pdf import convert_df, read_pdf -from services.upload_file.utils.geometry_detector import detect_and_build_geometry -from services.upload_file.utils.geometry_detector import attach_polygon_geometry_auto +from services.upload_file.utils.geometry_detector import detect_and_build_geometry, attach_polygon_geometry_auto from services.upload_file.upload_ws import report_progress from database.connection import engine, sync_engine from database.models import Base @@ -309,7 +308,7 @@ def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str): } ai_suggest = send_metadata(ai_context) # ai_suggest = {'judul': 'Peta Risiko Letusan Gunung Arjuna di Provinsi Jawa Timur', 'abstrak': 'Peta ini menggambarkan wilayah berisiko letusan Gunung Arjuna yang berada di Provinsi Jawa Timur. Data disajikan dalam bentuk poligon yang menunjukkan zona risiko berdasarkan analisis potensi aktivitas vulkanik.', 'tujuan': 'Data dapat digunakan untuk perencanaan mitigasi bencana dan pengambilan keputusan di wilayah Jawa Timur.', 'keyword': ['Risiko letusan', 'Gunung Arjuna', 'Bencana alam', 'Provinsi Jawa Timur', 'Geologi'], 'kategori': ['Geoscientific information', 'Environment'], 'kategori_mapset': 'Lingkungan Hidup'} - print(ai_suggest) + # print(ai_suggest) response = { "message": "File berhasil dibaca dan dianalisis.", @@ -363,11 +362,11 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = tbl = read_pdf(tmp_path, page) if len(tbl) == 0: res = { - "message": "Tidak ditemukan tabel valid", + "message": "Tidak ditemukan tabel valid pada halaman yang dipilih", "tables": {}, "file_type": ext } - return successRes(message="Tidak ditemukan tabel valid", data=res) + return successRes(message="Tidak ditemukan tabel valid pada halaman yang dipilih", data=res) elif len(tbl) > 1: res = { "message": "File berhasil dibaca dan dianalisis.", @@ -389,10 +388,7 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] = df = read_gdb(str(tmp_path)) else: - raise errorRes( - status_code=400, - message="ZIP file tidak mengandung SHP atau GDB yang valid." - ) + return successRes(message="ZIP file tidak mengandung SHP / GDB valid.") else: raise errorRes(status_code=400, message="Unsupported file type") @@ -680,17 +676,53 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): job_id = generate_job_id(str(user_id)) result = { "job_id": job_id, + "job_status": "wait", "table_name": table_name, "status": "success", "message": f"Tabel '{table_name}' berhasil dibuat.", "total_rows": len(gdf), "geometry_type": unified_geom_type, "crs": detected_crs, + "metadata_uuid": "" } save_xml_to_sld(payload.style, job_id) await report_progress(job_id, "upload", 20, "Upload selesai") - cleansing_data(table_name, job_id) + # cleansing_data(table_name, job_id) + + cleansing = await query_cleansing_data(table_name) + result['job_status'] = cleansing + + publish = await publish_layer(table_name, job_id) + result['metadata_uuid'] = publish['uuid'] + + mapset = { + "name": payload.title, + "description": author.get("abstract"), + "scale": "1:25000", + "projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7", + "category_id": author.get("mapsetCategory"), + "data_status": "sementara", + "classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4", + "producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187", + "layer_type": unified_geom_type[0], + "source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"], + "layer_url": publish['geos_link'], + "metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish['uuid']}", + "coverage_level": "provinsi", + "coverage_area": "kabupaten", + "data_update_period": "Tahunan", + "data_version": "2026", + "is_popular": False, + "is_active": True, + "regional_id": "01968b53-a910-7a67-bd10-975b8923b92e", + "notes": "Mapset baru dibuat", + "status_validation": "on_verification", + } + + print("mapset data",mapset) + await upload_to_main(mapset) + return successRes(data=result) except Exception as e: @@ -705,6 +737,53 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): +# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2): +# try: +# job_id = generate_job_id(str(user_id)) +# result = { +# "job_id": job_id, +# "job_status": "done", +# "table_name": "just for test", +# "status": "success", +# "message": f"Tabel test berhasil dibuat.", +# "total_rows": 10, +# "geometry_type": "Polygon", +# "crs": "EPSG 4326", +# "metadata_uuid": "-" +# } + +# mapset = { +# "name": "Resiko Letusan Gunung Arjuno", +# "description": "Testing Automation Upload", +# "scale": "1:25000", +# "projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7", +# "category_id": "0196c80c-855f-77f9-abd0-0c8a30b8c2f5", +# "data_status": "sementara", +# "classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4", +# "producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187", +# "layer_type": "polygon", +# "source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"], +# "layer_url": "http://192.168.60.24:8888/geoserver/wms?service=WMS&version=1.1.0&request=GetMap&layers=labai:risiko_letusan_gunung_arjuno_bromo&bbox=110.89528623700005,-8.780412043999945,116.26994997700001,-5.042971664999925&width=768&height=534&srs=EPSG:4326&styles=&format=application/openlayers", +# "metadata_url": "http://192.168.60.24:7777/geonetwork/srv/eng/catalog.search#/metadata/9e5e2f09-13ef-49b5-bb49-1cb12136f63b", +# "coverage_level": "provinsi", +# "coverage_area": "kabupaten", +# "data_update_period": "Tahunan", +# "data_version": "2026", +# "is_popular": False, +# "is_active": True, +# "regional_id": "01968b53-a910-7a67-bd10-975b8923b92e", +# "notes": "Mapset baru dibuat", +# "status_validation": "on_verification", +# } + +# await upload_to_main(mapset) + +# return successRes(data=result) + +# except Exception as e: +# print("errot", e) + + diff --git a/services/upload_file/utils/pdf_cleaner.py b/services/upload_file/utils/pdf_cleaner.py index 86d2dd1..69d84c0 100644 --- a/services/upload_file/utils/pdf_cleaner.py +++ b/services/upload_file/utils/pdf_cleaner.py @@ -156,4 +156,53 @@ def is_short_text_row(row): return False text_only = all(not is_number(c) for c in non_empty) joined = " ".join(non_empty) - return text_only and len(non_empty) <= 2 and len(joined) < 20 \ No newline at end of file + return text_only and len(non_empty) <= 2 and len(joined) < 20 + + + + + + + + +def get_number_column_index(columns): + for i, col in enumerate(columns): + if has_number_header(col): + return i + return None + +def get_start_end_number(rows, idx): + try: + start_no = int(rows[0][idx]) + end_no = int(rows[-1][idx]) + return start_no, end_no + except: + return None, None + +def normalize_number_column(table): + columns = table["columns"] + rows = table["rows"] + + num_idx = get_number_column_index(columns) + if num_idx is None: + return table + + current = None + + for row in rows: + try: + val = int(row[num_idx]) + except: + continue + + if current is None: + current = val + else: + if val <= current: + current += 1 + else: + current = val + + row[num_idx] = str(current) + + return table