update automation service
This commit is contained in:
parent
466820e1f6
commit
470c87f8c4
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -16,3 +16,6 @@ uploads/
|
|||
scrapp/
|
||||
logs/
|
||||
style_temp/
|
||||
services/styles/
|
||||
|
||||
cleansing_func.sql
|
||||
|
|
@ -1,8 +1,10 @@
|
|||
import asyncio
|
||||
from uuid import uuid4
|
||||
from fastapi import APIRouter, HTTPException
|
||||
import httpx
|
||||
import requests
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from database.connection import engine
|
||||
from services.datasets.delete import delete_dataset_from_partition # import fungsi di atas
|
||||
from response import successRes, errorRes
|
||||
|
|
@ -10,7 +12,7 @@ from services.datasets.publish_geonetwork import publish_metadata
|
|||
from services.datasets.publish_geoserver import publish_layer_to_geoserver
|
||||
from services.datasets.metadata import update_job_status
|
||||
from services.upload_file.upload_ws import report_progress
|
||||
from core.config import GEOSERVER_URL, GEOSERVER_USER, GEOSERVER_PASS, QGIS_URL
|
||||
from core.config import GEOSERVER_URL, GEOSERVER_USER, GEOSERVER_PASS, QGIS_URL, MAIN_API_URL, SERVICE_KEY
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
|
@ -207,3 +209,110 @@ def get_style(style_name: str, workspace: str = None):
|
|||
|
||||
except requests.exceptions.RequestException as e:
|
||||
raise HTTPException(status_code=500, detail=f"Request error: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================
|
||||
# cleansing query
|
||||
# =============================================================
|
||||
|
||||
# async def query_cleansing_data_fn(
|
||||
# table_name: str
|
||||
# ):
|
||||
# try:
|
||||
# async with engine.begin() as conn:
|
||||
# await conn.execute(
|
||||
# text("SELECT public.fn_cleansing_satupeta_polygon(:table_name)"),
|
||||
# {"table_name": table_name}
|
||||
# )
|
||||
# return "done"
|
||||
|
||||
# except SQLAlchemyError as e:
|
||||
# raise RuntimeError(f"Fix geometry failed: {str(e)}")
|
||||
|
||||
async def query_cleansing_data(
    table_name: str
):
    """Run the cleansing stored procedure for *table_name*.

    Executes ``pr_cleansing_satupeta_polygon`` inside a single
    transaction and returns the literal string ``"done"`` on success.

    Raises:
        RuntimeError: when the database call fails.
    """
    stmt = text("CALL pr_cleansing_satupeta_polygon(:table_name, NULL);")
    params = {"table_name": table_name}
    try:
        async with engine.begin() as connection:
            await connection.execute(stmt, params)
        return "done"

    except SQLAlchemyError as e:
        raise RuntimeError(f"Fix geometry failed: {str(e)}")
|
||||
|
||||
|
||||
|
||||
async def publish_layer(table_name: str, job_id: str):
    """Publish *table_name* to GeoServer and GeoNetwork, reporting progress.

    On success the job is marked FINISHED and a dict holding the
    OpenLayers preview URL and the GeoNetwork metadata UUID is returned.
    On any failure the job is marked FAILED and a RuntimeError is raised.
    """
    try:
        await report_progress(job_id, "cleansing", 50, "Cleansing data selesai")

        links = publish_layer_to_geoserver(table_name, job_id)
        await report_progress(job_id, "publish_geoserver", 80, "Publish GeoServer selesai")

        metadata_uuid = publish_metadata(
            table_name=table_name,
            geoserver_links=links
        )
        await report_progress(job_id, "done", 100, "Publish GeoNetwork selesai")

        update_job_status(table_name, "FINISHED", job_id)
        return {
            "geos_link": links["layer_url"],
            "uuid": metadata_uuid
        }

    except Exception as e:
        # Any step failing flips the job to FAILED before re-raising.
        update_job_status(table_name, "FAILED", job_id)
        raise RuntimeError(f"Publish layer gagal: {e}") from e
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
async def upload_to_main(payload: dict):
    """POST the mapset *payload* to the main API's internal endpoint.

    Authenticates with the shared service key header and returns
    ``{"status": "success", "data": <response json>}`` on a 2xx reply.

    Raises:
        HTTPException: 500 for connection/DNS/timeout or unexpected
            failures; otherwise mirrors the upstream status code.
    """
    try:
        async with httpx.AsyncClient(timeout=10) as client:
            response = await client.post(
                MAIN_API_URL + "/api/internal/mapsets",
                json=payload,
                headers={
                    "X-SERVICE-KEY": SERVICE_KEY
                }
            )
            response.raise_for_status()

        # Debug print removed; rely on the structured return value instead.
        return {
            "status": "success",
            "data": response.json()
        }

    except httpx.RequestError as e:
        # Connection errors, DNS failures, timeouts, etc.
        raise HTTPException(
            status_code=500,
            detail=f"Gagal connect ke MAIN_API: {str(e)}"
        )

    except httpx.HTTPStatusError as e:
        # Upstream replied but with an error status (4xx / 5xx).
        raise HTTPException(
            status_code=e.response.status_code,
            detail=f"MAIN_API error: {e.response.text}"
        )

    except Exception as e:
        # Fallback for any other error.
        raise HTTPException(
            status_code=500,
            detail=f"Unexpected error: {str(e)}"
        )
|
||||
|
|
@ -6,6 +6,9 @@ load_dotenv()
|
|||
|
||||
API_VERSION = "2.1.3"
|
||||
|
||||
MAIN_API_URL = os.getenv("MAIN_API_URL")
|
||||
SERVICE_KEY = os.getenv("SERVICE_KEY")
|
||||
|
||||
POSTGIS_URL = os.getenv("POSTGIS_URL")
|
||||
POSTGIS_SYNC_URL = os.getenv("SYNC_URL")
|
||||
|
||||
|
|
@ -26,6 +29,7 @@ UPLOAD_FOLDER = Path(os.getenv("UPLOAD_FOLDER", "./uploads"))
|
|||
MAX_FILE_MB = int(os.getenv("MAX_FILE_MB", 30))
|
||||
|
||||
ALLOWED_ORIGINS = [
|
||||
"http://localhost:4000",
|
||||
"http://localhost:3000",
|
||||
"http://127.0.0.1:3000",
|
||||
"http://localhost:5173",
|
||||
|
|
|
|||
|
|
@ -24,9 +24,9 @@ async def loginService(username: str, password: str, db: AsyncSession):
|
|||
if not pwd_context.verify(password, user.password_hash):
|
||||
raise errorRes(status_code=401, message="Invalid username or password")
|
||||
|
||||
# Validation for institution user
|
||||
if user.role != "admin" and not user.institution_id:
|
||||
raise errorRes(status_code=403, message="User must belong to an institution")
|
||||
# Validation for organization user
|
||||
if user.role != "admin" and not user.organization_id:
|
||||
raise errorRes(status_code=403, message="User must belong to an organization")
|
||||
|
||||
# Generate single active token
|
||||
token = str(uuid4())
|
||||
|
|
@ -41,7 +41,7 @@ async def loginService(username: str, password: str, db: AsyncSession):
|
|||
"status": "success",
|
||||
"username": user.username,
|
||||
"role": user.role,
|
||||
"institution_id": user.institution_id,
|
||||
"organization_id": user.organization_id,
|
||||
"token": token,
|
||||
"token_expired_at": expiry.isoformat()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ from database.connection import sync_engine
|
|||
from utils.logger_config import log_activity
|
||||
|
||||
def update_job_status(table_name: str, status: str, job_id: str = None):
|
||||
print("update status")
|
||||
query = text("""
|
||||
UPDATE backend.author_metadata
|
||||
SET process = :status,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,22 @@ from datetime import datetime
|
|||
from uuid import uuid4
|
||||
import re
|
||||
|
||||
|
||||
|
||||
def create_gn_session():
    """Open an authenticated GeoNetwork session and fetch its XSRF token.

    Returns:
        tuple: ``(session, xsrf_token)`` ready for further API calls.

    Raises:
        Exception: when GeoNetwork does not hand back an XSRF token.
    """
    gn_session = requests.Session()
    gn_session.auth = (GEONETWORK_USER, GEONETWORK_PASS)

    # Priming request: GeoNetwork sets the XSRF-TOKEN cookie on first contact.
    gn_session.get(f"{GEONETWORK_URL}/srv/eng/info?type=me")

    token = gn_session.cookies.get("XSRF-TOKEN")
    if not token:
        raise Exception("XSRF token missing")

    return gn_session, token
|
||||
|
||||
|
||||
|
||||
def escape_url_params(url: str) -> str:
|
||||
"""
|
||||
Escape karakter berbahaya di dalam URL agar valid dalam XML.
|
||||
|
|
@ -73,30 +89,6 @@ def get_extent(table_name: str):
|
|||
"ymax": -5.4819 # north
|
||||
}
|
||||
|
||||
|
||||
# def get_author_metadata(table_name: str):
|
||||
|
||||
# sql = """
|
||||
# SELECT table_title, dataset_title, dataset_abstract, keywords, date_created,
|
||||
# organization_name, contact_person_name,
|
||||
# contact_email, contact_phone, geom_type
|
||||
# FROM backend.author_metadata
|
||||
# WHERE table_title = :table
|
||||
# LIMIT 1
|
||||
# """
|
||||
|
||||
# conn = engine.connect()
|
||||
# try:
|
||||
# row = conn.execute(text(sql), {"table": table_name}).fetchone()
|
||||
# finally:
|
||||
# conn.close()
|
||||
|
||||
# if not row:
|
||||
# raise Exception(f"Tidak ada metadata untuk tabel: {table_name}")
|
||||
|
||||
# # FIX: SQLAlchemy Row → dict
|
||||
# return dict(row._mapping)
|
||||
|
||||
def get_author_metadata(table_name: str):
|
||||
|
||||
sql = """
|
||||
|
|
@ -356,10 +348,10 @@ def generate_metadata_xml(table_name, meta, extent, geoserver_links):
|
|||
<gmd:pointOfContact>
|
||||
<gmd:CI_ResponsibleParty>
|
||||
<gmd:individualName>
|
||||
<gco:CharacterString>Dinas Tenaga Kerja dan Transmigrasi Provinsi Jawa Timur</gco:CharacterString>
|
||||
<gco:CharacterString>Lab AI Polinema</gco:CharacterString>
|
||||
</gmd:individualName>
|
||||
<gmd:organisationName>
|
||||
<gco:CharacterString>Dinas Tenaga Kerja dan Transmigrasi Provinsi Jawa Timur</gco:CharacterString>
|
||||
<gco:CharacterString>Lab AI Polinema</gco:CharacterString>
|
||||
</gmd:organisationName>
|
||||
<gmd:positionName gco:nilReason="missing"/>
|
||||
<gmd:contactInfo>
|
||||
|
|
@ -510,9 +502,7 @@ def generate_metadata_xml(table_name, meta, extent, geoserver_links):
|
|||
<gmd:onLine>
|
||||
<gmd:CI_OnlineResource>
|
||||
<gmd:linkage>
|
||||
<gmd:URL>
|
||||
https://geoserver.jatimprov.go.id/wms?service=WMS&version=1.1.0&request=GetMap&layers=satupeta:fngsl_trw_1_4_2020_2023&bbox=110.89527893066406,-8.78022289276123,116.27019500732422,-5.042964935302734&width=768&height=534&srs=EPSG:4326&styles=&format=application/openlayers
|
||||
</gmd:URL>
|
||||
<gmd:URL>{geoserver_links["wms_url"]}</gmd:URL>
|
||||
</gmd:linkage>
|
||||
<gmd:protocol>
|
||||
<gco:CharacterString>WWW:LINK-1.0-http--link</gco:CharacterString>
|
||||
|
|
@ -578,21 +568,23 @@ def generate_metadata_xml(table_name, meta, extent, geoserver_links):
|
|||
"""
|
||||
|
||||
|
||||
# Geonetwork version 4.4.9.0
|
||||
def upload_metadata_to_geonetwork(xml_metadata: str):
|
||||
session = requests.Session()
|
||||
session.auth = (GEONETWORK_USER, GEONETWORK_PASS)
|
||||
# session = requests.Session()
|
||||
# session.auth = (GEONETWORK_USER, GEONETWORK_PASS)
|
||||
|
||||
# 1. Get XSRF token
|
||||
try:
|
||||
info_url = f"{GEONETWORK_URL}/srv/eng/info?type=me"
|
||||
session.get(info_url)
|
||||
except requests.exceptions.RequestException as e:
|
||||
raise HTTPException(status_code=503, detail=f"Failed to connect to GeoNetwork: {e}")
|
||||
# # 1. Get XSRF token
|
||||
# try:
|
||||
# info_url = f"{GEONETWORK_URL}/srv/eng/info?type=me"
|
||||
# session.get(info_url)
|
||||
# except requests.exceptions.RequestException as e:
|
||||
# raise HTTPException(status_code=503, detail=f"Failed to connect to GeoNetwork: {e}")
|
||||
|
||||
xsrf_token = session.cookies.get('XSRF-TOKEN')
|
||||
if not xsrf_token:
|
||||
raise HTTPException(status_code=500, detail="Could not retrieve XSRF-TOKEN from GeoNetwork.")
|
||||
# xsrf_token = session.cookies.get('XSRF-TOKEN')
|
||||
# if not xsrf_token:
|
||||
# raise HTTPException(status_code=500, detail="Could not retrieve XSRF-TOKEN from GeoNetwork.")
|
||||
|
||||
session, xsrf_token = create_gn_session()
|
||||
headers = {
|
||||
'X-XSRF-TOKEN': xsrf_token,
|
||||
'Accept': 'application/json'
|
||||
|
|
@ -605,15 +597,32 @@ def upload_metadata_to_geonetwork(xml_metadata: str):
|
|||
'file': ('metadata.xml', xml_metadata, 'application/xml')
|
||||
}
|
||||
|
||||
params = {
|
||||
"ownerGroup": 1, # all
|
||||
"ownerUser": 1 # admin
|
||||
}
|
||||
|
||||
response = session.post(
|
||||
GN_API_RECORDS_URL,
|
||||
params=params,
|
||||
files=files,
|
||||
headers=headers,
|
||||
cookies=session.cookies.get_dict()
|
||||
)
|
||||
|
||||
# print("response", response.json())
|
||||
return response.json()
|
||||
metadata_infos = response.json().get("metadataInfos", {})
|
||||
uuid = None
|
||||
for records in metadata_infos.values():
|
||||
if records and isinstance(records, list):
|
||||
uuid = records[0].get("uuid")
|
||||
break
|
||||
if not uuid:
|
||||
raise ValueError("UUID not found in GeoNetwork response")
|
||||
|
||||
publish_record(session, uuid)
|
||||
|
||||
# print("response", response.json())
|
||||
return uuid
|
||||
|
||||
|
||||
|
||||
|
|
@ -629,11 +638,59 @@ def publish_metadata(table_name: str, geoserver_links: dict):
|
|||
)
|
||||
|
||||
xml_clean = fix_xml_urls(xml)
|
||||
response = upload_metadata_to_geonetwork(xml_clean)
|
||||
uuid = upload_metadata_to_geonetwork(xml_clean)
|
||||
|
||||
uuid = response.get("uuid")
|
||||
print(f"[GeoNetwork] Metadata uploaded. UUID = {uuid}")
|
||||
|
||||
return uuid
|
||||
|
||||
|
||||
|
||||
def publish_record(session, uuid):
    """Make the GeoNetwork record *uuid* viewable via its sharing endpoint.

    Clears existing privileges and grants group 1 the ``view`` operation
    (group 1 is used as the "all" group elsewhere in this module).

    Args:
        session: authenticated ``requests.Session`` holding the
            XSRF-TOKEN cookie (see ``create_gn_session``).
        uuid: GeoNetwork record identifier.

    Raises:
        requests.HTTPError: when GeoNetwork rejects the sharing update.
    """
    # Debug print of the uuid removed.
    xsrf_token = session.cookies.get('XSRF-TOKEN')

    headers = {
        "X-XSRF-TOKEN": xsrf_token,
        "Accept": "application/json",
        "Content-Type": "application/json"
    }

    url = f"{GEONETWORK_URL}/srv/api/records/{uuid}/sharing"

    payload = {
        "clear": True,
        "privileges": [
            {
                "group": 1,
                "operations": {
                    "view": True
                }
            }
        ]
    }

    response = session.put(url, json=payload, headers=headers)
    response.raise_for_status()
|
||||
|
||||
|
||||
# single stand func
|
||||
# def publish_record(uuid):
|
||||
# session, xsrf_token = create_gn_session()
|
||||
|
||||
# headers = {
|
||||
# "X-XSRF-TOKEN": xsrf_token,
|
||||
# "Content-Type": "application/json"
|
||||
# }
|
||||
|
||||
# url = f"{GEONETWORK_URL}/srv/api/records/{uuid}/sharing"
|
||||
|
||||
# payload = {
|
||||
# "clear": True,
|
||||
# "privileges": [
|
||||
# {"group": 1, "operations": {"view": True}}
|
||||
# ]
|
||||
# }
|
||||
|
||||
# resp = session.put(url, json=payload, headers=headers)
|
||||
# resp.raise_for_status()
|
||||
|
|
|
|||
|
|
@ -52,18 +52,52 @@ def publish_layer_to_geoserver(table: str, job_id: str):
|
|||
print(f"[WARNING] SLD file tidak ditemukan: {sld_file}")
|
||||
else:
|
||||
print(f"[GeoServer] Upload SLD {sld_file}")
|
||||
style_url = f"{GEOSERVER_URL}/rest/styles"
|
||||
|
||||
with open(sld_file, "rb") as sld:
|
||||
requests.post(
|
||||
f"{style_url}?name={style_name}&workspace={GEOSERVER_WORKSPACE}",
|
||||
auth=(GEOSERVER_USER, GEOSERVER_PASS),
|
||||
headers={"Content-Type": "application/vnd.ogc.sld+xml"},
|
||||
data=sld.read()
|
||||
#old
|
||||
# style_url = f"{GEOSERVER_URL}/rest/styles"
|
||||
|
||||
# with open(sld_file, "rb") as sld:
|
||||
# requests.post(
|
||||
# f"{style_url}?name={style_name}&workspace={GEOSERVER_WORKSPACE}",
|
||||
# auth=(GEOSERVER_USER, GEOSERVER_PASS),
|
||||
# headers={"Content-Type": "application/vnd.ogc.sld+xml"},
|
||||
# data=sld.read()
|
||||
# )
|
||||
|
||||
# print(f"[GeoServer] SLD uploaded: {style_name}")
|
||||
|
||||
|
||||
|
||||
#new
|
||||
style_url = (
|
||||
f"{GEOSERVER_URL}/rest/workspaces/"
|
||||
f"{GEOSERVER_WORKSPACE}/styles"
|
||||
)
|
||||
|
||||
with open(sld_file, "r", encoding="utf-8") as f:
|
||||
sld_content = f.read()
|
||||
|
||||
# 🔥 INI BARIS PENTINGNYA
|
||||
sld_content = sld_content.lstrip("\ufeff \t\r\n")
|
||||
|
||||
resp = requests.post(
|
||||
f"{style_url}?name={style_name}",
|
||||
auth=(GEOSERVER_USER, GEOSERVER_PASS),
|
||||
headers={"Content-Type": "application/vnd.ogc.sld+xml"},
|
||||
data=sld_content.encode("utf-8")
|
||||
)
|
||||
|
||||
|
||||
if resp.status_code not in (200, 201):
|
||||
raise Exception(
|
||||
f"Upload SLD gagal ({resp.status_code}): {resp.text}"
|
||||
)
|
||||
|
||||
print(f"[GeoServer] SLD uploaded: {style_name}")
|
||||
|
||||
|
||||
|
||||
|
||||
# ==========================================
|
||||
# 3. Apply SLD to the layer
|
||||
# ==========================================
|
||||
|
|
@ -116,14 +150,29 @@ def publish_layer_to_geoserver(table: str, job_id: str):
|
|||
f"{GEOSERVER_URL}/{GEOSERVER_WORKSPACE}/wfs?"
|
||||
f"service=WFS&request=GetFeature&typeName={GEOSERVER_WORKSPACE}:{table}"
|
||||
)
|
||||
print(f"[GeoServer] WMS URL: {wms_link}")
|
||||
print(f"[GeoServer] WFS URL: {wfs_link}")
|
||||
print(f"[GeoServer] Reload completed. Layer {table} ready.")
|
||||
# print(f"[GeoServer] WMS URL: {wms_link}")
|
||||
# print(f"[GeoServer] WFS URL: {wfs_link}")
|
||||
# print(f"[GeoServer] Reload completed. Layer {table} ready.")
|
||||
openlayer_url = (
|
||||
f"{GEOSERVER_URL}/{GEOSERVER_WORKSPACE}/wms?"
|
||||
f"service=WMS"
|
||||
f"&version=1.1.0"
|
||||
f"&request=GetMap"
|
||||
f"&layers={GEOSERVER_WORKSPACE}:{table}"
|
||||
f"&styles="
|
||||
f"&bbox=110.89528623700005%2C-8.780412043999945%2C116.26994997700001%2C-5.042971664999925"
|
||||
f"&width=768"
|
||||
f"&height=384"
|
||||
f"&srs=EPSG:4326"
|
||||
f"&format=application/openlayers"
|
||||
)
|
||||
|
||||
return {
|
||||
"table": table,
|
||||
"style": style_name,
|
||||
"wms_url": wms_link,
|
||||
"wfs_url": wfs_link
|
||||
"wfs_url": wfs_link,
|
||||
"layer_url": openlayer_url
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import re
|
||||
import pdfplumber
|
||||
import pandas as pd
|
||||
from services.upload_file.utils.pdf_cleaner import row_ratio, has_mixed_text_and_numbers, is_short_text_row, parse_page_selection, filter_geo_admin_column, cleaning_column
|
||||
from services.upload_file.utils.pdf_cleaner import get_number_column_index, get_start_end_number, normalize_number_column, row_ratio, has_mixed_text_and_numbers, is_short_text_row, parse_page_selection, filter_geo_admin_column, cleaning_column
|
||||
from services.upload_file.upload_exceptions import PDFReadError
|
||||
from utils.logger_config import setup_logger
|
||||
|
||||
|
|
@ -56,6 +56,41 @@ def merge_multiline_header(header_rows):
|
|||
final_header = [v for v in final_header if v not in ['', None]]
|
||||
return final_header
|
||||
|
||||
def merge_parsed_table(tables):
    """Re-attach table fragments that continue a root table's numbering.

    A table whose numbering column starts at 1 — or that has no numbering
    column at all — is treated as a "root"; every other table is a
    fragment. Each fragment is appended to the first root with identical
    columns whose last row number immediately precedes the fragment's
    first row number. Fragments that never match any root are dropped.

    Args:
        tables: list of dicts with ``"columns"`` and ``"rows"`` keys.

    Returns:
        The list of root tables, with matching fragments merged in.
    """
    roots = []
    fragments = []

    # STEP 1: classify each table as root or fragment.
    for table in tables:
        num_idx = get_number_column_index(table["columns"])
        if num_idx is None:
            roots.append(table)
            continue

        start_no, _ = get_start_end_number(table["rows"], num_idx)
        if start_no == 1:
            roots.append(table)
        else:
            fragments.append(table)

    # STEP 2: merge each fragment into a matching root.
    for frag in fragments:
        frag_idx = get_number_column_index(frag["columns"])
        f_start, _ = get_start_end_number(frag["rows"], frag_idx)
        if f_start is None:
            # Fragment numbering is unreadable; nothing to anchor on.
            continue

        for root in roots:
            if root["columns"] != frag["columns"]:
                continue

            root_idx = get_number_column_index(root["columns"])
            _, r_end = get_start_end_number(root["rows"], root_idx)
            if r_end is None:
                # Root numbering unreadable: skip instead of crashing
                # on ``r_end + 1`` (TypeError on None).
                continue

            if f_start == r_end + 1:
                root["rows"].extend(frag["rows"])
                break  # a fragment may attach to only one root

    return roots
|
||||
|
||||
|
||||
def read_pdf(path: str, page: str):
|
||||
"""
|
||||
|
|
@ -90,6 +125,74 @@ def read_pdf(path: str, page: str):
|
|||
Raises:
|
||||
PDFReadError: Jika terjadi kesalahan saat membaca atau parsing PDF.
|
||||
"""
|
||||
# try:
|
||||
# pdf_path = path
|
||||
# selectedPage = page if page else "1"
|
||||
# tables_data = []
|
||||
|
||||
# with pdfplumber.open(pdf_path) as pdf:
|
||||
# total_pages = len(pdf.pages)
|
||||
# selected_pages = parse_page_selection(selectedPage, total_pages)
|
||||
|
||||
# logger.info(f"[INFO] Total Halaman PDF: {total_pages}")
|
||||
# logger.info(f"[INFO] Total Halaman yang dipilih: {len(selected_pages)}")
|
||||
# logger.info(f"[INFO] Halaman yang dipilih untuk dibaca: {selected_pages}")
|
||||
|
||||
# for page_num in selected_pages:
|
||||
# pdf_page = pdf.pages[page_num - 1]
|
||||
# tables = pdf_page.find_tables()
|
||||
# logger.info(f"\n\n[INFO] Halaman {page_num}: {len(tables)} tabel terdeteksi")
|
||||
|
||||
# # pembacaan title ini tidak valid untuk halaman lanscape
|
||||
# # for line in pdf_page.extract_text_lines():
|
||||
# # if line['top'] > tables[0].bbox[1]:
|
||||
# # break
|
||||
# # previous_line = line
|
||||
# # print('[TITLE]', previous_line['text'])
|
||||
|
||||
# for i, t in enumerate(tables, start=1):
|
||||
# table = t.extract()
|
||||
# if len(table) > 2:
|
||||
# print(f"[TBL] tabel : {i} - halaman {page_num}")
|
||||
# tables_data.append(table)
|
||||
|
||||
# logger.info(f"\nTotal tabel terbaca: {len(tables_data)}\n")
|
||||
|
||||
# header_only, body_only = [], []
|
||||
# for tbl in tables_data:
|
||||
# head, body = detect_header_rows(tbl)
|
||||
# header_only.append(head)
|
||||
# body_only.append(body)
|
||||
|
||||
# clean_header = [merge_multiline_header(h) for h in header_only]
|
||||
# clean_body = []
|
||||
|
||||
# for i, raw_body in enumerate(body_only):
|
||||
# con_body = [[cell for cell in row if cell not in (None, '')] for row in raw_body]
|
||||
# cleaned = cleaning_column(clean_header[i], [con_body])
|
||||
# clean_body.append(cleaned[0])
|
||||
|
||||
# parsed = []
|
||||
# for i, (cols, rows) in enumerate(zip(clean_header, clean_body), start=1):
|
||||
# parsed.append({
|
||||
# "title": str(i),
|
||||
# "columns": cols,
|
||||
# "rows": rows
|
||||
# })
|
||||
|
||||
# # =================================================================
|
||||
|
||||
# clean_parsed = filter_geo_admin_column(parsed)
|
||||
# merge_parsed = merge_parsed_table(clean_parsed)
|
||||
|
||||
# logger.info(f"\nTotal tabel valid: {len(merge_parsed)}\n")
|
||||
|
||||
# ordered_tables = [normalize_number_column(t) for t in merge_parsed]
|
||||
# return ordered_tables
|
||||
|
||||
# except Exception as e:
|
||||
# raise PDFReadError(f"Gagal membaca PDF: {e}", code=422)
|
||||
|
||||
try:
|
||||
pdf_path = path
|
||||
selectedPage = page if page else "1"
|
||||
|
|
@ -99,26 +202,36 @@ def read_pdf(path: str, page: str):
|
|||
total_pages = len(pdf.pages)
|
||||
selected_pages = parse_page_selection(selectedPage, total_pages)
|
||||
|
||||
logger.info(f"[INFO] Total halaman PDF: {total_pages}")
|
||||
logger.info(f"[INFO] Total Halaman PDF: {total_pages}")
|
||||
logger.info(f"[INFO] Total Halaman yang dipilih: {len(selected_pages)}")
|
||||
logger.info(f"[INFO] Halaman yang dipilih untuk dibaca: {selected_pages}")
|
||||
|
||||
for page_num in selected_pages:
|
||||
pdf_page = pdf.pages[page_num - 1]
|
||||
tables = pdf_page.find_tables()
|
||||
logger.info(f"[INFO] Halaman {page_num}: {len(tables)} tabel terdeteksi")
|
||||
logger.info(f"\n\n[INFO] Halaman {page_num}: {len(tables)} tabel terdeteksi")
|
||||
|
||||
for t in tables:
|
||||
# pembacaan title ini tidak valid untuk halaman lanscape
|
||||
# for line in pdf_page.extract_text_lines():
|
||||
# if line['top'] > tables[0].bbox[1]:
|
||||
# break
|
||||
# previous_line = line
|
||||
# print('[TITLE]', previous_line['text'])
|
||||
|
||||
for i, t in enumerate(tables, start=1):
|
||||
table = t.extract()
|
||||
if len(table) > 2:
|
||||
tables_data.append(table)
|
||||
print(f"[TBL] tabel : {i} - halaman {page_num}")
|
||||
tables_data.append({"page": f"halaman {page_num} - {i}", "table": table})
|
||||
|
||||
logger.info(f"\nTotal tabel valid: {len(tables_data)}\n")
|
||||
logger.info(f"\nTotal tabel terbaca: {len(tables_data)}\n")
|
||||
|
||||
header_only, body_only = [], []
|
||||
header_only, body_only, page_info = [], [], []
|
||||
for tbl in tables_data:
|
||||
head, body = detect_header_rows(tbl)
|
||||
head, body = detect_header_rows(tbl["table"])
|
||||
header_only.append(head)
|
||||
body_only.append(body)
|
||||
page_info.append(tbl["page"])
|
||||
|
||||
clean_header = [merge_multiline_header(h) for h in header_only]
|
||||
clean_body = []
|
||||
|
|
@ -129,15 +242,22 @@ def read_pdf(path: str, page: str):
|
|||
clean_body.append(cleaned[0])
|
||||
|
||||
parsed = []
|
||||
for i, (cols, rows) in enumerate(zip(clean_header, clean_body), start=1):
|
||||
for i, (cols, rows, page) in enumerate(zip(clean_header, clean_body, page_info), start=1):
|
||||
parsed.append({
|
||||
"title": str(i),
|
||||
"title": page,
|
||||
"columns": cols,
|
||||
"rows": rows
|
||||
})
|
||||
|
||||
# =================================================================
|
||||
|
||||
clean_parsed = filter_geo_admin_column(parsed)
|
||||
return clean_parsed
|
||||
merge_parsed = merge_parsed_table(clean_parsed)
|
||||
|
||||
logger.info(f"\nTotal tabel valid: {len(merge_parsed)}\n")
|
||||
|
||||
ordered_tables = [normalize_number_column(t) for t in merge_parsed]
|
||||
return ordered_tables
|
||||
|
||||
except Exception as e:
|
||||
raise PDFReadError(f"Gagal membaca PDF: {e}", code=422)
|
||||
|
|
|
|||
|
|
@ -10,17 +10,16 @@ import asyncio
|
|||
from pyproj import CRS
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
from shapely.geometry import base as shapely_base
|
||||
from fastapi import File, Form, UploadFile, HTTPException
|
||||
from api.routers.datasets_router import cleansing_data
|
||||
from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES
|
||||
from fastapi import Depends, File, Form, UploadFile, HTTPException
|
||||
from api.routers.datasets_router import cleansing_data, publish_layer, query_cleansing_data, upload_to_main
|
||||
from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES, GEONETWORK_URL
|
||||
from services.upload_file.ai_generate import send_metadata
|
||||
from services.upload_file.readers.reader_csv import read_csv
|
||||
from services.upload_file.readers.reader_shp import read_shp
|
||||
from services.upload_file.readers.reader_gdb import read_gdb
|
||||
from services.upload_file.readers.reader_mpk import read_mpk
|
||||
from services.upload_file.readers.reader_pdf import convert_df, read_pdf
|
||||
from services.upload_file.utils.geometry_detector import detect_and_build_geometry
|
||||
from services.upload_file.utils.geometry_detector import attach_polygon_geometry_auto
|
||||
from services.upload_file.utils.geometry_detector import detect_and_build_geometry, attach_polygon_geometry_auto
|
||||
from services.upload_file.upload_ws import report_progress
|
||||
from database.connection import engine, sync_engine
|
||||
from database.models import Base
|
||||
|
|
@ -309,7 +308,7 @@ def process_data(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
|
|||
}
|
||||
ai_suggest = send_metadata(ai_context)
|
||||
# ai_suggest = {'judul': 'Peta Risiko Letusan Gunung Arjuna di Provinsi Jawa Timur', 'abstrak': 'Peta ini menggambarkan wilayah berisiko letusan Gunung Arjuna yang berada di Provinsi Jawa Timur. Data disajikan dalam bentuk poligon yang menunjukkan zona risiko berdasarkan analisis potensi aktivitas vulkanik.', 'tujuan': 'Data dapat digunakan untuk perencanaan mitigasi bencana dan pengambilan keputusan di wilayah Jawa Timur.', 'keyword': ['Risiko letusan', 'Gunung Arjuna', 'Bencana alam', 'Provinsi Jawa Timur', 'Geologi'], 'kategori': ['Geoscientific information', 'Environment'], 'kategori_mapset': 'Lingkungan Hidup'}
|
||||
print(ai_suggest)
|
||||
# print(ai_suggest)
|
||||
|
||||
response = {
|
||||
"message": "File berhasil dibaca dan dianalisis.",
|
||||
|
|
@ -363,11 +362,11 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
|
|||
tbl = read_pdf(tmp_path, page)
|
||||
if len(tbl) == 0:
|
||||
res = {
|
||||
"message": "Tidak ditemukan tabel valid",
|
||||
"message": "Tidak ditemukan tabel valid pada halaman yang dipilih",
|
||||
"tables": {},
|
||||
"file_type": ext
|
||||
}
|
||||
return successRes(message="Tidak ditemukan tabel valid", data=res)
|
||||
return successRes(message="Tidak ditemukan tabel valid pada halaman yang dipilih", data=res)
|
||||
elif len(tbl) > 1:
|
||||
res = {
|
||||
"message": "File berhasil dibaca dan dianalisis.",
|
||||
|
|
@ -389,10 +388,7 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
|
|||
df = read_gdb(str(tmp_path))
|
||||
|
||||
else:
|
||||
raise errorRes(
|
||||
status_code=400,
|
||||
message="ZIP file tidak mengandung SHP atau GDB yang valid."
|
||||
)
|
||||
return successRes(message="ZIP file tidak mengandung SHP / GDB valid.")
|
||||
else:
|
||||
raise errorRes(status_code=400, message="Unsupported file type")
|
||||
|
||||
|
|
@ -680,17 +676,53 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
|||
job_id = generate_job_id(str(user_id))
|
||||
result = {
|
||||
"job_id": job_id,
|
||||
"job_status": "wait",
|
||||
"table_name": table_name,
|
||||
"status": "success",
|
||||
"message": f"Tabel '{table_name}' berhasil dibuat.",
|
||||
"total_rows": len(gdf),
|
||||
"geometry_type": unified_geom_type,
|
||||
"crs": detected_crs,
|
||||
"metadata_uuid": ""
|
||||
}
|
||||
save_xml_to_sld(payload.style, job_id)
|
||||
|
||||
await report_progress(job_id, "upload", 20, "Upload selesai")
|
||||
cleansing_data(table_name, job_id)
|
||||
# cleansing_data(table_name, job_id)
|
||||
|
||||
cleansing = await query_cleansing_data(table_name)
|
||||
result['job_status'] = cleansing
|
||||
|
||||
publish = await publish_layer(table_name, job_id)
|
||||
result['metadata_uuid'] = publish['uuid']
|
||||
|
||||
mapset = {
|
||||
"name": payload.title,
|
||||
"description": author.get("abstract"),
|
||||
"scale": "1:25000",
|
||||
"projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7",
|
||||
"category_id": author.get("mapsetCategory"),
|
||||
"data_status": "sementara",
|
||||
"classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4",
|
||||
"producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187",
|
||||
"layer_type": unified_geom_type[0],
|
||||
"source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"],
|
||||
"layer_url": publish['geos_link'],
|
||||
"metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish['uuid']}",
|
||||
"coverage_level": "provinsi",
|
||||
"coverage_area": "kabupaten",
|
||||
"data_update_period": "Tahunan",
|
||||
"data_version": "2026",
|
||||
"is_popular": False,
|
||||
"is_active": True,
|
||||
"regional_id": "01968b53-a910-7a67-bd10-975b8923b92e",
|
||||
"notes": "Mapset baru dibuat",
|
||||
"status_validation": "on_verification",
|
||||
}
|
||||
|
||||
print("mapset data",mapset)
|
||||
await upload_to_main(mapset)
|
||||
|
||||
return successRes(data=result)
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -705,6 +737,53 @@ async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
|||
|
||||
|
||||
|
||||
# async def handle_to_postgis(payload: UploadRequest, user_id: int = 2):
|
||||
# try:
|
||||
# job_id = generate_job_id(str(user_id))
|
||||
# result = {
|
||||
# "job_id": job_id,
|
||||
# "job_status": "done",
|
||||
# "table_name": "just for test",
|
||||
# "status": "success",
|
||||
# "message": f"Tabel test berhasil dibuat.",
|
||||
# "total_rows": 10,
|
||||
# "geometry_type": "Polygon",
|
||||
# "crs": "EPSG 4326",
|
||||
# "metadata_uuid": "-"
|
||||
# }
|
||||
|
||||
# mapset = {
|
||||
# "name": "Resiko Letusan Gunung Arjuno",
|
||||
# "description": "Testing Automation Upload",
|
||||
# "scale": "1:25000",
|
||||
# "projection_system_id": "0196c746-d1ba-7f1c-9706-5df738679cc7",
|
||||
# "category_id": "0196c80c-855f-77f9-abd0-0c8a30b8c2f5",
|
||||
# "data_status": "sementara",
|
||||
# "classification_id": "01968b4b-d3f9-76c9-888c-ee887ac31ce4",
|
||||
# "producer_id": "019bd4ea-eb33-704e-83c3-8253d457b187",
|
||||
# "layer_type": "polygon",
|
||||
# "source_id": ["019bd4e7-3df8-75c8-9b89-3f310967649c"],
|
||||
# "layer_url": "http://192.168.60.24:8888/geoserver/wms?service=WMS&version=1.1.0&request=GetMap&layers=labai:risiko_letusan_gunung_arjuno_bromo&bbox=110.89528623700005,-8.780412043999945,116.26994997700001,-5.042971664999925&width=768&height=534&srs=EPSG:4326&styles=&format=application/openlayers",
|
||||
# "metadata_url": "http://192.168.60.24:7777/geonetwork/srv/eng/catalog.search#/metadata/9e5e2f09-13ef-49b5-bb49-1cb12136f63b",
|
||||
# "coverage_level": "provinsi",
|
||||
# "coverage_area": "kabupaten",
|
||||
# "data_update_period": "Tahunan",
|
||||
# "data_version": "2026",
|
||||
# "is_popular": False,
|
||||
# "is_active": True,
|
||||
# "regional_id": "01968b53-a910-7a67-bd10-975b8923b92e",
|
||||
# "notes": "Mapset baru dibuat",
|
||||
# "status_validation": "on_verification",
|
||||
# }
|
||||
|
||||
# await upload_to_main(mapset)
|
||||
|
||||
# return successRes(data=result)
|
||||
|
||||
# except Exception as e:
|
||||
# print("errot", e)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -157,3 +157,52 @@ def is_short_text_row(row):
|
|||
text_only = all(not is_number(c) for c in non_empty)
|
||||
joined = " ".join(non_empty)
|
||||
return text_only and len(non_empty) <= 2 and len(joined) < 20
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def get_number_column_index(columns):
    """Return the index of the first numbering column, or None if absent."""
    return next(
        (idx for idx, header in enumerate(columns) if has_number_header(header)),
        None,
    )
|
||||
|
||||
def get_start_end_number(rows, idx):
    """Return the (first, last) row numbers read from column *idx* of *rows*.

    Returns ``(None, None)`` when the table is empty, the column index is
    missing/None, or either endpoint cell is not parseable as an integer.
    """
    try:
        start_no = int(rows[0][idx])
        end_no = int(rows[-1][idx])
        return start_no, end_no
    # Narrowed from a bare ``except:`` — only the expected failure modes:
    # empty rows (IndexError), idx is None (TypeError), non-numeric cell
    # (ValueError).
    except (ValueError, TypeError, IndexError):
        return None, None
|
||||
|
||||
def normalize_number_column(table):
    """Repair *table*'s numbering column in place so it never regresses.

    Walks the rows tracking the last accepted number: a value that does
    not increase is replaced by the previous number plus one, while
    larger values are kept as-is. Rows whose cell cannot be parsed as an
    integer are left untouched, and tables without a numbering column
    are returned unchanged.

    Args:
        table: dict with ``"columns"`` and ``"rows"`` keys; mutated in place.

    Returns:
        The same *table* object, for chaining.
    """
    columns = table["columns"]
    rows = table["rows"]

    num_idx = get_number_column_index(columns)
    if num_idx is None:
        return table

    current = None

    for row in rows:
        try:
            val = int(row[num_idx])
        # Narrowed from a bare ``except:``: skip only rows whose cell is
        # missing or non-numeric.
        except (ValueError, TypeError, IndexError):
            continue

        if current is None:
            current = val
        elif val <= current:
            # Duplicate or regressed number: continue the sequence.
            current += 1
        else:
            current = val

        row[num_idx] = str(current)

    return table
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user