"""Publish dataset metadata records to GeoNetwork.

The helpers below collect a dataset's author metadata and spatial extent
from the database, render a metadata document, upload it to GeoNetwork and
grant view privileges on the new record.

NOTE(review): the copy of this module that was reviewed was an
extraction-damaged patch: markup inside string literals had been stripped
and entities decoded.  Places where that matters are flagged individually.
"""
import re
from datetime import datetime, timedelta, timezone
from uuid import uuid4

import requests
from fastapi import HTTPException
from sqlalchemy import text

from core.config import GEONETWORK_PASS, GEONETWORK_URL, GEONETWORK_USER
from database.connection import sync_engine as engine

# Seconds to wait on any single GeoNetwork HTTP call before giving up.
_GN_TIMEOUT = 30

# The metadata timestamps are labelled +07:00 (see "Timezone: UTC+7
# (Asia/Jakarta)" in the template), so stamps are generated in that offset.
_UTC_PLUS_7 = timezone(timedelta(hours=7))

# Table names are interpolated into SQL as identifiers (they cannot be bound
# as parameters), so only plain unquoted identifiers are accepted.
_IDENTIFIER_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_$]*")


def create_gn_session():
    """Open an authenticated GeoNetwork session and fetch its XSRF token.

    Returns:
        A ``(session, xsrf_token)`` tuple.

    Raises:
        RuntimeError: if GeoNetwork did not set an ``XSRF-TOKEN`` cookie.
    """
    session = requests.Session()
    session.auth = (GEONETWORK_USER, GEONETWORK_PASS)

    # Only the cookie set by this probe matters; the response status is
    # deliberately not checked (unchanged from the original behaviour).
    session.get(
        f"{GEONETWORK_URL}/srv/eng/info?type=me",
        timeout=_GN_TIMEOUT,
    )
    xsrf_token = session.cookies.get("XSRF-TOKEN")

    if not xsrf_token:
        raise RuntimeError("XSRF token missing")

    return session, xsrf_token


def escape_url_params(url: str) -> str:
    """Escape characters in a URL that are unsafe inside XML.

    Replaces every ``&`` with ``&amp;`` unless it already starts ``&amp;``.
    """
    return re.sub(r"&(?!amp;)", "&amp;", url)


def fix_xml_urls(xml: str, tag: str = "gmd:URL") -> str:
    """Escape the URL held in every ``<tag>...</tag>`` element of *xml*.

    Args:
        xml: The document text.
        tag: Qualified name of the element wrapping each URL.
            NOTE(review): the element name was stripped from the reviewed
            copy; ``gmd:URL`` is assumed from the ISO 19139 standard name
            used in the template -- confirm against the repository.

    Returns:
        The document with each wrapped URL stripped of surrounding
        whitespace and passed through :func:`escape_url_params`.
    """
    open_tag, close_tag = f"<{tag}>", f"</{tag}>"

    def replacer(match):
        fixed = escape_url_params(match.group(1).strip())
        return f"{open_tag}{fixed}{close_tag}"

    pattern = f"{re.escape(open_tag)}(.*?){re.escape(close_tag)}"
    return re.sub(pattern, replacer, xml, flags=re.DOTALL)


def get_extent(table_name: str):
    """Return the bounding box used for *table_name*, or ``None``.

    The table's extent is queried only to detect tables with no geometry;
    in that case ``None`` is returned.

    Raises:
        ValueError: if *table_name* is not a plain SQL identifier.
    """
    if not _IDENTIFIER_RE.fullmatch(table_name):
        raise ValueError(f"Invalid table name: {table_name!r}")

    sql = f"""
        SELECT
            ST_XMin(extent), ST_YMin(extent),
            ST_XMax(extent), ST_YMax(extent)
        FROM (
            SELECT ST_Extent(geom) AS extent
            FROM public.{table_name}
        ) AS box;
    """

    with engine.connect() as conn:
        row = conn.execute(text(sql)).fetchone()

    if not row or row[0] is None:
        return None

    # NOTE(review): the original returns this fixed box instead of the
    # queried values (the computed-extent return was commented out).
    # Kept as-is; confirm whether the hard-coded box is intentional.
    return {
        "xmin": 110.1372,  # west
        "ymin": -9.3029,   # south
        "xmax": 114.5287,  # east
        "ymax": -5.4819,   # north
    }


def get_author_metadata(table_name: str):
    """Fetch the author metadata row whose ``table_title`` is *table_name*.

    Returns:
        The row as a ``dict``, including the joined organization columns.

    Raises:
        LookupError: if no metadata row exists for the table.
    """
    sql = """
        SELECT am.table_title, am.dataset_title, am.dataset_abstract, am.keywords, am.date_created,
               am.organization_name, am.contact_person_name, am.created_at,
               am.contact_email, am.contact_phone, am.geom_type,
               u.organization_id,
               o.address AS organization_address,
               o.email AS organization_email,
               o.phone_number AS organization_phone
        FROM backend.author_metadata AS am
        LEFT JOIN backend.users u ON am.user_id = u.id
        LEFT JOIN backend.organizations o ON u.organization_id = o.id
        WHERE am.table_title = :table
        LIMIT 1
    """

    with engine.connect() as conn:
        row = conn.execute(text(sql), {"table": table_name}).fetchone()

    if not row:
        raise LookupError(f"Tidak ada metadata untuk tabel: {table_name}")

    return dict(row._mapping)


def map_geom_type(gtype):
    """Map a geometry type name to ``"surface"``, ``"curve"`` or ``"point"``.

    ``None``, an empty list and unrecognised names map to ``"surface"``.
    When a list is given, only its first element is considered.
    """
    if gtype is None:
        return "surface"

    # A list means several types were recorded; use the first one.
    if isinstance(gtype, list):
        if not gtype:
            return "surface"
        gtype = gtype[0]

    gtype = str(gtype).lower()

    # NOTE(review): the "multi" test runs first, so MultiPoint and
    # MultiLineString also map to "surface" -- confirm this is intended.
    if "polygon" in gtype or "multi" in gtype:
        return "surface"
    if "line" in gtype:
        return "curve"
    if "point" in gtype:
        return "point"

    return "surface"


def generate_metadata_xml(table_name, meta, extent, geoserver_links):
    """Render the metadata document for one dataset.

    Args:
        table_name: Not used by the visible code; kept for the interface.
        meta: Author metadata mapping as returned by
            :func:`get_author_metadata`.
        extent: Mapping with ``xmin``/``ymin``/``xmax``/``ymax``.
        geoserver_links: Mapping with ``wms_url`` and ``wfs_url``.

    Returns:
        The rendered document text, with a freshly generated UUID.

    NOTE(review): in the reviewed copy the template below contains only
    the substituted values -- all markup appears to have been stripped,
    and the original indentation is unknown.  The visible text is
    reproduced as found; restore the template from version control before
    relying on this function.
    """
    # A missing keyword list yields no entries; blank entries are skipped.
    keywords = (kw.strip() for kw in (meta["keywords"] or "").split(","))
    keywords_xml = "".join(f"\n{kw}\n" for kw in keywords if kw)

    # NOTE(review): no use of geom_type_code is visible in the template;
    # presumably it was referenced inside stripped markup.
    geom_type_code = map_geom_type(meta["geom_type"])
    print('type', geom_type_code)
    uuid = str(uuid4())

    # The original labelled a naive UTC time as +07:00, which is seven hours
    # off; an aware UTC+7 time renders the offset itself.
    date_stamp = datetime.now(_UTC_PLUS_7).isoformat()

    return f"""


{uuid}













{meta['contact_person_name']}


{meta['organization_name']}






{meta['organization_phone']}


{meta['organization_phone']}






{meta['organization_address']}


Surabaya


Jawa Timur


Indonesia


{meta['organization_email']}




08.00-16.00









{date_stamp}


ISO 19115:2003/19139


1.0









38










4326


EPSG










{meta['dataset_title']}




{meta['created_at'].isoformat()}+07:00







{meta['date_created'].year}




{meta['contact_person_name']}


{meta['organization_name']}






{meta['organization_phone']}


{meta['organization_phone']}






{meta['organization_address']}


Surabaya


Indonesia


{meta['organization_email']}




08.00-16.00









Timezone: UTC+7 (Asia/Jakarta)




{meta['dataset_abstract']}


{meta['dataset_abstract']}







Dinas Tenaga Kerja dan Transmigrasi Provinsi Jawa Timur


Dinas Tenaga Kerja dan Transmigrasi Provinsi Jawa Timur







{meta['organization_phone']}


{meta['organization_phone']}






{meta['organization_address']}


Surabaya


Jawa Timur


Indonesia


{meta['organization_email']}



















{keywords_xml}











Penggunaan data harus mencantumkan sumber: {meta['organization_name']}.











25000















{extent['xmin']}
{extent['xmax']}
{extent['ymin']}
{extent['ymax']}









true





{meta['dataset_title']}




{meta['created_at'].isoformat()}+07:00







{meta['date_created'].year}












{geoserver_links["wms_url"]}


DB:POSTGIS


{meta["dataset_title"]}


{meta["dataset_title"]}






{geoserver_links["wms_url"]}


WWW:LINK-1.0-http--link


{meta["dataset_title"]}


{meta["dataset_title"]}






{geoserver_links["wms_url"]}


OGC:WMS


{meta["dataset_title"]}







{geoserver_links["wfs_url"]}


OGC:WFS


{meta["dataset_title"]}



















Data dihasilkan dari digitasi peta dasar skala 1:25000 menggunakan QGIS.






"""


def _extract_record_uuid(report):
    """Return the first record UUID listed under ``metadataInfos``, or None."""
    infos = report.get("metadataInfos") or {}
    for records in infos.values():
        if records and isinstance(records, list):
            return records[0].get("uuid")
    return None


# Geonetwork version 4.4.9.0
def upload_metadata_to_geonetwork(xml_metadata: str):
    """Upload a metadata document to GeoNetwork and publish the new record.

    Args:
        xml_metadata: The document text to upload.

    Returns:
        The decoded JSON body of the upload response.

    Raises:
        requests.HTTPError: if GeoNetwork rejects the upload or the
            follow-up sharing request.
        ValueError: if no record UUID is present in the upload response.
    """
    session, xsrf_token = create_gn_session()

    headers = {
        'X-XSRF-TOKEN': xsrf_token,
        'Accept': 'application/json',
    }

    # The record is sent as a multipart/form-data file part.
    files = {
        'file': ('metadata.xml', xml_metadata, 'application/xml'),
    }

    params = {
        "ownerGroup": 1,  # all
        "ownerUser": 1,   # admin
    }

    response = session.post(
        f"{GEONETWORK_URL}/srv/api/records",
        params=params,
        files=files,
        headers=headers,
        cookies=session.cookies.get_dict(),
        timeout=_GN_TIMEOUT,
    )
    # Fail loudly on a rejected upload, as publish_record already does.
    response.raise_for_status()
    report = response.json()

    uuid = _extract_record_uuid(report)
    if not uuid:
        raise ValueError("UUID not found in GeoNetwork response")

    record = publish_record(session, uuid)
    print('[record]', record)

    return report


def publish_metadata(table_name: str, geoserver_links: dict):
    """Build, upload and publish the metadata record for *table_name*.

    Args:
        table_name: Table whose author metadata and extent are used.
        geoserver_links: Mapping with ``wms_url`` and ``wfs_url``.

    Returns:
        The UUID of the record created in GeoNetwork.

    Raises:
        ValueError: if the table has no spatial extent.
    """
    extent = get_extent(table_name)
    meta = get_author_metadata(table_name)
    if extent is None:
        raise ValueError(
            f"No spatial extent available for table: {table_name}"
        )

    xml = generate_metadata_xml(
        table_name=meta["dataset_title"],
        meta=meta,
        extent=extent,
        geoserver_links=geoserver_links,
    )

    xml_clean = fix_xml_urls(xml)
    report = upload_metadata_to_geonetwork(xml_clean)

    # Read the record UUID from the same place the upload step reads it,
    # rather than from a top-level "uuid" key of the report.
    uuid = _extract_record_uuid(report)
    print(f"[GeoNetwork] Metadata uploaded. UUID = {uuid}")

    return uuid


def publish_record(session, uuid):
    """Grant group 1 the ``view`` privilege on record *uuid*.

    Args:
        session: Authenticated session from :func:`create_gn_session`.
        uuid: UUID of the record to publish.

    Returns:
        The decoded JSON response, or ``None`` when the body is empty.

    Raises:
        requests.HTTPError: if GeoNetwork rejects the request.
    """
    print('[uuid]', uuid)

    headers = {
        "X-XSRF-TOKEN": session.cookies.get('XSRF-TOKEN'),
        "Accept": "application/json",
        "Content-Type": "application/json",
    }

    payload = {
        "clear": True,
        "privileges": [
            {
                "group": 1,
                "operations": {
                    "view": True,
                },
            },
        ],
    }

    response = session.put(
        f"{GEONETWORK_URL}/srv/api/records/{uuid}/sharing",
        json=payload,
        headers=headers,
        timeout=_GN_TIMEOUT,
    )
    response.raise_for_status()
    # A successful call may carry no body; decoding an empty body would raise.
    return response.json() if response.content else None