diff --git a/.DS_Store b/.DS_Store
old mode 100644
new mode 100755
diff --git a/.env.example b/.env.example
old mode 100644
new mode 100755
diff --git a/.gitignore b/.gitignore
old mode 100644
new mode 100755
index 0a19790..161b876
--- a/.gitignore
+++ b/.gitignore
@@ -172,3 +172,6 @@ cython_debug/
# PyPI configuration file
.pypirc
+
+
+tests/
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
old mode 100644
new mode 100755
diff --git a/.python-version b/.python-version
old mode 100644
new mode 100755
diff --git a/Dockerfile b/Dockerfile
old mode 100644
new mode 100755
diff --git a/README.md b/README.md
old mode 100644
new mode 100755
diff --git a/addons.txt b/addons.txt
new file mode 100755
index 0000000..74121b4
--- /dev/null
+++ b/addons.txt
@@ -0,0 +1,11 @@
+pandas = "^3.0.0"
+geopandas = "^1.1.2"
+fiona = "^1.10.1"
+numpy = "^2.4.2"
+pdfplumber = "^0.11.9"
+py7zr = "^1.1.0"
+pyogrio = "^0.12.1"
+rapidfuzz = "^3.14.3"
+requests = "^2.32.5"
+openpyxl = "^3.1.5"
+pyarrow = "21.0.0"
\ No newline at end of file
diff --git a/alembic.ini b/alembic.ini
old mode 100644
new mode 100755
diff --git a/app/__init__.py b/app/__init__.py
old mode 100644
new mode 100755
diff --git a/app/api/dependencies/__init__.py b/app/api/dependencies/__init__.py
old mode 100644
new mode 100755
diff --git a/app/api/dependencies/auth.py b/app/api/dependencies/auth.py
old mode 100644
new mode 100755
diff --git a/app/api/dependencies/database.py b/app/api/dependencies/database.py
old mode 100644
new mode 100755
diff --git a/app/api/dependencies/factory.py b/app/api/dependencies/factory.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/__init__.py b/app/api/v1/__init__.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/__init__.py b/app/api/v1/routes/__init__.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/auth_route.py b/app/api/v1/routes/auth_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/category_route.py b/app/api/v1/routes/category_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/classification_route.py b/app/api/v1/routes/classification_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/count_route.py b/app/api/v1/routes/count_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/credential_route.py b/app/api/v1/routes/credential_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/feedback_route.py b/app/api/v1/routes/feedback_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/file_route.py b/app/api/v1/routes/file_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/geonetwork_route.py b/app/api/v1/routes/geonetwork_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/map_projection_system_route.py b/app/api/v1/routes/map_projection_system_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/map_source_route.py b/app/api/v1/routes/map_source_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/mapset_history_route.py b/app/api/v1/routes/mapset_history_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/mapset_route.py b/app/api/v1/routes/mapset_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/news_route.py b/app/api/v1/routes/news_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/organization_route.py b/app/api/v1/routes/organization_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/regional_route.py b/app/api/v1/routes/regional_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/role_route.py b/app/api/v1/routes/role_route.py
old mode 100644
new mode 100755
diff --git a/app/api/v1/routes/user_route.py b/app/api/v1/routes/user_route.py
old mode 100644
new mode 100755
diff --git a/app/core/__init__.py b/app/core/__init__.py
old mode 100644
new mode 100755
diff --git a/app/core/config.py b/app/core/config.py
old mode 100644
new mode 100755
diff --git a/app/core/data_types.py b/app/core/data_types.py
old mode 100644
new mode 100755
diff --git a/app/core/database.py b/app/core/database.py
old mode 100644
new mode 100755
diff --git a/app/core/exceptions.py b/app/core/exceptions.py
old mode 100644
new mode 100755
diff --git a/app/core/minio_client.py b/app/core/minio_client.py
old mode 100644
new mode 100755
diff --git a/app/core/params.py b/app/core/params.py
old mode 100644
new mode 100755
diff --git a/app/core/responses.py b/app/core/responses.py
old mode 100644
new mode 100755
diff --git a/app/core/security.py b/app/core/security.py
old mode 100644
new mode 100755
diff --git a/app/main.py b/app/main.py
old mode 100644
new mode 100755
diff --git a/app/mapset_pipeline/__init__,.py b/app/mapset_pipeline/__init__,.py
new file mode 100755
index 0000000..e69de29
diff --git a/app/mapset_pipeline/api/router.py b/app/mapset_pipeline/api/router.py
new file mode 100755
index 0000000..cf644ce
--- /dev/null
+++ b/app/mapset_pipeline/api/router.py
@@ -0,0 +1,41 @@
+# services/file_pipeline/router.py
+from fastapi import APIRouter, Depends, File, UploadFile, Form
+from .schemas import UploadRequest, PdfRequest
+from app.mapset_pipeline.service import handle_file_analysis, process_pdf_file, execute_postgis_ingestion
+from app.response.res import successRes, errorRes
+
+router = APIRouter(prefix="/pipeline", tags=["File Pipeline"])
+
+@router.post("/analyze")
+async def upload_file(
+ file: UploadFile = File(...),
+ page: str = Form(""),
+ sheet: str = Form(""),
+ fileDesc: str = Form("")
+):
+ try:
+ data = await handle_file_analysis(file, page, sheet, fileDesc)
+ return successRes(data=data)
+ except Exception as e:
+ return errorRes(message="Upload failed", details=str(e), status_code=500)
+
+
+@router.post("/analyze/pdf")
+async def upload_pdf_file(  # renamed: a second `upload_file` def shadowed the /analyze handler at module level
+    payload: PdfRequest
+):
+    try:
+        res = await process_pdf_file(payload)
+        return res
+    except Exception as e:
+        return errorRes(message="Upload failed", details=str(e), status_code=500)
+
+
+@router.post("/publish")
+async def process_to_postgis(payload: UploadRequest):
+ # user_id bisa diambil dari dependency injection auth
+ try:
+ data = await execute_postgis_ingestion(payload, user_id=2)
+ return successRes(data=data)
+ except Exception as e:
+ return errorRes(message="Processing failed", details=str(e), status_code=500)
\ No newline at end of file
diff --git a/app/mapset_pipeline/api/schemas.py b/app/mapset_pipeline/api/schemas.py
new file mode 100755
index 0000000..7b1d9fe
--- /dev/null
+++ b/app/mapset_pipeline/api/schemas.py
@@ -0,0 +1,17 @@
+from pydantic import BaseModel
+from typing import List, Dict, Any
+
+class PdfRequest(BaseModel):
+    title: str
+    columns: List[str]
+    rows: List[List[Any]]  # bare List is implicitly List[Any]; make the element type explicit
+    fileName: str
+    fileDesc: str
+
+class UploadRequest(BaseModel):
+ title: str
+ path: str
+ rows: List[dict]
+ columns: List[str]
+ author: Dict[str, Any]
+ style: str
\ No newline at end of file
diff --git a/app/mapset_pipeline/core/clients/ai_client.py b/app/mapset_pipeline/core/clients/ai_client.py
new file mode 100755
index 0000000..df3b65d
--- /dev/null
+++ b/app/mapset_pipeline/core/clients/ai_client.py
@@ -0,0 +1,49 @@
+import requests
+from typing import Dict, Any
+from app.core.config import GEN_AI_URL
+
+URL = GEN_AI_URL
+
+
+def generate_metadata(payload: Dict[str, Any]) -> Dict[str, Any]:
+    headers = {
+        "Content-Type": "application/json",
+        "API_KEY": "testsatupeta"  # FIXME: hardcoded credential — load from config/env instead
+    }
+
+    try:
+        response = requests.post(
+            f"{URL}",
+            json=payload,
+            headers=headers, timeout=30,  # timeout prevents the request from hanging indefinitely
+        )
+
+        # NOTE(review): raise_for_status() is disabled, so non-2xx bodies are returned as-is — confirm intended
+        return response.json()
+
+    except requests.exceptions.RequestException as e:
+        return {
+            "success": False,
+            "error": str(e)
+        }
+
+
+if __name__ == "__main__":
+ # Contoh payload
+ payload = {
+ "nama_file_peta": "peta bencana.pdf",
+ "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD)",
+ "tipe_data_spasial": "Multipolygon",
+ "struktur_atribut_data": {},
+ "metadata": {
+ "judul": "",
+ "abstrak": "",
+ "tujuan": "",
+ "keyword": [],
+ "kategori": [],
+ "kategori_mapset": ""
+ }
+ }
+
+ result = generate_metadata(payload)
+ print(result)
diff --git a/app/mapset_pipeline/core/processing/analyzer.py b/app/mapset_pipeline/core/processing/analyzer.py
new file mode 100755
index 0000000..2ef09ea
--- /dev/null
+++ b/app/mapset_pipeline/core/processing/analyzer.py
@@ -0,0 +1,181 @@
+import os
+import asyncio
+import pandas as pd
+import geopandas as gpd
+from app.response.res import errorRes
+
+from app.mapset_pipeline.utils.file_ops import generate_unique_filename, dataframe_validation
+from app.mapset_pipeline.utils.formatters import safe_json
+from .geometry_build import is_geom_empty, detect_and_build_geometry, attach_polygon_geometry_auto
+from app.mapset_pipeline.core.clients.ai_client import generate_metadata
+from app.mapset_pipeline.core.publication.publish_geoserver import publish_layer_to_geoserver
+from app.mapset_pipeline.core.publication.publish_geonetwork import publish_metadata
+
+async def analyze_and_clean_dataframe(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
+    """
+    Main DataFrame-processing routine:
+    1. Detect geometry
+    2. Validate & compute statistics
+    3. Build preview & warnings
+    4. Generate metadata (AI)
+    5. Persist to a temporary Parquet file
+    """
+
+    # 1. Geometry detection
+    result = detect_and_build_geometry(df, master_polygons=None)
+
+    if not hasattr(result, "geometry") or result.geometry.isna().all():
+        result = attach_polygon_geometry_auto(result)
+
+    def normalize_geom_type(geom_type):
+        if geom_type and geom_type.startswith("Multi"):
+            return geom_type.replace("Multi", "")
+        return geom_type
+
+    # 2. Geometry type analysis
+    if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
+        geom_types = (
+            result.geometry
+            .dropna()
+            .geom_type
+            .apply(normalize_geom_type)
+            .unique()
+        )
+        geom_type = geom_types[0] if len(geom_types) > 0 else "None"
+        null_geom = result.geometry.isna().sum()
+
+        print(f"[INFO] Tipe Geometry: {geom_type}")
+        print(f"[INFO] Jumlah geometry kosong: {null_geom}")
+    else:
+        # Fallback when no geometry column could be detected
+        res = {
+            "message": "Tidak menemukan tabel yang relevan atau kolom geometri.",
+            "file_type": ext,
+            "rows": len(df),
+            "columns": len(df.columns),
+            "geometry_valid": 0,
+            "geometry_empty": 0,
+            "geometry_valid_percent": 0,
+            "warnings": [],
+            "warning_examples": [],
+            "preview": []
+        }
+        # Return an error structure (instead of raising) so the
+        # router/service layer can relay it to the client
+        return errorRes(message="Tidak berhasil mencocokan geometry pada tabel.", details=res, status_code=422)
+
+    # 3. Clean data values
+    result = result.replace([pd.NA, float('inf'), float('-inf')], None)
+
+    # Convert geometry to WKT for text analysis
+    if isinstance(result, gpd.GeoDataFrame) and 'geometry' in result.columns:
+        # Keep original geometry objects for the parquet step below;
+        # WKT strings are only needed for the serializable preview
+        pass
+
+    # Validity statistics
+    empty_count = result['geometry'].apply(is_geom_empty).sum()
+    valid_count = len(result) - empty_count
+    match_percentage = (valid_count / len(result) * 100) if len(result) else 0.0  # guard: empty frame would divide by zero
+
+    warnings = []
+    if empty_count > 0:
+        warnings.append(
+            f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
+            f"({100 - match_percentage:.2f}% data gagal cocok)."
+        )
+
+    # Sample rows whose geometry failed to match
+    if empty_count > 0:
+        examples = result[result['geometry'].apply(is_geom_empty)].head(500)
+        warning_examples = examples.to_dict(orient="records")
+    else:
+        warning_examples = []
+
+    # Prepare preview data (geometry as WKT for the JSON response);
+    # work on a copy so the main dataframe is untouched
+    data_df = result.copy()
+    if 'geometry' in data_df.columns:
+        data_df['geometry'] = data_df['geometry'].apply(
+            lambda g: g.wkt if g is not None else None
+        )
+
+    preview_data = data_df.to_dict(orient="records")
+
+    # JSON sanitation (numpy types -> python types)
+    preview_safe = [
+        {k: safe_json(v) for k, v in row.items()} for row in preview_data
+    ]
+
+    warning_safe = [
+        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
+    ]
+
+    # 4. AI metadata generation
+    ai_context = {
+        "nama_file_peta": filename,
+        "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD) Provinsi Jatim",  # TODO: should be dynamic
+        "tipe_data_spasial": geom_type,
+        "deskripsi_singkat": fileDesc,
+        "struktur_atribut_data": {},
+    }
+
+    try:
+        ai_suggest = generate_metadata(ai_context)
+    except Exception as e:
+        print(f"[WARNING] Gagal generate metadata AI: {e}")
+        ai_suggest = {}
+
+    # 5. Persist to a temporary parquet file
+    # unique filename keeps this thread-safe
+    tmp_file = generate_unique_filename(folder="tmp", ext="parquet")
+
+    # Run the blocking conversion in a worker thread so the event loop is not blocked
+    # (stray debug print removed)
+    await asyncio.to_thread(dataframe_validation, data_df, tmp_file)
+    # (stray debug print removed)
+
+    response = {
+        "message": "File berhasil dibaca dan dianalisis.",
+        "file_name": filename,
+        "file_type": ext,
+        "rows": int(len(result)),
+        "columns": list(map(str, result.columns)),
+        "geometry_valid": int(valid_count),
+        "geometry_empty": int(empty_count),
+        "geometry_valid_percent": float(round(match_percentage, 2)),
+        "geometry_type": geom_type,
+        "warnings": warnings,
+        "warning_rows": warning_safe,
+        "preview": preview_safe,
+        "metadata_suggest": ai_suggest,
+        "tmp_path": tmp_file
+    }
+
+    return response
+
+
+async def publish_mapset(table_name: str, job_id: str):  # publish table to GeoServer + GeoNetwork metadata; returns layer URL and uuid
+    try:
+
+        geos_link = publish_layer_to_geoserver(table_name, job_id)
+
+        uuid = await publish_metadata(
+            table_name=table_name,
+            geoserver_links=geos_link
+        )
+
+        # await update_job_status(table_name, "FINISHED", job_id)
+
+        # return uuid
+        return {
+            "geos_link": geos_link["layer_url"],
+            # "uuid": uuid
+            "uuid": "123123"  # FIXME: placeholder — the real uuid computed by publish_metadata above is never returned
+        }
+
+    except Exception as e:
+        # await update_job_status(table_name, "FAILED", job_id)
+        raise RuntimeError(f"Publish layer gagal: {e}") from e
+
+
diff --git a/app/mapset_pipeline/core/processing/geometry_build.py b/app/mapset_pipeline/core/processing/geometry_build.py
new file mode 100755
index 0000000..91fbe9c
--- /dev/null
+++ b/app/mapset_pipeline/core/processing/geometry_build.py
@@ -0,0 +1,466 @@
+import geopandas as gpd
+from shapely.geometry import Point, LineString
+import pandas as pd
+import numpy as np
+import re
+import os
+from shapely import wkt
+from rapidfuzz import process, fuzz
+from sqlalchemy import create_engine
+from shapely.geometry.base import BaseGeometry
+from app.core.config import REFERENCE_DB_URL, REFERENCE_SCHEMA, DESA_REF, KEC_REF, KAB_REF
+
+# ============================================================
+# KONFIGURASI DAN KONSTANTA
+# ============================================================
+
+COLUMN_ALIASES = {
+ 'desa': ['desa', 'kelurahan', 'desa_kelurahan', 'desa/kelurahan', 'nama_desa', 'nama_kelurahan', 'Desa/Kel'],
+ 'kecamatan': ['kec', 'kecamatan', 'nama_kec', 'nama_kecamatan'],
+ 'kabupaten': ['kab', 'kabupaten', 'kota', 'kabupaten_kota', 'kota_kabupaten', 'kab/kota', 'kota/kabupaten', 'kota/kab']
+}
+
+# ============================================================
+# FUNGSI BANTU ADMINISTRATIF
+# ============================================================
+
+def find_admin_column(df, aliases):
+ """Mencari kolom yang paling cocok untuk tiap level admin (desa/kec/kab)"""
+ matched = {}
+ for level, alias_list in aliases.items():
+ for col in df.columns:
+ col_norm = col.strip().lower().replace(' ', '_').replace('/', '_')
+ if any(alias in col_norm for alias in alias_list):
+ matched[level] = col
+ break
+ return matched
+
+
+def detect_smallest_admin_level(df):
+ """Mendeteksi level administratif terkecil yang ada di DataFrame"""
+ cols = [c.lower() for c in df.columns]
+ if any('desa' in c or 'kelurahan' in c for c in cols):
+ return 'desa'
+ elif any('kecamatan' in c for c in cols):
+ return 'kecamatan'
+ elif any('kab' in c or 'kota' in c for c in cols):
+ return 'kabupaten'
+ return None
+
+
+def fuzzy_merge(df, master, left_key, right_key, threshold=85):
+ """Melakukan fuzzy matching antar nama wilayah"""
+ matches = df[left_key].apply(
+ lambda x: process.extractOne(str(x), master[right_key], score_cutoff=threshold)
+ )
+ df['match'] = matches.apply(lambda m: m[0] if m else None)
+ merged = df.merge(master, left_on='match', right_on=right_key, how='left')
+ return merged
+
+
+
+
+
+def normalize_name(name: str, level: str = None):
+ if not isinstance(name, str):
+ return None
+
+ name = name.strip()
+ if not name:
+ return None
+
+ name = re.sub(r'\s*\([^)]*\)\s*', '', name)
+
+ raw = name.lower()
+ raw = re.sub(r'^(desa|kelurahan|kel|dusun|kampung)\s+', '', raw)
+ raw = re.sub(r'^(kecamatan|kec)\s+', '', raw)
+ raw = re.sub(r'^(kabupaten|kab\.?|kab)\s+', '', raw)
+
+ if level in ["kabupaten", "kota"]:
+ raw = re.sub(r'^(kota\s+)', '', raw)
+
+ raw = re.sub(r'[^a-z\s]', '', raw)
+ raw = re.sub(r'\s+', ' ', raw).strip()
+
+ tokens = raw.split()
+
+ merged_tokens = []
+ i = 0
+ while i < len(tokens):
+ if i < len(tokens) - 1:
+ sim = fuzz.ratio(tokens[i], tokens[i + 1])
+ if sim > 75:
+ merged_tokens.append(tokens[i] + tokens[i + 1])
+ i += 2
+ continue
+ merged_tokens.append(tokens[i])
+ i += 1
+
+ cleaned_tokens = []
+ prev = None
+ for tok in merged_tokens:
+ if prev and fuzz.ratio(prev, tok) > 95:
+ continue
+ cleaned_tokens.append(tok)
+ prev = tok
+
+ raw = " ".join(cleaned_tokens)
+ formatted = raw.title()
+
+ if level in ["kabupaten", "kota"]:
+ if "kota" in name.lower():
+ if not formatted.startswith("Kota "):
+ formatted = f"Kota {formatted}"
+ else:
+ formatted = formatted.replace("Kota ", "")
+
+ return formatted
+
+
+
+
+def is_geom_empty(g):
+ """True jika geometry None, NaN, atau geometry Shapely kosong."""
+ if g is None:
+ return True
+ if isinstance(g, float) and pd.isna(g):
+ return True
+ if isinstance(g, BaseGeometry):
+ return g.is_empty
+ return False
+
+
+
+
+
+import math
+
+def normalize_lon(val, is_lat=False):  # normalize a coordinate into a valid degree range (lon ±180, lat ±90)
+    if pd.isna(val):
+        return None
+    try:
+        v = float(val)
+    except (TypeError, ValueError):  # was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt
+        return None
+
+    av = abs(v)
+    if av == 0:
+        return v
+
+    if (-180 <= v <= 180 and not is_lat) or (-90 <= v <= 90 and is_lat):
+        return v  # already in range
+
+    for factor in [1, 10, 100, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9]:  # handles values stored without a decimal point
+        nv = v / factor
+        if (not is_lat and -180 <= nv <= 180) or (is_lat and -90 <= nv <= 90):
+            return nv
+
+    return None
+
+
+
+def normalize_lat(val):
+ if pd.isna(val):
+ return None
+ v = float(val)
+ av = abs(v)
+ if av > 1e9: # contoh: -8167413802 (10 digit)
+ return v / 1e9
+ elif av > 1e8: # fallback jika ada variasi
+ return v / 1e8
+ else:
+ return v
+
+
+# ============================================================
+# FUNGSI UTAMA GEOMETRY DETECTION (LAT/LON / PATH)
+# ============================================================
+def detect_and_build_geometry(df: pd.DataFrame, master_polygons: gpd.GeoDataFrame = None):
+ """
+ Mendeteksi dan membentuk geometry dari DataFrame.
+ Bisa dari lat/lon, WKT, atau join ke master polygon (jika disediakan).
+ """
+
+ if isinstance(df, gpd.GeoDataFrame):
+ geom_cols = [
+ c for c in df.columns
+ if re.match(r'^(geometry|geom|the_geom|wkb_geometry)$', c, re.IGNORECASE)
+ or c.lower().startswith("geom")
+ or c.lower().endswith("geometry")
+ ]
+ # if "geometry" in df.columns and df.geometry.notna().any():
+ if geom_cols:
+ geom_count = df.geometry.notna().sum()
+ geom_type = list(df.geom_type.unique())
+ print(f"[INFO] Detected existing geometry in GeoDataFrame ({geom_count} features, {geom_type}).")
+ return df
+
+ lat_col = next((c for c in df.columns if re.search(r'\b(lat|latitude|y[_\s]*coord|y$)\b', c.lower())), None)
+ lon_col = next((c for c in df.columns if re.search(r'\b(lon|long|longitude|x[_\s]*coord|x$)\b', c.lower())), None)
+
+ if lat_col and lon_col:
+ df[lat_col] = pd.to_numeric(df[lat_col], errors='coerce')
+ df[lon_col] = pd.to_numeric(df[lon_col], errors='coerce')
+
+ df[lon_col] = df[lon_col].apply(lambda x: normalize_lon(x, is_lat=False))
+ df[lat_col] = df[lat_col].apply(normalize_lat)
+
+ gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[lon_col], df[lat_col]), crs="EPSG:4326")
+ print("[INFO] Geometry dibangun dari kolom lat/lon.")
+ return gdf
+
+ coord_col = next(
+ (c for c in df.columns if re.search(r'(geom|geometry|wkt|shp|shape|path|coord)', c.lower())), None
+ )
+
+ if coord_col and df[coord_col].notnull().any():
+ sample_val = str(df[coord_col].dropna().iloc[0]).strip()
+
+ if sample_val.startswith('['):
+            def parse_geom(val):  # parse "[(x, y), ...]" strings into a LineString
+                try:
+                    pts = eval(val)  # SECURITY: eval executes arbitrary code from file data — prefer ast.literal_eval
+                    return LineString(pts)
+                except Exception:
+                    return None
+ df['geometry'] = df[coord_col].apply(parse_geom)
+ gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")
+ print("[INFO] Geometry dibangun dari kolom koordinat/path (list of points).")
+ return gdf
+
+ elif any(x in sample_val.upper() for x in ["POINT", "LINESTRING", "POLYGON"]):
+ try:
+ df['geometry'] = df[coord_col].apply(
+ lambda g: wkt.loads(g) if isinstance(g, str) and any(
+ x in g.upper() for x in ["POINT", "LINESTRING", "POLYGON"]
+ ) else None
+ )
+ gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")
+ print("[INFO] Geometry dibangun dari kolom WKT (Point/Line/Polygon/MultiPolygon).")
+ return gdf
+ except Exception as e:
+ print(f"[WARN] Gagal parsing kolom geometry sebagai WKT: {e}")
+
+
+
+ if master_polygons is not None:
+ df.columns = df.columns.str.lower().str.strip().str.replace(' ', '_').str.replace('/', '_')
+ matches = find_admin_column(df, COLUMN_ALIASES)
+
+ if 'desa' in matches:
+ admin_col = matches['desa']
+ merged = df.merge(master_polygons, left_on=admin_col, right_on='nama_desa', how='left')
+ if merged['geometry'].isna().sum() > 0:
+ merged = fuzzy_merge(df, master_polygons, admin_col, 'nama_desa')
+ gdf = gpd.GeoDataFrame(merged, geometry='geometry', crs=master_polygons.crs)
+ return gdf
+
+ elif 'kecamatan' in matches:
+ admin_col = matches['kecamatan']
+ merged = df.merge(master_polygons, left_on=admin_col, right_on='nama_kecamatan', how='left')
+ gdf = gpd.GeoDataFrame(merged, geometry='geometry', crs=master_polygons.crs)
+ return gdf
+
+ elif 'kabupaten' in matches:
+ admin_col = matches['kabupaten']
+ merged = df.merge(master_polygons, left_on=admin_col, right_on='nama_kabupaten', how='left')
+ gdf = gpd.GeoDataFrame(merged, geometry='geometry', crs=master_polygons.crs)
+ return gdf
+
+ print("[WARN] Tidak ditemukan geometry (lat/lon, path, atau master).")
+ return df
+
+
+# def get_reference_polygons(level):
+# """Mengambil data batas wilayah (MultiPolygon) dari DB referensi"""
+# table_map = {
+# 'desa': f"{REFERENCE_SCHEMA}.administrasi_ar_keldesa_jatim",
+# 'kecamatan': f"{REFERENCE_SCHEMA}.administrasi_ar_kec_jatim",
+# 'kabupaten': f"{REFERENCE_SCHEMA}.administrasi_ar_kabkot_jatim"
+# }
+
+# table_name = table_map.get(level)
+# if not table_name:
+# raise ValueError(f"Tidak ada tabel referensi untuk level '{level}'.")
+
+# engine = create_engine(REFERENCE_DB_URL)
+# query = f"SELECT *, ST_Multi(geom) AS geometry FROM {table_name}"
+# gdf = gpd.read_postgis(query, engine, geom_col='geometry')
+
+# print(f"[INFO] {len(gdf)} data referensi '{level}' berhasil dimuat dari {table_name}.")
+# return gdf
+
+
+from functools import lru_cache
+
+@lru_cache(maxsize=3)  # NOTE(review): callers mutate the cached GeoDataFrame (normalized cols, _join_key) — consider returning a copy
+def get_reference_polygons(level):
+    local_path = f"cache/{level}_ref.parquet"
+    if os.path.exists(local_path):
+        print(f"[CACHE] Memuat referensi '{level}' dari file lokal.")
+        return gpd.read_parquet(local_path)
+
+    print(f"[DB] Mengambil data referensi '{level}' dari database...")
+    table_map = {
+        "desa": f"{REFERENCE_SCHEMA}.administrasi_ar_keldesa_jatim",
+        "kecamatan": f"{REFERENCE_SCHEMA}.administrasi_ar_kec_jatim",
+        "kabupaten": f"{REFERENCE_SCHEMA}.administrasi_ar_kabkot_jatim"
+    }
+    table_name = table_map.get(level)  # NOTE(review): unknown level yields None — upstream detection guarantees one of the three keys
+    engine = create_engine(REFERENCE_DB_URL)
+    query = f"SELECT *, ST_Multi(geom) AS geometry FROM {table_name}"
+    gdf = gpd.read_postgis(query, engine, geom_col="geometry")
+    os.makedirs("cache", exist_ok=True); gdf.to_parquet(local_path)  # ensure the cache dir exists before writing
+    print(f"[CACHE] Disimpan ke {local_path}")
+    return gdf
+
+
+
+
+
+
+# ============================================================
+# Optimize Join
+# ============================================================
+def build_join_key(df, cols):
+ arr = df[cols].astype(str).replace("nan", "", regex=False).to_numpy()
+ return np.char.add.reduce(np.column_stack(
+ [arr[:, i] + ("|" if i < len(cols) - 1 else "") for i in range(len(cols))]
+ ), axis=1)
+
+
+# ============================================================
+# FUNGSI: AUTO ATTACH POLYGON KE DATAFRAME NON-SPASIAL
+# ============================================================
+def attach_polygon_geometry_auto(df: pd.DataFrame):
+ """
+ Tambahkan kolom geometry MultiPolygon berdasarkan kombinasi
+ (desa/kelurahan + kecamatan + kabupaten/kota), tanpa duplikasi baris.
+ """
+ level = detect_smallest_admin_level(df)
+ if not level:
+ print("[WARN] Tidak ditemukan kolom administratif (desa/kecamatan/kabupaten).")
+ return df
+
+ print(f"[INFO] Detected smallest admin level: {level}")
+ ref_gdf = get_reference_polygons(level)
+
+ desa_col = next((c for c in df.columns if any(x in c.lower() for x in ['desa', 'kelurahan'])), None)
+ kec_col = next((c for c in df.columns if 'kec' in c.lower()), None)
+ kab_col = next((c for c in df.columns if any(x in c.lower() for x in ['kab', 'kota'])), None)
+
+ if desa_col and (not kec_col or not kab_col):
+ print("[ERROR] Kolom 'Desa' ditemukan tetapi kolom 'Kecamatan' dan/atau 'Kabupaten' tidak lengkap.")
+ print(f"[DEBUG] Ditemukan: Desa={desa_col}, Kec={kec_col}, Kab={kab_col}")
+ return df
+
+ elif not desa_col and kec_col and not kab_col:
+ print("[ERROR] Kolom 'Kecamatan' ditemukan tetapi kolom 'Kabupaten/Kota' tidak ditemukan.")
+ print(f"[DEBUG] Ditemukan: Desa={desa_col}, Kec={kec_col}, Kab={kab_col}")
+ return df
+
+ elif kab_col and not desa_col and not kec_col :
+ print("[INFO] Struktur kolom administratif valid (minimal Kabupaten/Kota ditemukan).")
+ print(f"[DEBUG] Ditemukan: Desa={desa_col}, Kec={kec_col}, Kab={kab_col}")
+
+ elif not desa_col and not kec_col and not kab_col:
+ print("[WARN] Tidak ditemukan kolom administratif apapun (Desa/Kecamatan/Kabupaten).")
+ print(f"[DEBUG] Kolom CSV: {list(df.columns)}")
+ return df
+
+ # kolom di referensi
+ desa_ref = DESA_REF
+ kec_ref = KEC_REF
+ kab_ref = KAB_REF
+
+ if desa_col is not None:
+ df[desa_col] = df[desa_col].astype(str).apply(lambda x: normalize_name(x, "desa"))
+
+ if kec_col is not None:
+ df[kec_col] = df[kec_col].astype(str).apply(lambda x: normalize_name(x, "kecamatan"))
+
+ if kab_col is not None:
+ df[kab_col] = df[kab_col].astype(str).apply(lambda x: normalize_name(x, "kabupaten"))
+
+
+ if desa_ref is not None:
+ ref_gdf[desa_ref] = ref_gdf[desa_ref].astype(str).apply(lambda x: normalize_name(x, "desa"))
+
+ if kec_ref is not None:
+ ref_gdf[kec_ref] = ref_gdf[kec_ref].astype(str).apply(lambda x: normalize_name(x, "kecamatan"))
+
+ if kab_ref is not None:
+ ref_gdf[kab_ref] = ref_gdf[kab_ref].astype(str).apply(lambda x: normalize_name(x, "kabupaten"))
+
+
+
+
+ join_cols = [col for col in [desa_col, kec_col, kab_col] if col]
+
+ if not join_cols:
+ print("[ERROR] Tidak ada kolom administratif yang bisa digunakan untuk join key.")
+ else:
+ join_cols_df = [col for col in [desa_col, kec_col, kab_col] if col]
+ join_cols_ref = [col for col in [desa_ref, kec_ref, kab_ref] if col]
+
+ common_depth = min(len(join_cols_df), len(join_cols_ref))
+ join_cols_df = join_cols_df[-common_depth:]
+ join_cols_ref = join_cols_ref[-common_depth:]
+
+ # print(f"[DEBUG] Join kolom DF : {join_cols_df}")
+ # print(f"[DEBUG] Join kolom REF : {join_cols_ref}")
+
+ # df["_join_key"] = df[join_cols_df].astype(str).agg("|".join, axis=1)
+ # ref_gdf["_join_key"] = ref_gdf[join_cols_ref].astype(str).agg("|".join, axis=1)
+
+ df["_join_key"] = build_join_key(df, join_cols_df)
+ ref_gdf["_join_key"] = build_join_key(ref_gdf, join_cols_ref)
+
+
+ # print(f"[INFO] Join key berhasil dibuat dari kolom: {join_cols_df}")
+
+ ref_lookup = ref_gdf[["_join_key", "geometry"]].drop_duplicates(subset=["_join_key"])
+ df = df.merge(ref_lookup, how="left", on="_join_key")
+ matched = df["geometry"].notna().sum()
+ # print(f"[INFO] {matched} dari {len(df)} baris cocok langsung berdasarkan (desa + kec + kab/kota).")
+
+ if matched < len(df):
+ unmatched = df[df["geometry"].isna()]
+ # print(f"[INFO] Melakukan fuzzy match untuk {len(unmatched)} baris yang belum cocok...")
+
+ ref_dict = dict(zip(ref_lookup["_join_key"], ref_lookup["geometry"]))
+
+ def find_fuzzy_geom(row):
+ key = row["_join_key"]
+ if not isinstance(key, str):
+ return None
+ # fuzzy old
+ # match = process.extractOne(key, list(ref_dict.keys()), scorer=fuzz.token_sort_ratio)
+ # fuzzy new
+ match = process.extractOne(
+ key, list(ref_dict.keys()), scorer=fuzz.token_set_ratio, score_cutoff=80
+ )
+
+ if match and match[1] >= 85:
+ return ref_dict[match[0]]
+ return None
+
+ df.loc[df["geometry"].isna(), "geometry"] = df[df["geometry"].isna()].apply(find_fuzzy_geom, axis=1)
+
+ df = df.drop(columns=["_join_key"], errors="ignore")
+
+ # admin_cols = [col for col in [desa_col, kec_col, kab_col] if col and col in df.columns]
+ # if matched < len(df):
+ # diff = df[df['geometry'].isna()][admin_cols]
+
+ # print("[DEBUG] Baris yang tidak match:")
+ # if diff.empty:
+ # print("(semua baris berhasil match)")
+ # else:
+ # print(diff.to_string(index=False))
+
+
+ # print(f"[REPORT] Total match: {df['geometry'].notna().sum()} / {len(df)} ({df['geometry'].notna().mean()*100:.2f}%)")
+
+
+ return gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")
diff --git a/app/mapset_pipeline/core/publication/publish_geonetwork.py b/app/mapset_pipeline/core/publication/publish_geonetwork.py
new file mode 100755
index 0000000..109a955
--- /dev/null
+++ b/app/mapset_pipeline/core/publication/publish_geonetwork.py
@@ -0,0 +1,693 @@
+from fastapi import HTTPException
+import requests
+from sqlalchemy import text
+from app.core.config import GEONETWORK_PASS, GEONETWORK_URL, GEONETWORK_USER
+from database.connection import engine
+from datetime import datetime
+from uuid import uuid4
+import re
+
+
+
def create_gn_session():
    """
    Create an authenticated requests session against GeoNetwork.

    Performs a priming GET so GeoNetwork issues the XSRF cookie required
    by its write APIs.

    Returns:
        tuple: (session, xsrf_token)

    Raises:
        requests.HTTPError: when the priming request fails (e.g. bad credentials).
        Exception: when no XSRF token cookie is returned.
    """
    session = requests.Session()
    session.auth = (GEONETWORK_USER, GEONETWORK_PASS)

    # Prime the session; fail fast on HTTP errors instead of silently
    # continuing with an empty cookie jar.
    resp = session.get(f"{GEONETWORK_URL}/srv/eng/info?type=me")
    resp.raise_for_status()

    xsrf_token = session.cookies.get("XSRF-TOKEN")
    if not xsrf_token:
        raise Exception("XSRF token missing")

    return session, xsrf_token
+
+
+
def escape_url_params(url: str) -> str:
    """
    Escape characters that are invalid inside XML text content.

    Replaces every bare '&' in the URL with '&amp;', but leaves
    already-escaped '&amp;' sequences untouched so the function is
    idempotent.
    """
    # Negative lookahead: only touch '&' not already followed by 'amp;'.
    # (The previous replacement string had lost its 'amp;' suffix, making
    # the substitution a no-op.)
    url = re.sub(r'&(?!amp;)', '&amp;', url)
    return url
+
+
def fix_xml_urls(xml: str) -> str:
    """
    Find URL spans inside the metadata XML and escape their query parameters.

    NOTE(review): the capture pattern below looks garbled in this copy of
    the file -- r"(.*?)" matches an empty string at every position, so the
    substitution is effectively a no-op. Presumably it originally matched
    the content of a URL element (e.g. an <URL>...</URL> span); confirm
    against the upstream source before relying on this function.
    """
    def replacer(match):
        # Escape the captured URL text via escape_url_params.
        original = match.group(1).strip()
        fixed = escape_url_params(original)
        return f"{fixed}"

    # Apply the replacer over every match of the (suspect) pattern.
    xml_fixed = re.sub(
        r"(.*?)",
        replacer,
        xml,
        flags=re.DOTALL
    )

    return xml_fixed
+
+
+
async def get_extent(table_name: str):
    """
    Compute the bounding box of a PostGIS table's geometries.

    Returns None when the table holds no geometries. Currently the
    computed extent is discarded and a fixed East-Java bounding box is
    returned instead (see note below).

    NOTE(review): table_name is interpolated directly into the SQL
    string. Identifiers cannot be bound as parameters, so the value must
    be validated/whitelisted upstream and never come from untrusted input.
    """
    sql = f"""
    SELECT
        ST_XMin(extent), ST_YMin(extent),
        ST_XMax(extent), ST_YMax(extent)
    FROM (
        SELECT ST_Extent(geom) AS extent
        FROM public."{table_name}"
    ) AS box;
    """

    async with engine.connect() as conn:
        # SQLAlchemy 2.x requires textual SQL to be wrapped in text();
        # executing a bare string raises ObjectNotExecutableError.
        result = await conn.execute(text(sql))
        row = result.fetchone()

    if not row or row[0] is None:
        return None

    # HACK: the real extent (row[0..3]) is intentionally ignored and a
    # hard-coded Jawa Timur bounding box is returned instead.
    return {
        "xmin": 110.1372,  # west
        "ymin": -9.3029,   # south
        "xmax": 114.5287,  # east
        "ymax": -5.4819    # north
    }
+
async def get_author_metadata(table_name: str):
    """
    Fetch dataset/author metadata for a table from backend.author_metadata,
    joined with the owning user's organization details.

    Args:
        table_name: value matched against author_metadata.table_title.

    Returns:
        dict: the metadata row converted via Row._mapping.

    Raises:
        Exception: when no metadata row exists for the table.
    """
    sql = """
    SELECT am.table_title, am.dataset_title, am.dataset_abstract, am.keywords, am.date_created,
        am.organization_name, am.contact_person_name, am.created_at,
        am.contact_email, am.contact_phone, am.geom_type,
        u.organization_id,
        o.address AS organization_address,
        o.email AS organization_email,
        o.phone_number AS organization_phone
    FROM backend.author_metadata AS am
    LEFT JOIN backend.users u ON am.user_id = u.id
    LEFT JOIN backend.organizations o ON u.organization_id = o.id
    WHERE am.table_title = :table
    LIMIT 1
    """

    async with engine.connect() as conn:
        # text() is required for textual SQL with bound parameters in
        # SQLAlchemy 2.x; plain strings are not executable.
        result = await conn.execute(text(sql), {"table": table_name})
        row = result.fetchone()

    if not row:
        raise Exception(f"Tidak ada metadata untuk tabel: {table_name}")

    # Async SQLAlchemy rows expose ._mapping for dict conversion.
    return dict(row._mapping)
+
+
def map_geom_type(gtype):
    """
    Map a geometry type name to the ISO 19115 MD_GeometricObjectTypeCode
    value used in the metadata XML ("surface", "curve" or "point").

    Accepts a string, a list of strings (first element wins), or None.
    Unknown or empty values default to "surface".
    """
    if gtype is None:
        return "surface"

    # A list of geometry types: take the first entry.
    if isinstance(gtype, list):
        if not gtype:
            return "surface"
        gtype = gtype[0]

    gtype = str(gtype).lower()

    # Check point/line before falling back to surface so that
    # "MultiLineString" maps to "curve" and "MultiPoint" to "point".
    # (The previous "multi" shortcut wrongly mapped both to "surface".)
    if "point" in gtype:
        return "point"
    if "line" in gtype:
        return "curve"

    # Polygon, MultiPolygon and anything unrecognised.
    return "surface"
+
+
def generate_metadata_xml(table_name, meta, extent, geoserver_links):
    """
    Render the ISO 19115:2003/19139 metadata record for a dataset.

    NOTE(review): the XML element tags of this template appear to have
    been stripped in this copy of the file (only the text and interpolated
    values remain), so the template below is reproduced as-is rather than
    restructured -- confirm against the upstream source before editing.

    Args:
        table_name: passed by the caller (publish_metadata hands in the
            dataset title, not the physical table name).
        meta: author/organization metadata dict from get_author_metadata().
        extent: bounding-box dict with xmin/ymin/xmax/ymax keys.
        geoserver_links: dict with "wms_url" and "wfs_url" entries.

    Returns:
        str: the rendered metadata document containing a fresh UUID.
    """
    # One keyword entry per comma-separated token.
    keywords_xml = "".join([
        f"""
        {kw.strip()}
        """ for kw in meta["keywords"].split(",")
    ])

    geom_type_code = map_geom_type(meta["geom_type"])
    print('type', geom_type_code)
    # Fresh record identifier for GeoNetwork.
    uuid = str(uuid4())

    # NOTE(review): datetime.utcnow() is naive UTC, yet the template
    # appends "+07:00"; presumably Asia/Jakarta local time was intended
    # -- verify the expected timestamp semantics.
    return f"""


        {uuid}













        {meta['contact_person_name']}


        {meta['organization_name']}






        {meta['organization_phone']}


        {meta['organization_phone']}






        {meta['organization_address']}


        Surabaya


        Jawa Timur


        Indonesia


        {meta['organization_email']}




        08.00-16.00









        {datetime.utcnow().isoformat()}+07:00


        ISO 19115:2003/19139


        1.0









        38










        4326


        EPSG










        {meta['dataset_title']}




        {meta['created_at'].isoformat()}+07:00







        {meta['date_created'].year}




        {meta['contact_person_name']}


        {meta['organization_name']}






        {meta['organization_phone']}


        {meta['organization_phone']}






        {meta['organization_address']}


        Surabaya


        Indonesia


        {meta['organization_email']}




        08.00-16.00








        Timezone: UTC+7 (Asia/Jakarta)




        {meta['dataset_abstract']}


        {meta['dataset_abstract']}






        Lab AI Polinema


        Lab AI Polinema







        {meta['organization_phone']}


        {meta['organization_phone']}






        {meta['organization_address']}


        Surabaya


        Jawa Timur


        Indonesia


        {meta['organization_email']}



















        {keywords_xml}











        Penggunaan data harus mencantumkan sumber: {meta['organization_name']}.











        25000















        {extent['xmin']}
        {extent['xmax']}
        {extent['ymin']}
        {extent['ymax']}









        true




        {meta['dataset_title']}




        {meta['created_at'].isoformat()}+07:00







        {meta['date_created'].year}












        {geoserver_links["wms_url"]}


        DB:POSTGIS


        {meta["dataset_title"]}


        {meta["dataset_title"]}






        {geoserver_links["wms_url"]}


        WWW:LINK-1.0-http--link


        {meta["dataset_title"]}


        {meta["dataset_title"]}






        {geoserver_links["wms_url"]}


        OGC:WMS


        {meta["dataset_title"]}






        {geoserver_links["wfs_url"]}


        OGC:WFS


        {meta["dataset_title"]}



















        Data dihasilkan dari digitasi peta dasar skala 1:25000 menggunakan QGIS.





"""
+
+
+# Geonetwork version 4.4.9.0
+def upload_metadata_to_geonetwork(xml_metadata: str):
+ # session = requests.Session()
+ # session.auth = (GEONETWORK_USER, GEONETWORK_PASS)
+
+ # # 1. Get XSRF token
+ # try:
+ # info_url = f"{GEONETWORK_URL}/srv/eng/info?type=me"
+ # session.get(info_url)
+ # except requests.exceptions.RequestException as e:
+ # raise HTTPException(status_code=503, detail=f"Failed to connect to GeoNetwork: {e}")
+
+ # xsrf_token = session.cookies.get('XSRF-TOKEN')
+ # if not xsrf_token:
+ # raise HTTPException(status_code=500, detail="Could not retrieve XSRF-TOKEN from GeoNetwork.")
+
+ session, xsrf_token = create_gn_session()
+ headers = {
+ 'X-XSRF-TOKEN': xsrf_token,
+ 'Accept': 'application/json'
+ }
+
+ GN_API_RECORDS_URL = f"{GEONETWORK_URL}/srv/api/records"
+
+ # 2. GeoNetwork requires a multipart/form-data upload
+ files = {
+ 'file': ('metadata.xml', xml_metadata, 'application/xml')
+ }
+
+ params = {
+ "ownerGroup": 1, # all
+ "ownerUser": 1 # admin
+ }
+
+ response = session.post(
+ GN_API_RECORDS_URL,
+ params=params,
+ files=files,
+ headers=headers,
+ cookies=session.cookies.get_dict()
+ )
+
+ metadata_infos = response.json().get("metadataInfos", {})
+ uuid = None
+ for records in metadata_infos.values():
+ if records and isinstance(records, list):
+ uuid = records[0].get("uuid")
+ break
+ if not uuid:
+ raise ValueError("UUID not found in GeoNetwork response")
+
+ publish_record(session, uuid)
+
+ # print("response", response.json())
+ return uuid
+
+
+
async def publish_metadata(table_name: str, geoserver_links: dict):
    """
    Build, sanitise and upload the ISO metadata record for a table.

    Gathers the table extent and author metadata, renders the XML
    document, escapes raw '&' characters inside embedded URLs, uploads
    the record to GeoNetwork and returns the new record's UUID.
    """
    bbox = await get_extent(table_name)
    meta = await get_author_metadata(table_name)

    # The XML generator receives the dataset title (not the physical
    # table name) as its table_name argument.
    raw_xml = generate_metadata_xml(
        table_name=meta["dataset_title"],
        meta=meta,
        extent=bbox,
        geoserver_links=geoserver_links,
    )

    record_uuid = upload_metadata_to_geonetwork(fix_xml_urls(raw_xml))
    print(f"[GeoNetwork] Metadata uploaded. UUID = {record_uuid}")
    return record_uuid
+
+
+
def publish_record(session, uuid):
    """
    Make a GeoNetwork record viewable by group 1 via the sharing API.

    Clears existing privileges and grants "view" only.

    Raises:
        requests.HTTPError: when the sharing update is rejected.
    """
    print('[uuid]', uuid)

    request_headers = {
        "X-XSRF-TOKEN": session.cookies.get('XSRF-TOKEN'),
        "Accept": "application/json",
        "Content-Type": "application/json",
    }

    sharing_payload = {
        "clear": True,
        "privileges": [
            {"group": 1, "operations": {"view": True}},
        ],
    }

    response = session.put(
        f"{GEONETWORK_URL}/srv/api/records/{uuid}/sharing",
        json=sharing_payload,
        headers=request_headers,
    )
    response.raise_for_status()
+
+
+# single stand func
+# def publish_record(uuid):
+# session, xsrf_token = create_gn_session()
+
+# headers = {
+# "X-XSRF-TOKEN": xsrf_token,
+# "Content-Type": "application/json"
+# }
+
+# url = f"{GEONETWORK_URL}/srv/api/records/{uuid}/sharing"
+
+# payload = {
+# "clear": True,
+# "privileges": [
+# {"group": 1, "operations": {"view": True}}
+# ]
+# }
+
+# resp = session.put(url, json=payload, headers=headers)
+# resp.raise_for_status()
diff --git a/app/mapset_pipeline/core/publication/publish_geoserver.py b/app/mapset_pipeline/core/publication/publish_geoserver.py
new file mode 100755
index 0000000..36dfe4d
--- /dev/null
+++ b/app/mapset_pipeline/core/publication/publish_geoserver.py
@@ -0,0 +1,300 @@
+import requests
+import json
+import os
+from app.core.config import GEOSERVER_URL, GEOSERVER_USER, GEOSERVER_PASS, GEOSERVER_WORKSPACE
+
# DATASTORE = "postgis" #per OPD
DATASTORE = "server_lokal"  # GeoServer datastore every layer is published into
# SLD_DIR = "./styles"

# BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# SLD_DIR = os.path.join(BASE_DIR, "styles")

BASE_DIR = os.path.dirname(os.path.abspath(__file__))  # .../core/publication
MAIN_DIR = os.path.abspath(os.path.join(BASE_DIR, "..", ".."))  # mapset_pipeline package root
SLD_DIR = os.path.join(MAIN_DIR, "style_temp")  # temp folder holding generated per-job SLD files
+
+
def publish_layer_to_geoserver(table: str, job_id: str):
    """
    Publish a PostGIS table as a GeoServer layer with its generated style.

    Steps: publish the feature type, upload the job's SLD (if present),
    set it as the layer's default style, delete the local SLD copy and
    reload GeoServer.

    Args:
        table: table/layer name inside the configured datastore.
        job_id: pipeline job id; the SLD is expected at SLD_DIR/<job_id>.sld.

    Returns:
        dict: table/style names plus WMS, WFS and OpenLayers preview URLs.

    Raises:
        Exception: when GeoServer rejects the SLD upload.
    """
    print(f"[GeoServer] Publish layer + upload SLD: {table}")

    # ==========================
    # 1. Publish Feature Type
    # ==========================
    # computeDefault=true lets GeoServer derive bbox/attributes itself.
    ft_url = (
        f"{GEOSERVER_URL}/rest/workspaces/{GEOSERVER_WORKSPACE}"
        f"/datastores/{DATASTORE}/featuretypes?computeDefault=true"
    )

    payload = {
        "featureType": {
            "name": table,
            "nativeName": table,
            "enabled": True
        }
    }

    # NOTE(review): the response is deliberately not checked so that an
    # already-existing feature type does not abort the pipeline.
    requests.post(
        ft_url,
        auth=(GEOSERVER_USER, GEOSERVER_PASS),
        headers={"Content-Type": "application/json"},
        data=json.dumps(payload)
    )

    print(f"[GeoServer] FeatureType published for: {table}")

    # ==========================================
    # 2. Upload SLD file to GeoServer
    # ==========================================
    sld_file = f"{SLD_DIR}/{job_id}.sld"
    style_name = table  # style name mirrors the table name
    sld_available = os.path.exists(sld_file)

    if not sld_available:
        print(f"[WARNING] SLD file tidak ditemukan: {sld_file}")
    else:
        print(f"[GeoServer] Upload SLD {sld_file}")

        style_url = (
            f"{GEOSERVER_URL}/rest/workspaces/"
            f"{GEOSERVER_WORKSPACE}/styles"
        )

        with open(sld_file, "r", encoding="utf-8") as f:
            sld_content = f.read()

        # GeoServer rejects SLD bodies that do not start directly with
        # the XML declaration, so strip any BOM / leading whitespace.
        sld_content = sld_content.lstrip("\ufeff \t\r\n")

        resp = requests.post(
            f"{style_url}?name={style_name}",
            auth=(GEOSERVER_USER, GEOSERVER_PASS),
            headers={"Content-Type": "application/vnd.ogc.sld+xml"},
            data=sld_content.encode("utf-8")
        )

        if resp.status_code not in (200, 201):
            raise Exception(
                f"Upload SLD gagal ({resp.status_code}): {resp.text}"
            )

        print(f"[GeoServer] SLD uploaded: {style_name}")

    # ==========================================
    # 3. Apply SLD to the layer
    # ==========================================
    layer_url = f"{GEOSERVER_URL}/rest/layers/{GEOSERVER_WORKSPACE}:{table}"

    payload = {
        "layer": {
            "defaultStyle": {
                "name": style_name,
                "workspace": GEOSERVER_WORKSPACE
            },
            "enabled": True
        }
    }

    requests.put(
        layer_url,
        auth=(GEOSERVER_USER, GEOSERVER_PASS),
        headers={"Content-Type": "application/json"},
        data=json.dumps(payload)
    )

    print(f"[GeoServer] SLD applied as default style for {table}")

    # ==========================================
    # 4. Delete SLD file from local folder
    # ==========================================
    # Only when the file actually existed -- the previous unconditional
    # os.remove() raised FileNotFoundError whenever the SLD was missing.
    if sld_available:
        os.remove(sld_file)
        print(f"[CLEANUP] SLD file removed: {sld_file}")

    # ==============================================
    # 5. Reload GeoServer (optional but recommended)
    # ==============================================
    requests.post(
        f"{GEOSERVER_URL}/rest/reload",
        auth=(GEOSERVER_USER, GEOSERVER_PASS)
    )

    # ====================================================
    # 6. Generate GeoServer WMS/WFS links for GeoNetwork
    # ====================================================
    wms_link = (
        f"{GEOSERVER_URL}/{GEOSERVER_WORKSPACE}/wms?"
        f"service=WMS&request=GetMap&layers={GEOSERVER_WORKSPACE}:{table}"
    )
    wfs_link = (
        f"{GEOSERVER_URL}/{GEOSERVER_WORKSPACE}/wfs?"
        f"service=WFS&request=GetFeature&typeName={GEOSERVER_WORKSPACE}:{table}"
    )
    # OpenLayers preview link (hard-coded East-Java bbox).
    openlayer_url = (
        f"{GEOSERVER_URL}/{GEOSERVER_WORKSPACE}/wms?"
        f"service=WMS"
        f"&version=1.1.0"
        f"&request=GetMap"
        f"&layers={GEOSERVER_WORKSPACE}:{table}"
        f"&styles="
        f"&bbox=110.89528623700005%2C-8.780412043999945%2C116.26994997700001%2C-5.042971664999925"
        f"&width=768"
        f"&height=384"
        f"&srs=EPSG:4326"
        f"&format=application/openlayers"
    )

    return {
        "table": table,
        "style": style_name,
        "wms_url": wms_link,
        "wfs_url": wfs_link,
        "layer_url": openlayer_url
    }
+
+
+
+
+
+# use default style
+# def publish_layer_to_geoserver(table: str):
+
+# print(f"[GeoServer] Publish layer: {table}")
+
+# # ========== 1. Publish Feature Type ==========
+# ft_url = f"{GEOSERVER_URL}/rest/workspaces/{WORKSPACE}/datastores/{DATASTORE}/featuretypes"
+
+# payload = {
+# "featureType": {
+# "name": table,
+# "nativeName": table,
+# "enabled": True
+# }
+# }
+
+# requests.post(
+# ft_url,
+# auth=(GEOSERVER_USER, GEOSERVER_PASS),
+# headers={"Content-Type": "application/json"},
+# data=json.dumps(payload)
+# )
+
+# # ===================================================
+# # 2. Tentukan SLD file (prioritas table.sld → fallback default)
+# # ===================================================
+# table_sld = SLD_DIR / f"{table}.sld"
+# default_sld = SLD_DIR / "default_style.sld"
+
+# if table_sld.exists():
+# chosen_sld = table_sld
+# delete_after = True
+# style_name = table # pakai nama style sama dengan layer
+# print(f"[SLD] Menggunakan SLD khusus: {chosen_sld}")
+# else:
+# chosen_sld = default_sld
+# delete_after = False
+# style_name = "default_style"
+# print(f"[SLD] Menggunakan default SLD: {chosen_sld}")
+
+# # ==========================================
+# # 3. Upload SLD
+# # ==========================================
+# style_url = f"{GEOSERVER_URL}/rest/styles"
+
+# with open(chosen_sld, "rb") as sld:
+# requests.post(
+# f"{style_url}?name={style_name}&workspace={WORKSPACE}",
+# auth=(GEOSERVER_USER, GEOSERVER_PASS),
+# headers={"Content-Type": "application/vnd.ogc.sld+xml"},
+# data=sld.read()
+# )
+
+# print(f"[GeoServer] SLD uploaded: {style_name}")
+
+# # ==========================================
+# # 4. Apply SLD ke layer
+# # ==========================================
+# layer_url = f"{GEOSERVER_URL}/rest/layers/{WORKSPACE}:{table}"
+
+# payload = {
+# "layer": {
+# "defaultStyle": {
+# "name": style_name,
+# "workspace": WORKSPACE
+# },
+# "enabled": True
+# }
+# }
+
+# requests.put(
+# layer_url,
+# auth=(GEOSERVER_USER, GEOSERVER_PASS),
+# headers={"Content-Type": "application/json"},
+# data=json.dumps(payload)
+# )
+
+# print(f"[GeoServer] Style '{style_name}' applied to layer '{table}'")
+
+# # ==========================================
+# # 5. Delete table.sld jika ada
+# # ==========================================
+# if delete_after:
+# table_sld.unlink()
+# print(f"[CLEANUP] File SLD '{table}.sld' dihapus")
+
+# # ====================================================
+# # 6. Reload GeoServer (opsional tapi aman)
+# # ====================================================
+# requests.post(
+# f"{GEOSERVER_URL}/rest/reload",
+# auth=(GEOSERVER_USER, GEOSERVER_PASS)
+# )
+
+# # ====================================================
+# # 7. Generate GeoServer WMS/WFS link untuk GeoNetwork
+# # ====================================================
+
+# wms_link = (
+# f"{GEOSERVER_URL}/{WORKSPACE}/wms?"
+# f"service=WMS&request=GetMap&layers={WORKSPACE}:{table}"
+# )
+
+# wfs_link = (
+# f"{GEOSERVER_URL}/{WORKSPACE}/wfs?"
+# f"service=WFS&request=GetFeature&typeName={WORKSPACE}:{table}"
+# )
+
+# print(f"[GeoServer] WMS URL: {wms_link}")
+# print(f"[GeoServer] WFS URL: {wfs_link}")
+
+# return {
+# "table": table,
+# "style": style_name,
+# "wms_url": wms_link,
+# "wfs_url": wfs_link
+# }
+
+
diff --git a/app/mapset_pipeline/core/readers/__init__.py b/app/mapset_pipeline/core/readers/__init__.py
new file mode 100755
index 0000000..cbab1ee
--- /dev/null
+++ b/app/mapset_pipeline/core/readers/__init__.py
@@ -0,0 +1,18 @@
+# Import fungsi utama dari masing-masing file reader
+# (Titik '.' berarti import dari folder yang sama)
+
+from .reader_csv import read_csv
+from .reader_shp import read_shp
+from .reader_gdb import read_gdb
+from .reader_mpk import read_mpk
+from .reader_pdf import read_pdf, convert_df
+
+# Opsional: Mendefinisikan apa yang akan ter-import jika orang mengetik "from ... import *"
+__all__ = [
+ "read_csv",
+ "read_shp",
+ "read_gdb",
+ "read_mpk",
+ "read_pdf",
+ "convert_df"
+]
\ No newline at end of file
diff --git a/app/mapset_pipeline/core/readers/reader_csv.py b/app/mapset_pipeline/core/readers/reader_csv.py
new file mode 100755
index 0000000..55034b6
--- /dev/null
+++ b/app/mapset_pipeline/core/readers/reader_csv.py
@@ -0,0 +1,228 @@
+import pandas as pd
+import re
+import csv
+import os
+
def detect_header_line(path, max_rows=10):
    """
    Guess which of the first ``max_rows`` lines of a delimited text file
    is the header row.

    Each line is scored as (ratio of alphabetic cells) minus (ratio of
    numeric cells); the highest-scoring line wins.

    Returns:
        int: 0-based index of the best header candidate (0 for an empty file).
    """
    with open(path, 'r', encoding='utf-8', errors='ignore') as f:
        # zip() stops gracefully at EOF -- the previous next(f) loop
        # raised for files shorter than max_rows lines.
        lines = [line for _, line in zip(range(max_rows), f)]
    header_line_idx = 0
    best_score = -1
    for i, line in enumerate(lines):
        cells = re.split(r'[;,|\t]', line.strip())
        alpha_ratio = sum(bool(re.search(r'[A-Za-z]', c)) for c in cells) / max(len(cells), 1)
        digit_ratio = sum(bool(re.search(r'\d', c)) for c in cells) / max(len(cells), 1)
        score = alpha_ratio - digit_ratio
        if score > best_score:
            best_score = score
            header_line_idx = i
    return header_line_idx
+
def detect_delimiter(path, sample_size=2048):
    """
    Detect the field delimiter of a delimited text file.

    Tries csv.Sniffer on a sample of the file first; if sniffing fails,
    falls back to the first of ',', ';', tab, '|' present in the sample,
    and finally defaults to ','.
    """
    with open(path, 'r', encoding='utf-8', errors='ignore') as handle:
        sample = handle.read(sample_size)
        try:
            return csv.Sniffer().sniff(sample).delimiter
        except Exception:
            # Sniffer could not decide -- use the manual fallback below.
            pass

    for candidate in (',', ';', '\t', '|'):
        if candidate in sample:
            return candidate
    return ','
+
+
+# def read_csv(path: str, sheet: str = None):
+# ext = os.path.splitext(path)[1].lower()
+
+# try:
+# if ext in ['.csv']:
+# header_line = detect_header_line(path)
+# delimiter = detect_delimiter(path)
+# print(f"[INFO] Detected header line: {header_line + 1}, delimiter: '{delimiter}'")
+
+# df = pd.read_csv(
+# path,
+# header=header_line,
+# sep=delimiter,
+# encoding='utf-8',
+# low_memory=False,
+# thousands=','
+# )
+
+# elif ext in ['.xlsx', '.xls']:
+# print(f"[INFO] Membaca file Excel: {os.path.basename(path)}")
+# xls = pd.ExcelFile(path)
+# print(f"[INFO] Ditemukan {len(xls.sheet_names)} sheet: {xls.sheet_names}")
+
+# if sheet:
+# if sheet not in xls.sheet_names:
+# raise ValueError(f"Sheet '{sheet}' tidak ditemukan dalam file {os.path.basename(path)}")
+# print(f"[INFO] Membaca sheet yang ditentukan: '{sheet}'")
+# df = pd.read_excel(xls, sheet_name=sheet, header=0, dtype=str)
+# df = df.dropna(how='all').dropna(axis=1, how='all')
+
+# else:
+# print("[INFO] Tidak ada sheet yang ditentukan, mencari sheet paling relevan...")
+# best_sheet = None
+# best_score = -1
+# best_df = None
+
+# for sheet_name in xls.sheet_names:
+# try:
+# temp_df = pd.read_excel(xls, sheet_name=sheet_name, header=0, dtype=str)
+# temp_df = temp_df.dropna(how='all').dropna(axis=1, how='all')
+
+# if len(temp_df) == 0 or len(temp_df.columns) < 2:
+# continue
+
+# # hitung skor relevansi
+# text_ratio = temp_df.applymap(lambda x: isinstance(x, str)).sum().sum() / (temp_df.size or 1)
+# row_score = len(temp_df)
+# score = (row_score * 0.7) + (text_ratio * 100)
+
+# if score > best_score:
+# best_score = score
+# best_sheet = sheet_name
+# best_df = temp_df
+
+# except Exception as e:
+# print(f"[WARN] Gagal membaca sheet {sheet_name}: {e}")
+# continue
+
+# if best_df is not None:
+# print(f"[INFO] Sheet terpilih: '{best_sheet}' dengan skor {best_score:.2f}")
+# df = best_df
+# else:
+# raise ValueError("Tidak ada sheet valid yang dapat dibaca.")
+
+# for col in df.columns:
+# if df[col].astype(str).str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
+# df[col] = df[col].astype(str).str.replace(',', '', regex=False)
+# df[col] = pd.to_numeric(df[col], errors='ignore')
+
+# else:
+# raise ValueError("Format file tidak dikenali (hanya .csv, .xlsx, .xls)")
+
+# except Exception as e:
+# print(f"[WARN] Gagal membaca file ({e}), fallback ke default reader.")
+# df = pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',')
+
+# df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]
+# df.columns = [str(c).strip() for c in df.columns]
+# df = df.dropna(how='all')
+
+# return df
+
+
+
def read_csv(path: str, sheet: str = None):
    """
    Read a tabular file (.csv, .xlsx, .xls) into a pandas DataFrame.

    For CSVs the header row and delimiter are auto-detected. For Excel
    files a specific sheet can be requested; otherwise the most relevant
    sheet is chosen by a row-count/text-ratio score. Numeric-looking
    columns are converted with errors coerced to NaN.

    Returns an empty DataFrame when an Excel file cannot be recovered.
    """
    ext = os.path.splitext(path)[1].lower()
    df = pd.DataFrame()  # default so the cleanup block is always safe

    try:
        # --- FILE READING BLOCK ---
        if ext in ['.csv']:
            header_line = detect_header_line(path)
            delimiter = detect_delimiter(path)
            print(f"[INFO] Detected header line: {header_line + 1}, delimiter: '{delimiter}'")

            df = pd.read_csv(
                path,
                header=header_line,
                sep=delimiter,
                encoding='utf-8',
                low_memory=False,
                thousands=','
            )

        elif ext in ['.xlsx', '.xls']:
            print(f"[INFO] Membaca file Excel: {os.path.basename(path)}")
            xls = pd.ExcelFile(path, engine='openpyxl')  # force the openpyxl engine
            print(f"[INFO] Ditemukan {len(xls.sheet_names)} sheet: {xls.sheet_names}")

            if sheet:
                if sheet not in xls.sheet_names:
                    raise ValueError(f"Sheet '{sheet}' tidak ditemukan.")
                print(f"[INFO] Membaca sheet yang ditentukan: '{sheet}'")
                # Read as strings first; numeric conversion happens later.
                df = pd.read_excel(xls, sheet_name=sheet, header=0, dtype=str, engine='openpyxl')
                df = df.dropna(how='all').dropna(axis=1, how='all')

            else:
                # Best-sheet search: score every sheet and keep the winner.
                print("[INFO] Tidak ada sheet yang ditentukan, mencari sheet paling relevan...")
                best_sheet = None
                best_score = -1
                best_df = None

                for sheet_name in xls.sheet_names:
                    try:
                        temp_df = pd.read_excel(xls, sheet_name=sheet_name, header=0, dtype=str, engine='openpyxl')
                        temp_df = temp_df.dropna(how='all').dropna(axis=1, how='all')

                        # Skip empty or single-column sheets.
                        if len(temp_df) == 0 or len(temp_df.columns) < 2:
                            continue

                        # Relevance score: row count dominates, text ratio breaks ties.
                        # NOTE(review): DataFrame.applymap is deprecated in
                        # pandas >= 2.1 (use DataFrame.map) -- confirm the
                        # pinned pandas version.
                        text_ratio = temp_df.applymap(lambda x: isinstance(x, str)).sum().sum() / (temp_df.size or 1)
                        row_score = len(temp_df)
                        score = (row_score * 0.7) + (text_ratio * 100)

                        if score > best_score:
                            best_score = score
                            best_sheet = sheet_name
                            best_df = temp_df
                    except Exception as e:
                        print(f"[WARN] Gagal membaca sheet {sheet_name}: {e}")
                        continue

                if best_df is not None:
                    print(f"[INFO] Sheet terpilih: '{best_sheet}' dengan skor {best_score:.2f}")
                    df = best_df
                else:
                    raise ValueError("Tidak ada sheet valid yang dapat dibaca.")

        else:
            raise ValueError("Format file tidak dikenali (hanya .csv, .xlsx, .xls)")

        # --- CLEANUP BLOCK (runs after a successful read) ---
        # Wrapped so numeric-conversion errors do NOT fail the whole read.
        if not df.empty:
            df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]
            df.columns = [str(c).strip() for c in df.columns]
            df = df.dropna(how='all')

            # Safer numeric conversion, column by column.
            for col in df.columns:
                try:
                    # Does the column look numeric (ignoring thousands separators)?
                    if df[col].astype(str).str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
                        # Strip thousands separators.
                        clean_col = df[col].astype(str).str.replace(',', '', regex=False)
                        # errors='coerce': bad values (e.g. #REF!) become NaN instead of crashing.
                        df[col] = pd.to_numeric(clean_col, errors='coerce')
                except Exception as ex:
                    # Leave the column as string/object and continue with the next one.
                    print(f"[WARN] Gagal konversi numerik pada kolom '{col}': {ex}")
                    pass

        return df

    except Exception as e:
        # --- ERROR HANDLING ---
        print(f"[WARN] Gagal membaca file utama ({e}).")

        # Only fall back to the plain CSV reader when the source really is
        # CSV/TXT -- never force-read an .xlsx with read_csv.
        if ext in ['.csv', '.txt']:
            print("[INFO] Mencoba fallback ke default CSV reader...")
            try:
                return pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',')
            except Exception as e2:
                print(f"[ERROR] Fallback CSV juga gagal: {e2}")

        # Unrecoverable Excel read: return an empty DataFrame.
        print("[ERROR] Tidak dapat memulihkan pembacaan file Excel.")
        return pd.DataFrame()
+
+
diff --git a/app/mapset_pipeline/core/readers/reader_gdb.py b/app/mapset_pipeline/core/readers/reader_gdb.py
new file mode 100755
index 0000000..843f2d5
--- /dev/null
+++ b/app/mapset_pipeline/core/readers/reader_gdb.py
@@ -0,0 +1,75 @@
+import geopandas as gpd
+import fiona
+import zipfile
+import tempfile
+import os
+import shutil
+
def read_gdb(zip_path: str, layer: str = None):
    """
    Read a layer from an ESRI File Geodatabase delivered as a ZIP archive.

    The ZIP may contain a proper ``*.gdb`` folder or loose ``*.gdbtable``
    files (in which case a .gdb folder is rebuilt from them). The first
    layer is read unless ``layer`` is given; a missing CRS is assumed to
    be EPSG:4326.

    Raises:
        ValueError: when the input is not a ZIP, contains no GDB data,
            has no readable layer, or the layer cannot be read.
    """
    if not zip_path.lower().endswith(".zip"):
        raise ValueError("File GDB harus berupa ZIP yang berisi folder .gdb atau file .gdbtable")

    tmpdir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(tmpdir)

        # macOS archives often ship a __MACOSX metadata folder; drop it.
        macosx_path = os.path.join(tmpdir, "__MACOSX")
        if os.path.exists(macosx_path):
            shutil.rmtree(macosx_path)

        gdb_folders = []
        for root, dirs, _ in os.walk(tmpdir):
            for d in dirs:
                if d.lower().endswith(".gdb"):
                    gdb_folders.append(os.path.join(root, d))

        if not gdb_folders:
            # No .gdb folder: try to rebuild one from loose .gdbtable files.
            gdbtable_files = []
            for root, _, files in os.walk(tmpdir):
                for fname in files:
                    if fname.lower().endswith(".gdbtable"):
                        gdbtable_files.append(os.path.join(root, fname))

            if not gdbtable_files:
                raise ValueError("Tidak ditemukan folder .gdb atau file .gdbtable di dalam ZIP")

            first_folder = os.path.dirname(gdbtable_files[0])
            base_name = os.path.basename(first_folder)
            gdb_folder_path = os.path.join(tmpdir, f"{base_name}.gdb")
            os.makedirs(gdb_folder_path, exist_ok=True)

            # Move every GDB component file into the rebuilt folder.
            for fpath in os.listdir(first_folder):
                if ".gdb" in fpath.lower():
                    shutil.move(os.path.join(first_folder, fpath), os.path.join(gdb_folder_path, fpath))

            gdb_folders.append(gdb_folder_path)

        gdb_path = gdb_folders[0]

        layers = fiona.listlayers(gdb_path)
        chosen_layer = layer or (layers[0] if layers else None)
        if not chosen_layer:
            raise ValueError("Tidak ada layer GDB yang bisa dibaca.")

        print(f"[DEBUG] Membaca layer: {chosen_layer}")

        try:
            gdf = gpd.read_file(gdb_path, layer=chosen_layer)
        except Exception as e:
            raise ValueError(f"Gagal membaca layer dari GDB: {e}")

        if gdf.crs is None:
            # Assume WGS84 when the GDB carries no projection info.
            gdf.set_crs("EPSG:4326", inplace=True)

        return gdf
    finally:
        # Clean up the extraction dir on every path -- previously it
        # leaked when e.g. a corrupt ZIP or fiona.listlayers raised.
        shutil.rmtree(tmpdir, ignore_errors=True)
diff --git a/app/mapset_pipeline/core/readers/reader_mpk.py b/app/mapset_pipeline/core/readers/reader_mpk.py
new file mode 100755
index 0000000..a466e58
--- /dev/null
+++ b/app/mapset_pipeline/core/readers/reader_mpk.py
@@ -0,0 +1,72 @@
+import os
+import tempfile
+import json
+from io import BytesIO
+import geopandas as gpd
+from py7zr import SevenZipFile
+import pyogrio
+
+
def find_data_source(extract_dir: str):
    """
    Locate a supported data source inside an extracted archive directory.

    A ``*.gdb`` folder anywhere in the tree takes priority; otherwise the
    first ``*.shp`` file found is returned.

    Raises:
        ValueError: when neither a .gdb folder nor a .shp file exists.
    """
    # Pass 1: look for a File Geodatabase folder.
    for root, dirs, _ in os.walk(extract_dir):
        gdb_name = next((name for name in dirs if name.lower().endswith(".gdb")), None)
        if gdb_name is not None:
            return os.path.join(root, gdb_name)

    # Pass 2: fall back to the first shapefile.
    for root, _, files in os.walk(extract_dir):
        shp_name = next((name for name in files if name.lower().endswith(".shp")), None)
        if shp_name is not None:
            return os.path.join(root, shp_name)

    raise ValueError("Tidak ditemukan data source yang didukung (.gdb atau .shp).")
+
+
def get_main_layer(gdb_path: str):
    """
    Return the name of the main (non-attachment) layer in a geodatabase.

    Layers whose names end with "__ATTACH" are ESRI attachment tables and
    are skipped; if only such layers exist, the first one is returned as
    a last resort.

    Raises:
        ValueError: when the layer list cannot be read or is empty
            (wrapped as "Gagal membaca daftar layer GDB").
    """
    try:
        available = pyogrio.list_layers(gdb_path)
        main_layer = next(
            (entry[0] for entry in available if not entry[0].lower().endswith("__attach")),
            None,
        )
        if main_layer is not None:
            return main_layer
        if available:
            return available[0][0]
        raise ValueError(f"Tidak ada layer utama yang valid di {gdb_path}")
    except Exception as e:
        # Mirrors the original behaviour: every failure (including the
        # "no valid layer" case above) surfaces as this wrapped error.
        raise ValueError(f"Gagal membaca daftar layer GDB: {e}")
+
+
def read_mpk(path: str):
    """
    Read an ESRI Map Package (.mpk) and return its main layer as a
    GeoDataFrame reprojected to EPSG:4326.

    An .mpk is a 7-zip archive containing either a File Geodatabase or a
    shapefile. The archive is extracted to a temporary directory, the
    data source located, read, and reprojected.

    Raises:
        ValueError: for an empty/corrupt archive, a missing data source,
            or data without CRS information.
    """
    mpk_bytes = None
    with open(path, "rb") as f:
        mpk_bytes = f.read()

    if not mpk_bytes:
        raise ValueError("File MPK kosong atau tidak valid.")

    with tempfile.TemporaryDirectory() as tempdir:
        try:
            # .mpk files are 7-zip archives.
            with SevenZipFile(BytesIO(mpk_bytes), mode="r") as z:
                z.extractall(path=tempdir)
        except Exception as e:
            raise ValueError(f"File MPK rusak atau tidak valid: {e}")

        src_path = find_data_source(tempdir)

        # A .gdb source needs an explicit layer name; shapefiles do not.
        if src_path.lower().endswith(".gdb"):
            layer_name = get_main_layer(src_path)
            gdf = gpd.read_file(src_path, layer=layer_name)
        else:
            gdf = gpd.read_file(src_path)

        if gdf.crs is None:
            raise ValueError("CRS tidak terdeteksi. Pastikan file memiliki informasi proyeksi (.prj).")

        # Normalise everything to WGS84 for downstream processing.
        gdf = gdf.to_crs(epsg=4326)

    print(f"[INFO] Berhasil membaca {len(gdf)} fitur")
    return gdf
\ No newline at end of file
diff --git a/app/mapset_pipeline/core/readers/reader_pdf.py b/app/mapset_pipeline/core/readers/reader_pdf.py
new file mode 100755
index 0000000..d3b772a
--- /dev/null
+++ b/app/mapset_pipeline/core/readers/reader_pdf.py
@@ -0,0 +1,288 @@
+import re
+import pdfplumber
+import pandas as pd
+from app.mapset_pipeline.utils.pdf_cleaner import get_number_column_index, get_start_end_number, normalize_number_column, row_ratio, has_mixed_text_and_numbers, is_short_text_row, parse_page_selection, filter_geo_admin_column, cleaning_column
+from services.upload_file.upload_exceptions import PDFReadError
+from utils.logger_config import setup_logger
+
+logger = setup_logger(__name__)
+
def detect_header_rows(rows):
    """
    Split raw table rows into (header_rows, body_rows).

    The first row is always treated as header; scanning starts at index 1.
    A row marks the start of the body when it mixes text and numbers, is
    mostly numeric (ratio > 0.3), contains a pure-integer cell (a row-number
    column), or follows a fully non-numeric row while itself being numeric.

    Returns:
        tuple[list, list]: (header rows, body rows) — both empty for
        empty input.
    """
    if not rows:
        # BUG FIX: callers always unpack two values; returning a bare []
        # raised "not enough values to unpack" on tables with no rows.
        return [], []

    ratios = [row_ratio(r) for r in rows]
    body_start_index = None

    for i in range(1, len(rows)):
        row = rows[i]
        if has_mixed_text_and_numbers(row):
            body_start_index = i
            break
        if ratios[i] > 0.3:
            body_start_index = i
            break
        if any(isinstance(c, str) and re.match(r'^\d+$', c.strip()) for c in row):
            body_start_index = i
            break
        if ratios[i - 1] == 0 and ratios[i] > 0:
            body_start_index = i
            break

    if body_start_index is None:
        # No body detected: every row is part of the header.
        body_start_index = len(rows)

    potential_headers = rows[:body_start_index]
    body_filtered = rows[body_start_index:]

    # Drop short "noise" rows from the header unless the following header
    # row is fully non-numeric (i.e. still part of a multi-line header).
    header_filtered = []
    for idx, row in enumerate(potential_headers):
        if is_short_text_row(row):
            if idx + 1 < len(potential_headers) and ratios[idx + 1] == 0:
                header_filtered.append(row)
        else:
            header_filtered.append(row)

    return header_filtered, body_filtered
+
+
def merge_multiline_header(header_rows):
    """
    Collapse a multi-row header into a single list of column names.

    For each column the bottom-most non-blank cell wins; embedded newlines
    become single spaces and columns that end up empty are dropped.
    """
    merged = []
    for column_cells in zip(*header_rows):
        chosen = ''
        for cell in reversed(column_cells):
            if cell and str(cell).strip():
                chosen = cell
                break
        merged.append(str(chosen).replace('\n', ' ').strip())
    return [name for name in merged if name not in ['', None]]
+
def merge_parsed_table(tables):
    """
    Merge table fragments that continue another table across pages.

    A table whose numbering column starts at 1 — or that has no numbering
    column at all — is a "root"; everything else is a fragment. Each
    fragment is appended to the first root that has identical columns and
    whose numbering ends exactly one before the fragment starts.
    """
    roots, fragments = [], []

    # Classification pass.
    for table in tables:
        number_idx = get_number_column_index(table["columns"])
        if number_idx is None:
            roots.append(table)
            continue
        first_no, _ = get_start_end_number(table["rows"], number_idx)
        (roots if first_no == 1 else fragments).append(table)

    # Attachment pass: a fragment may extend at most one root.
    for fragment in fragments:
        frag_idx = get_number_column_index(fragment["columns"])
        frag_start, _ = get_start_end_number(fragment["rows"], frag_idx)

        for root in roots:
            if root["columns"] != fragment["columns"]:
                continue
            root_idx = get_number_column_index(root["columns"])
            _, root_end = get_start_end_number(root["rows"], root_idx)
            if frag_start == root_end + 1:
                root["rows"].extend(fragment["rows"])
                break

    return roots
+
+
def read_pdf(path: str, page: str):
    """
    Read tables from a PDF file semi-automatically using ``pdfplumber``.

    Main flow:
    1. Open the PDF.
    2. Select pages from ``page`` (e.g. "1,3-5" for page 1 and 3-5).
    3. Detect and extract raw tables on each selected page.
    4. Split header and body rows with ``detect_header_rows()``.
    5. Merge multi-line headers.
    6. Clean bodies with ``cleaning_column()`` (drops the row-number
       column, aligns column counts with the header).
    7. Build {"title", "columns", "rows"} dicts, keep only geo/admin
       tables (``filter_geo_admin_column``), merge cross-page fragments
       and normalise the numbering column.

    Args:
        path: location of the PDF file to read.
        page: page number or range expression, e.g. "1", "2-4", "1,3-5".

    Returns:
        list[dict]: extracted tables ready for the frontend API.

    Raises:
        PDFReadError: (code=422) on any read/parse failure.
    """
    # NOTE: a ~65-line commented-out earlier version of this function was
    # removed here (dead code); this is the single live implementation.
    try:
        selected_page_expr = page if page else "1"
        tables_data = []

        with pdfplumber.open(path) as pdf:
            total_pages = len(pdf.pages)
            selected_pages = parse_page_selection(selected_page_expr, total_pages)

            logger.info(f"[INFO] Total Halaman PDF: {total_pages}")
            logger.info(f"[INFO] Total Halaman yang dipilih: {len(selected_pages)}")
            logger.info(f"[INFO] Halaman yang dipilih untuk dibaca: {selected_pages}")

            for page_num in selected_pages:
                pdf_page = pdf.pages[page_num - 1]
                tables = pdf_page.find_tables()
                logger.info(f"\n\n[INFO] Halaman {page_num}: {len(tables)} tabel terdeteksi")

                # Title extraction via extract_text_lines() was dropped:
                # it is not valid for landscape pages.
                for i, t in enumerate(tables, start=1):
                    table = t.extract()
                    # Tables with <= 2 rows are usually detection noise.
                    if len(table) > 2:
                        logger.info(f"[TBL] tabel : {i} - halaman {page_num}")
                        tables_data.append({"page": f"halaman {page_num} - {i}", "table": table})

        logger.info(f"\nTotal tabel terbaca: {len(tables_data)}\n")

        header_only, body_only, page_info = [], [], []
        for tbl in tables_data:
            head, body = detect_header_rows(tbl["table"])
            header_only.append(head)
            body_only.append(body)
            page_info.append(tbl["page"])

        clean_header = [merge_multiline_header(h) for h in header_only]
        clean_body = []
        for i, raw_body in enumerate(body_only):
            # Drop empty cells, then align columns against the header.
            con_body = [[cell for cell in row if cell not in (None, '')] for row in raw_body]
            cleaned = cleaning_column(clean_header[i], [con_body])
            clean_body.append(cleaned[0])

        parsed = [
            {"title": page_label, "columns": cols, "rows": rows}
            for cols, rows, page_label in zip(clean_header, clean_body, page_info)
        ]

        # Keep only geospatial/administrative-looking tables, then merge
        # fragments that continue across pages.
        clean_parsed = filter_geo_admin_column(parsed)
        merge_parsed = merge_parsed_table(clean_parsed)

        logger.info(f"\nTotal tabel valid: {len(merge_parsed)}\n")

        return [normalize_number_column(t) for t in merge_parsed]

    except Exception as e:
        raise PDFReadError(f"Gagal membaca PDF: {e}", code=422) from e
+
+
def convert_df(payload):
    """
    Convert a parsed-table payload ({"columns": [...], "rows": [...]})
    into a pandas DataFrame. An optional "title" key is preserved in
    ``df.attrs["title"]``.

    Raises:
        PDFReadError: (code=400) when the payload is malformed.
    """
    try:
        if "columns" not in payload or "rows" not in payload:
            raise ValueError("Payload tidak memiliki key 'columns' atau 'rows'.")

        if not isinstance(payload["columns"], list):
            raise TypeError("'columns' harus berupa list.")
        if not isinstance(payload["rows"], list):
            raise TypeError("'rows' harus berupa list.")

        expected_width = len(payload["columns"])
        for i, row in enumerate(payload["rows"]):
            if len(row) != expected_width:
                raise ValueError(f"Jumlah elemen di baris ke-{i} tidak sesuai jumlah kolom.")

        frame = pd.DataFrame(payload["rows"], columns=payload["columns"])
        if "title" in payload:
            frame.attrs["title"] = payload["title"]
        return frame

    except Exception as e:
        raise PDFReadError(f"Gagal konversi payload ke DataFrame: {e}", code=400)
diff --git a/app/mapset_pipeline/core/readers/reader_shp.py b/app/mapset_pipeline/core/readers/reader_shp.py
new file mode 100755
index 0000000..ff4bb4e
--- /dev/null
+++ b/app/mapset_pipeline/core/readers/reader_shp.py
@@ -0,0 +1,60 @@
+import geopandas as gpd
+import fiona
+import zipfile
+import tempfile
+import os
+import shutil
+from shapely.geometry import shape
+
def read_shp(path: str):
    """
    Read a shapefile (bare .shp or a .zip containing one) into a
    GeoDataFrame.

    If GeoPandas returns empty geometry, the features are rebuilt from the
    raw fiona records. A missing CRS is assumed to be EPSG:4326.

    Raises:
        ValueError: empty path, no .shp inside the ZIP, or unreadable file.
    """
    if not path:
        raise ValueError("Path shapefile tidak boleh kosong.")

    tmpdir = None
    try:
        if path.lower().endswith(".zip"):
            tmpdir = tempfile.mkdtemp()
            with zipfile.ZipFile(path, "r") as zip_ref:
                zip_ref.extractall(tmpdir)

            shp_files = [
                os.path.join(root, f)
                for root, _, files in os.walk(tmpdir)
                for f in files
                if f.lower().endswith(".shp")
            ]
            if not shp_files:
                raise ValueError("Tidak ditemukan file .shp di dalam ZIP.")
            shp_path = shp_files[0]
            print(f"[DEBUG] Membaca shapefile: {os.path.basename(shp_path)}")
        else:
            shp_path = path

        try:
            gdf = gpd.read_file(shp_path)
        except Exception as e:
            raise ValueError(f"Gagal membaca shapefile: {e}") from e

        if "geometry" not in gdf.columns or gdf.geometry.is_empty.all():
            print("[WARN] Geometry kosong. Mencoba membangun ulang dari fitur mentah...")
            with fiona.open(shp_path) as src:
                features = []
                for feat in src:
                    geom = shape(feat["geometry"]) if feat["geometry"] else None
                    # NOTE(review): fiona >= 1.9 returns an immutable
                    # Properties mapping; copy to a plain dict before
                    # attaching the geometry — confirm against fiona pin.
                    props = dict(feat["properties"])
                    props["geometry"] = geom
                    features.append(props)
                gdf = gpd.GeoDataFrame(features, geometry="geometry", crs=src.crs)

        if gdf.crs is None:
            # CRS not detected; assume WGS84.
            gdf.set_crs("EPSG:4326", inplace=True)

        return gdf
    finally:
        # BUG FIX: the extraction dir used to leak whenever any exception
        # was raised before cleanup; always remove it here.
        if tmpdir and os.path.exists(tmpdir):
            shutil.rmtree(tmpdir)
diff --git a/app/mapset_pipeline/data/repository.py b/app/mapset_pipeline/data/repository.py
new file mode 100755
index 0000000..3af018b
--- /dev/null
+++ b/app/mapset_pipeline/data/repository.py
@@ -0,0 +1,259 @@
+import os
+import json
+import asyncio
+import pandas as pd
+from shapely import wkt, wkb
+from shapely.geometry import MultiPolygon, MultiLineString
+from sqlalchemy import text
+from sqlalchemy.exc import SQLAlchemyError
+
+# Import koneksi database Anda
+from database.connection import engine
+from app.mapset_pipeline.utils.formatters import str_to_date
+
+
async def generate_unique_table_name(base_name: str) -> str:
    """
    Generate a unique PostgreSQL table name from a human-readable title.

    The title is lower-cased and every character outside [a-z0-9_] is
    replaced with "_"; a name starting with a digit gets a "t_" prefix.
    This matters because the name is later interpolated into raw DDL
    (CREATE TABLE / ALTER TABLE), so it must be a safe SQL identifier.
    If the name already exists, "_2", "_3", ... is appended until a free
    name is found.
    """
    allowed = set("abcdefghijklmnopqrstuvwxyz0123456789_")
    # SECURITY: only replacing spaces/hyphens (as before) left quotes and
    # other characters that could break the raw DDL this name is used in.
    base_name = "".join(c if c in allowed else "_" for c in base_name.lower())
    if not base_name or base_name[0] in "0123456789":
        # SQL identifiers must not start with a digit.
        base_name = f"t_{base_name}"

    table_name = base_name
    counter = 2

    async with engine.connect() as conn:
        while True:
            # to_regclass returns NULL when the relation does not exist
            # (checked against the default search path, i.e. public).
            result = await conn.execute(
                text("SELECT to_regclass(:tname)"),
                {"tname": table_name}
            )
            if not result.scalar():
                return table_name

            table_name = f"{base_name}_{counter}"
            counter += 1
+
+
async def insert_parquet_to_postgis(filename: str, table_name: str):
    """
    Read a temporary parquet file, clean the data, and COPY it into
    PostGIS via the asyncpg pool for high throughput.

    Flow: load parquet -> normalise column names -> parse/repair
    geometries (forced to Multi*, EWKT with SRID 4326) -> CREATE TABLE
    (all attributes as TEXT) -> bulk COPY -> ALTER the geom column to a
    typed geometry with a GIST index. The temp file is deleted on success.

    Args:
        filename: name of the parquet file inside the local "tmp" dir.
        table_name: target table name (assumed already sanitised/unique).

    Returns:
        dict: {"table_name", "row_count", "geom_type"}.

    Raises:
        FileNotFoundError: when the temp parquet file is missing.
        ValueError: when no GEOM column exists or no valid geometry rows remain.
    """
    from main import db_pool  # local import avoids a circular import with main
    file_path = os.path.join("tmp", filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File temp {file_path} tidak ditemukan")

    try:
        loop = asyncio.get_running_loop()
        # Parquet reading is blocking; run it off the event loop.
        df = await loop.run_in_executor(None, pd.read_parquet, file_path)

        # 1. CLEAN COLUMN NAMES (trim + upper-case).
        df.columns = [str(col).strip().upper() for col in df.columns]

        # Standardise the GEOM column.
        # NOTE(review): renaming "GEOM" -> "GEOM" is a no-op; presumably a
        # different source column name was intended here — confirm.
        if "GEOM" in df.columns:
            df.rename(columns={"GEOM": "GEOM"}, inplace=True)

        if "GEOM" not in df.columns:
            raise ValueError("Kolom GEOM tidak ditemukan dalam Parquet")

        # 2. PREPARE DATA ROWS
        clean_rows = []
        geom_types = set()  # distinct geometry types seen (after Multi* coercion)

        # Attribute columns: everything except GEOM.
        attr_columns = [col for col in df.columns if col != "GEOM"]

        for row in df.itertuples(index=False):
            # --- Handle GEOM (accepts WKT strings or WKB bytes) ---
            raw_geom = getattr(row, "GEOM", None)
            if not raw_geom: continue

            try:
                geom = None
                if isinstance(raw_geom, str):
                    geom = wkt.loads(raw_geom)
                elif isinstance(raw_geom, bytes):
                    geom = wkb.loads(raw_geom)

                if not geom: continue

                # Repair invalid geometry with the standard buffer(0) trick.
                if not geom.is_valid:
                    geom = geom.buffer(0)

                # Force Multi* geometry so the column type is uniform.
                gtype = geom.geom_type.upper()
                if gtype == "POLYGON": geom = MultiPolygon([geom])
                elif gtype == "LINESTRING": geom = MultiLineString([geom])

                geom_types.add(geom.geom_type)

                # Encode as EWKT (SRID 4326) for the COPY below.
                ewkt = f"SRID=4326;{geom.wkt}"

            except Exception:
                continue  # skip rows with broken geometry

            # --- Handle attributes (forced to string: target columns are TEXT) ---
            row_data = []
            for col in attr_columns:
                val = getattr(row, col, None)
                if val is not None:
                    row_data.append(str(val))
                else:
                    row_data.append(None)

            row_data.append(ewkt)
            clean_rows.append(tuple(row_data))

        if not clean_rows:
            raise ValueError("Data valid kosong setelah pemrosesan geometry")

        # 3. DATABASE OPERATIONS
        # The "GEOM" fallback is unreachable in practice: clean_rows being
        # non-empty implies geom_types is non-empty.
        final_geom_type = list(geom_types)[0].upper() if geom_types else "GEOM"
        if "MULTI" not in final_geom_type and final_geom_type != "GEOM":
            final_geom_type = "MULTI" + final_geom_type

        # A. CREATE TABLE — all attributes as TEXT first (safe), typed later.
        col_defs = [f'"{col}" TEXT' for col in attr_columns]

        create_sql = f"""
            CREATE TABLE {table_name} (
                _id SERIAL PRIMARY KEY,
                {', '.join(col_defs)},
                geom TEXT
            );
        """

        async with db_pool.acquire() as conn:
            # Create table
            await conn.execute(create_sql)

            # B. COPY data (bulk insert)
            target_cols = attr_columns + ['geom']
            # asyncpg quotes the column names automatically
            await conn.copy_records_to_table(
                table_name,
                records=clean_rows,
                columns=target_cols
            )

            # C. ALTER the geom column to a typed geometry + spatial index
            alter_sql = f"""
                ALTER TABLE {table_name}
                ALTER COLUMN geom TYPE geometry({final_geom_type}, 4326)
                USING ST_Force2D(geom::geometry)::geometry({final_geom_type}, 4326);

                CREATE INDEX idx_{table_name}_geom ON {table_name} USING GIST (geom);
            """
            await conn.execute(alter_sql)

        print(f"[SUCCESS] Upload {len(clean_rows)} baris ke tabel {table_name}.")

        # Remove the temp file after success (best effort).
        try:
            os.remove(file_path)
        except OSError:
            pass

        return {
            "table_name": table_name,
            "row_count": len(clean_rows),
            "geom_type": final_geom_type
        }

    except Exception as e:
        print(f"[ERROR] Processing parquet to DB: {e}")
        raise e
+
+
async def save_author_metadata(payload_author: dict, table_name: str, dataset_title: str,
                               geom_types: list, row_count: int, user_id: int):
    """
    Persist author metadata and dataset info into backend.author_metadata.

    Args:
        payload_author: author form fields (abstract, keywords, contacts, ...).
        table_name: physical table the dataset was loaded into.
        dataset_title: human-readable dataset title.
        geom_types: geometry type names, stored as a JSON array.
        row_count: number of geometries ingested.
        user_id: id of the uploading user.
    """
    query = text("""
        INSERT INTO backend.author_metadata (
            table_title,
            dataset_title,
            dataset_abstract,
            keywords,
            topic_category,
            date_created,
            dataset_status,
            organization_name,
            contact_person_name,
            contact_email,
            contact_phone,
            geom_type,
            user_id,
            process,
            geometry_count
        ) VALUES (
            :table_title,
            :dataset_title,
            :dataset_abstract,
            :keywords,
            :topic_category,
            :date_created,
            :dataset_status,
            :organization_name,
            :contact_person_name,
            :contact_email,
            :contact_phone,
            :geom_type,
            :user_id,
            :process,
            :geometry_count
        )
    """)

    params = {
        "table_title": table_name,
        "dataset_title": dataset_title,
        "dataset_abstract": payload_author.get("abstract"),
        "keywords": payload_author.get("keywords"),
        # BUG FIX: get("topicCategory", []) still yields None when the key
        # is present but null; `or []` keeps the join safe in both cases.
        "topic_category": ", ".join(payload_author.get("topicCategory") or []),
        "date_created": str_to_date(payload_author.get("dateCreated")),
        "dataset_status": payload_author.get("status"),
        "organization_name": payload_author.get("organization"),
        "contact_person_name": payload_author.get("contactName"),
        "contact_email": payload_author.get("contactEmail"),
        "contact_phone": payload_author.get("contactPhone"),
        "geom_type": json.dumps(geom_types),
        "user_id": user_id,
        "process": 'CLEANSING',
        "geometry_count": row_count
    }

    async with engine.begin() as conn:
        await conn.execute(query, params)
+
+
async def call_cleansing_procedure(table_name: str):
    """
    Run the geometry-cleansing stored procedure for the given table.

    Returns:
        str: "done" on success.

    Raises:
        RuntimeError: when the database call fails (original error chained
            so the service layer can inspect the cause).
    """
    try:
        print(f"[INFO] Memulai cleansing database untuk tabel: {table_name}")

        async with engine.begin() as conn:
            # Safe parameter binding for the procedure argument.
            await conn.execute(
                text("CALL pr_cleansing_satupeta_polygon(:table_name, NULL);"),
                {"table_name": table_name}
            )

        print(f"[SUCCESS] Cleansing selesai untuk tabel: {table_name}")
        return "done"

    except SQLAlchemyError as e:
        print(f"[ERROR] Cleansing database gagal: {e}")
        # BUG FIX: chain the cause (`from e`) instead of discarding it.
        raise RuntimeError(f"Database cleansing failed: {str(e)}") from e
+
+
diff --git a/app/mapset_pipeline/service.py b/app/mapset_pipeline/service.py
new file mode 100755
index 0000000..2ead95d
--- /dev/null
+++ b/app/mapset_pipeline/service.py
@@ -0,0 +1,286 @@
+import os
+import shutil
+import pandas as pd
+from fastapi import UploadFile, HTTPException
+from typing import Optional
+
+# --- Internal Modules ---
+from .api.schemas import UploadRequest, PdfRequest
+from .core.processing.analyzer import analyze_and_clean_dataframe, publish_mapset
+from .core.readers import (
+ read_csv,
+ read_shp,
+ read_gdb,
+ read_mpk,
+ read_pdf,
+ convert_df
+)
+from .data.repository import (
+ generate_unique_table_name,
+ insert_parquet_to_postgis,
+ save_author_metadata,
+ call_cleansing_procedure
+)
+
+from app.mapset_pipeline.utils.file_ops import (
+ detect_zip_type,
+ generate_job_id,
+)
+from app.mapset_pipeline.utils.formatters import (
+ save_xml_to_sld,
+)
+
+# --- Legacy/External Modules (Sesuai kode asli Anda) ---
+from app.core.config import UPLOAD_FOLDER, MAX_FILE_MB, GEONETWORK_URL
+from utils.logger_config import log_activity
+
+# from api.routers.datasets_router import (
+# upload_to_main
+# )
+
async def handle_file_analysis(
    file: UploadFile,
    page: Optional[str] = "",
    sheet: Optional[str] = "",
    fileDesc: Optional[str] = ""
):
    """
    Orchestrator for the /upload endpoint.

    1. Persist the uploaded file (with a size check).
    2. Route to the right reader based on the file extension.
    3. Run the analysis/cleaning processor on the resulting DataFrame.
    4. Always delete the raw uploaded file afterwards.

    Raises:
        HTTPException: 413 (too large), 400 (unsupported type / bad ZIP),
            422 (no valid table), 500 (unexpected reader/processor error).
    """
    # SECURITY: take only the basename so a crafted filename like
    # "../../x" cannot escape the upload folder.
    fname = os.path.basename(file.filename or "upload")
    ext = os.path.splitext(fname)[1].lower()

    # 1. Validate & persist. The whole file is read into memory for the
    # size check — beware of very large uploads.
    contents = await file.read()
    size_mb = len(contents) / (1024 * 1024)
    if size_mb > MAX_FILE_MB:
        raise HTTPException(status_code=413, detail="Ukuran File Terlalu Besar")

    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    tmp_path = UPLOAD_FOLDER / fname

    with open(tmp_path, "wb") as f:
        f.write(contents)

    df = None
    try:
        # 2. Route to a reader by extension.
        print(f"[INFO] Processing file type: {ext}")

        if ext == ".csv":
            df = read_csv(str(tmp_path))
        elif ext == ".xlsx":
            # read_csv also handles xlsx (with sheet selection) here.
            df = read_csv(str(tmp_path), sheet)
        elif ext == ".mpk":
            df = read_mpk(str(tmp_path))
        elif ext == ".pdf":
            # PDFs may yield zero, one or many candidate tables; with many,
            # the client must pick one and call the PDF endpoint again.
            tbl = read_pdf(tmp_path, page)
            if len(tbl) == 0:
                return {
                    "message": "Tidak ditemukan tabel valid pada halaman yang dipilih",
                    "tables": {},
                    "file_type": ext
                }
            elif len(tbl) > 1:
                return {
                    "message": "File berhasil dibaca, ditemukan banyak tabel.",
                    "tables": tbl,
                    "file_type": ext
                }
            else:
                df = convert_df(tbl[0])
        elif ext == ".zip":
            zip_type = detect_zip_type(str(tmp_path))
            if zip_type == "shp":
                df = read_shp(str(tmp_path))
            elif zip_type == "gdb":
                df = read_gdb(str(tmp_path))
            else:
                raise HTTPException(status_code=400, detail="ZIP file tidak mengandung SHP / GDB valid.")
        else:
            raise HTTPException(status_code=400, detail="Unsupported file type")

        if df is None or (hasattr(df, "empty") and df.empty):
            raise HTTPException(status_code=422, detail="File berhasil dibaca, tetapi tidak ditemukan tabel valid")

        # 3. Processor (cleaning & validation).
        return await analyze_and_clean_dataframe(df, ext, fname, fileDesc)

    except HTTPException:
        # BUG FIX: deliberate HTTP errors (400/422) raised above were
        # previously caught by the generic handler and re-raised as 500.
        raise
    except Exception as e:
        print(f"[ERROR] handle_file_analysis: {e}")
        raise HTTPException(status_code=500, detail=str(e))

    finally:
        # 4. Clean up the raw upload. The processor's temp parquet stays
        # alive until the frontend sends the ingest request.
        if tmp_path.exists():
            try:
                os.remove(tmp_path)
            except Exception:
                pass
+
+
async def process_pdf_file(payload: PdfRequest):
    """
    Handle the case where the user picked one specific table out of a PDF:
    convert the chosen table payload into a DataFrame and run the shared
    analysis pipeline on it.

    Raises:
        HTTPException: 422 when the payload holds no valid table,
            500 for unexpected conversion/processing errors.
    """
    try:
        # convert_df expects the {"columns", "rows", "title", ...} dict
        # produced by reader_pdf; model_dump() provides exactly that.
        df = convert_df(payload.model_dump())

        if df is None or (hasattr(df, "empty") and df.empty):
            raise HTTPException(status_code=422, detail="Tidak ada tabel valid dalam PDF")

        # Reuse the same processor as the upload flow.
        return await analyze_and_clean_dataframe(
            df, '.pdf', payload.fileName, payload.fileDesc
        )
    except HTTPException:
        # BUG FIX: the 422 above was previously re-wrapped as a 500 by the
        # generic handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
+
+
async def execute_postgis_ingestion(payload: UploadRequest, user_id: int):
    """
    Orchestrator for the /process-to-postgis endpoint.

    1. Receive the cleaned rows (JSON) from the frontend.
    2. Stage them as a temporary parquet file.
    3. Bulk-load into PostGIS (repository layer).
    4. Persist author metadata (repository layer).
    5. Trigger DB cleansing and layer publishing.
    6. Log the activity; on failure, log the error and raise HTTP 500.

    Returns:
        dict: job summary (job_id, table_name, row count, publish links, ...).
    """
    job_id = generate_job_id(str(user_id))

    try:
        # 1. Generate a unique table name from the title.
        table_name = await generate_unique_table_name(payload.title)

        # 2. Data staging (JSON -> DataFrame -> Parquet).
        # Going through parquet decouples API memory usage from the DB
        # process, which re-reads the file in insert_parquet_to_postgis.
        df = pd.DataFrame(payload.rows)

        # Upper-case the column names.
        df.columns = [col.upper() for col in df.columns]

        # Standardise the geometry column name coming from the frontend.
        if "GEOMETRY" in df.columns:
            df.rename(columns={"GEOMETRY": "GEOM"}, inplace=True)

        # Stage to a temp file for the repository to consume.
        temp_parquet_name = f"{job_id}.parquet"
        temp_parquet_path = os.path.join("tmp", temp_parquet_name)
        os.makedirs("tmp", exist_ok=True)

        # Save parquet (pyarrow or fastparquet engine).
        df.to_parquet(temp_parquet_path, index=False)

        # 3. Insert into PostGIS: reads the parquet, cleans geometries,
        # and bulk-copies into a new table.
        db_result = await insert_parquet_to_postgis(temp_parquet_name, table_name)

        # 4. Save metadata, using the geometry type and row count reported
        # by the DB step (more accurate than the raw payload).
        final_geom_types = [db_result['geom_type']]  # simplified to a list
        row_count = db_result['row_count']

        await save_author_metadata(
            payload_author=payload.author,
            table_name=table_name,
            dataset_title=payload.title,
            geom_types=final_geom_types,
            row_count=row_count,
            user_id=user_id
        )

        # 5. Activity logging.
        await log_activity(
            user_id=user_id,
            action_type="UPLOAD",
            action_title=f"Upload dataset {table_name}",
            details={"table_name": table_name, "rows": row_count}
        )

        # 6. Post-processing (external APIs).
        result = {
            "job_id": job_id,
            "job_status": "wait",
            "table_name": table_name,
            "status": "success",
            "message": f"Tabel '{table_name}' berhasil dibuat.",
            "total_rows": row_count,
            "geometry_type": final_geom_types,
            "crs": payload.author.get("crs", "EPSG:4326"),
            "metadata_uuid": ""
        }

        # Save the layer style (SLD) for the publish step.
        save_xml_to_sld(payload.style, job_id)

        # DB-side cleansing; a failure here is reported in job_status but
        # does not abort the whole ingestion.
        try:
            cleansing_status = await call_cleansing_procedure(table_name)
        except Exception as e:
            cleansing_status = "failed"
            print(f"Cleansing warning: {e}")
        result['job_status'] = cleansing_status

        # Publish the layer (GeoServer/GeoNetwork).
        publish_info = await publish_mapset(table_name, job_id)
        result['metadata_uuid'] = publish_info.get('uuid', '')

        # 7. Upload to the main portal (mapset integration).
        mapset_payload = {
            "name": payload.title,
            "description": payload.author.get("abstract"),
            "scale": "1:25000",
            # IDs hardcoded as in the original code (consider config/env).
            'projection_system_id': '0196c746-d1ba-7f1c-9706-5df738679cc7',
            "category_id": payload.author.get("mapsetCategory"),
            "data_status": "sementara",
            'classification_id': '01968b4b-d3f9-76c9-888c-ee887ac31ce4',
            'producer_id': '01968b54-0000-7a67-bd10-975b8923b93e',
            "layer_type": final_geom_types[0],
            'source_id': ['019c03ef-35e1-738b-858d-871dc7d1e4d6'],
            "layer_url": publish_info.get('geos_link', ''),
            "metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish_info.get('uuid', '')}",
            "coverage_level": "provinsi",
            "coverage_area": "kabupaten",
            "data_update_period": "Tahunan",
            "data_version": "2026",
            "is_popular": False,
            "is_active": True,
            'regional_id': '01968b53-a910-7a67-bd10-975b8923b92e',
            "notes": "Mapset baru dibuat",
            "status_validation": "on_verification",
        }

        # NOTE(review): the portal upload is currently disabled;
        # mapset_payload is built but unused until this is re-enabled.
        # await upload_to_main(mapset_payload)

        return result

    except Exception as e:
        # Error handling & logging.
        await log_activity(
            user_id=user_id,
            action_type="ERROR",
            action_title="Upload gagal",
            details={"error": str(e)}
        )
        print(f"[ERROR] execute_postgis_ingestion: {e}")
        # Re-raise as an HTTPException so the router returns a clean 500.
        raise HTTPException(status_code=500, detail=str(e))
+
+
diff --git a/app/mapset_pipeline/utils/file_ops.py b/app/mapset_pipeline/utils/file_ops.py
new file mode 100755
index 0000000..7029ef2
--- /dev/null
+++ b/app/mapset_pipeline/utils/file_ops.py
@@ -0,0 +1,105 @@
+import os
+import uuid
+import zipfile
+import geopandas as gpd
+from shapely import wkt
+from shapely.errors import ShapelyError
+from datetime import datetime
+
+
def detect_zip_type(zip_path: str) -> str:
    """
    Inspect a ZIP archive and classify its contents.

    Returns:
        str: "gdb" for a file geodatabase, "shp" for a shapefile,
             "unknown" otherwise.
    """
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        # Lower-case once instead of repeatedly inside each check.
        names = [n.lower() for n in zip_ref.namelist()]

    # A file geodatabase is a directory tree named *.gdb/ ...
    # (the old `endswith(".gdb/") or ".gdb/" in ...` was redundant:
    # the substring test subsumes the endswith test).
    if any(".gdb/" in n for n in names):
        return "gdb"

    # ... or is recognisable by its internal table/index files.
    gdb_extensions = (".gdbtable", ".gdbtablx", ".gdbindexes", ".spx")
    if any(n.endswith(gdb_extensions) for n in names):
        return "gdb"

    if any(n.endswith(".shp") for n in names):
        return "shp"

    return "unknown"
+
+
def generate_unique_filename(folder="tmp", ext="parquet", digits=6):
    """
    Return a path "<folder>/<uuid-int>.<ext>" that does not exist yet.

    The folder is created if needed. ``digits`` is kept for backward
    compatibility but is currently unused: the full UUID integer is used,
    which makes collisions practically impossible (the existence check in
    the loop covers the remaining edge case).
    """
    os.makedirs(folder, exist_ok=True)
    while True:
        # BUG FIX: was the duplicated `file_id = file_id = uuid.uuid4().int`.
        file_id = uuid.uuid4().int
        filename = f"{folder}/{file_id}.{ext}"
        if not os.path.exists(filename):
            return filename
+
+
def generate_job_id(user_id: str) -> str:
    """Build a job id of the form "<user_id>_<YYYYMMDDHHMMSS>"."""
    stamp = f"{datetime.now():%Y%m%d%H%M%S}"
    return f"{user_id}_{stamp}"
+
+
def dataframe_validation(df_input, tmp_file):
    """
    Validate and clean a DataFrame with a WKT "geometry" column, then
    export it to Parquet. CPU-bound: intended to run in a worker thread.

    Steps: parse WKT safely, drop unparseable/empty geometries, repair
    invalid topology with buffer(0), pin EPSG:4326 and upper-case the
    attribute column names.

    Returns:
        int: number of rows written to ``tmp_file``.

    Raises:
        ValueError: when no valid spatial rows remain.
    """
    # Work on a copy so the caller's DataFrame is untouched.
    export_df = df_input.copy()

    def safe_load_wkt(raw):
        # Any non-string or malformed WKT becomes None and is dropped later.
        # (The old `except (ShapelyError, Exception)` was equivalent to
        # `except Exception`; the redundancy is removed.)
        if not isinstance(raw, str):
            return None
        try:
            return wkt.loads(raw)
        except Exception:
            return None

    export_df["geom"] = export_df["geometry"].apply(safe_load_wkt)

    # Drop rows where WKT parsing failed.
    # BUG FIX: removed the leftover debug `print("df", export_df)`.
    export_df = export_df[export_df["geom"].notnull()]
    if export_df.empty:
        raise ValueError("Tidak ada data spasial valid yang ditemukan.")

    export_df = gpd.GeoDataFrame(export_df, geometry="geom")

    # buffer(0) is the standard GIS trick to repair light topology errors
    # (e.g. self-intersecting polygons).
    export_df["geom"] = export_df["geom"].apply(
        lambda g: g.buffer(0) if not g.is_valid else g
    )

    # Drop geometries that became empty after the fix (rare, but safe).
    export_df = export_df[~export_df["geom"].is_empty]

    # Finalise: drop the raw WKT column, pin the CRS, upper-case the
    # attribute names (strip() removes stray spaces like " ID " -> "ID");
    # the geometry column stays lowercase "geom".
    export_df = export_df.drop(columns=["geometry"])
    export_df = export_df.set_crs("EPSG:4326", allow_override=True)
    export_df = export_df.rename(
        columns=lambda c: str(c).strip().upper() if c != "geom" else c
    )

    export_df.to_parquet(tmp_file)

    return len(export_df)
+
diff --git a/app/mapset_pipeline/utils/formatters.py b/app/mapset_pipeline/utils/formatters.py
new file mode 100755
index 0000000..504e3bf
--- /dev/null
+++ b/app/mapset_pipeline/utils/formatters.py
@@ -0,0 +1,43 @@
+import os
+import pandas as pd
+import numpy as np
+from shapely.geometry import base as shapely_base
+from shapely.geometry.base import BaseGeometry
+from datetime import datetime
+
+
def safe_json(value):
    """
    Coerce numpy/pandas/shapely scalars into JSON-serialisable builtins.

    - numpy integers/floats -> int/float
    - pandas Timestamp      -> ISO-8601 string
    - shapely geometry      -> WKT string
    - NaN/NaT/None          -> None
    Anything else is returned unchanged.
    """
    # np.integer/np.floating cover every width (int8..int64, float16..64),
    # not just the two sizes the old code special-cased.
    if isinstance(value, np.integer):
        return int(value)
    if isinstance(value, np.floating):
        return float(value)
    if isinstance(value, pd.Timestamp):
        return value.isoformat()
    if isinstance(value, shapely_base.BaseGeometry):
        return str(value)  # WKT string representation
    # BUG FIX: pd.isna on a list/ndarray returns an array whose truth value
    # is ambiguous; only apply the NA check to scalars.
    if not isinstance(value, (list, tuple, set, dict, np.ndarray)) and pd.isna(value):
        return None
    return value
+
+
def str_to_date(raw_date: str):
    """
    Parse a "YYYY-MM-DD" string into a ``datetime.date``.

    Returns None for empty/None input or an unparseable value (a warning
    is printed in that case).
    """
    if not raw_date:
        # Explicit None for empty input (was an implicit fall-through).
        return None
    try:
        return datetime.strptime(raw_date, "%Y-%m-%d").date()
    except (ValueError, TypeError) as e:
        # Narrowed from bare Exception: only parse/type errors are expected.
        print("[WARNING] Tidak bisa parse dateCreated:", e)
        return None
+
+
def save_xml_to_sld(xml_string, filename):
    """
    Persist an SLD/XML style document to ``style_temp/<filename>.sld``.

    Args:
        xml_string: the SLD XML content to write.
        filename: base name (e.g. a job id) for the .sld file.

    Returns:
        str: path of the written file.
    """
    folder_path = 'style_temp'
    os.makedirs(folder_path, exist_ok=True)

    # BUG FIX: the filename argument was ignored — every style was written
    # to the same literal file, overwriting previous jobs' styles.
    file_path = os.path.join(folder_path, f"{filename}.sld")

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(xml_string)

    return file_path
+
diff --git a/app/mapset_pipeline/utils/pdf_cleaner.py b/app/mapset_pipeline/utils/pdf_cleaner.py
new file mode 100755
index 0000000..69d84c0
--- /dev/null
+++ b/app/mapset_pipeline/utils/pdf_cleaner.py
@@ -0,0 +1,208 @@
+import re
+import itertools
+
# Keywords (Indonesian + English) that mark a table column as geographic or
# administrative: coordinates/geometry terms plus administrative region levels
# (desa/kelurahan/kecamatan/kabupaten/kota/provinsi).
geo_admin_keywords = [
    'lat', 'lon', 'long', 'latitude', 'longitude', 'koordinat', 'geometry', 'geometri',
    'desa', 'kelurahan', 'kel', 'kecamatan', 'kabupaten', 'kab', 'kota', 'provinsi',
    'lokasi', 'region', 'area', 'zone', 'boundary', 'batas'
]
+
def normalize_text(text):
    """Lowercase *text*, keep only ``[a-z0-9/ ]``, and collapse whitespace runs."""
    lowered = text.lower()
    alnum_only = re.sub(r'[^a-z0-9/ ]+', ' ', lowered)
    return re.sub(r'\s+', ' ', alnum_only).strip()
+
def generate_combined_patterns(keywords):
    """Build regex patterns matching 'a/b' and 'b/a' for every keyword pair."""
    patterns = []
    for first, second in itertools.combinations(keywords, 2):
        patterns.append(rf'{first}\s*/\s*{second}')
        patterns.append(rf'{second}\s*/\s*{first}')
    return patterns
+
+combined_patterns = generate_combined_patterns(geo_admin_keywords)
+
def contains_geo_admin_keywords(text):
    """Return True when the normalized text mentions a geo/admin keyword.

    Combined 'a/b' patterns are checked first, then standalone keywords
    delimited by start/end of string, whitespace, '/', '_' or '-'.
    Texts shorter than 3 characters after normalization never match.
    """
    normalized = normalize_text(text)
    if len(normalized) < 3:
        return False

    if any(re.search(pattern, normalized) for pattern in combined_patterns):
        return True

    return any(
        re.search(rf'(^|[\s/_-]){kw}([\s/_-]|$)', normalized)
        for kw in geo_admin_keywords
    )
+
def filter_geo_admin_column(tables):
    """Keep only the tables whose column names contain a geo/admin keyword."""
    return [
        table for table in tables
        if any(contains_geo_admin_keywords(col) for col in table['columns'])
    ]
+
+
# Tokens that identify a row-number / serial-number header cell. Matching in
# has_number_header uses the `in` operator and is case-sensitive, hence both
# lower-case and capitalised variants are listed explicitly.
NUMBER_HEADER_KEYWORDS = [
    "no","no.","nomor","nomor urut","no urut","No","Nomor","No Urut","Index",
    "ID","Sr No","S/N","SN","Sl No"
]
+
def has_number_header(header):
    """True when any numbering keyword occurs in *header* (via ``in``).

    NOTE(review): `header` may be a single string (substring test) or a list
    of column names (membership test) — both uses appear in this module, and
    the `in` operator handles both; preserved as-is.
    """
    return any(keyword in header for keyword in NUMBER_HEADER_KEYWORDS)
+
def is_numbering_column(col_values):
    """True when >60% of the non-empty string cells are 1-3 digit numbers."""
    considered = 0
    numeric_cells = 0
    for cell in col_values:
        if not cell or not isinstance(cell, str):
            continue
        considered += 1
        if re.fullmatch(r"0*\d{1,3}", cell.strip()):
            numeric_cells += 1
    return considered > 0 and (numeric_cells / considered) > 0.6
+
def is_numeric_value(v):
    """True for int/float values, or strings of 1-3 digits (zero-padding allowed)."""
    if isinstance(v, (int, float)):
        return True
    if isinstance(v, str):
        return re.fullmatch(r"0*\d{1,3}", v.strip()) is not None
    # None and any other type are not numeric.
    return False
+
def cleaning_column(headers, bodies):
    """Clean extracted table bodies.

    For each (header, body) pair: if the body's first column looks like a row
    numbering that the header does not declare, strip that first cell from
    every multi-cell row; then drop rows whose cell count does not match the
    table's column count.

    Args:
        headers: list of header rows (one list of column names per table).
        bodies:  list of body row-lists, parallel to ``headers``.

    Returns:
        List of cleaned bodies, parallel to the inputs.
    """
    cleaned_bodies = []

    for header, body in zip(headers, bodies):
        if not body:
            cleaned_bodies.append(body)
            continue

        header_has_number = has_number_header(header)
        first_col = [row[0] for row in body if row and len(row) > 0]
        first_col_is_numbering = is_numbering_column(first_col)

        # Undeclared numbering column: remove the leading cell from each row
        # that has one (keep single-cell rows intact).
        if not header_has_number and first_col_is_numbering:
            new_body = []
            for row in body:
                if not row:
                    continue
                first_val = row[0]
                if is_numeric_value(first_val) and len(row) > 1:
                    new_body.append(row[1:])
                else:
                    new_body.append(row)
            body = new_body

        # Bug fix: compare against THIS table's column count (len(header)),
        # not the number of tables (the original used len(headers)).
        header_len = len(header)
        filtered_body = [row for row in body if len(row) == header_len]

        cleaned_bodies.append(filtered_body)

    return cleaned_bodies
+
def parse_page_selection(selectedPage: str, total_pages: int):
    """Parse a page-selection string like ``"1,3-5 7"`` into a sorted page list.

    Empty/None input selects every page. Malformed tokens are silently
    ignored, and pages outside ``1..total_pages`` are dropped.
    """
    if not selectedPage:
        return list(range(1, total_pages + 1))

    selected = set()
    for token in re.split(r'[,\s]+', selectedPage.strip()):
        try:
            if '-' in token:
                lo, hi = map(int, token.split('-'))
                selected.update(range(lo, hi + 1))
            else:
                selected.add(int(token))
        except ValueError:
            continue  # skip malformed tokens, same as the original

    return [p for p in sorted(selected) if 1 <= p <= total_pages]
+
def is_number(s):
    """True when *s*, with commas and dots removed, consists solely of digits."""
    if s is None:
        return False
    digits_only = str(s).strip().replace(',', '').replace('.', '')
    # Note: "12.5" passes because the separator is stripped first.
    return digits_only.isdigit()
+
def row_ratio(row):
    """Fraction of a row's non-empty cells that are numeric (0 if all empty)."""
    cells = [c for c in row if c not in (None, '', ' ')]
    if not cells:
        return 0
    return sum(is_number(c) for c in cells) / len(cells)
+
def has_mixed_text_and_numbers(row):
    """True when a row has at least one alphabetic cell AND one numeric cell."""
    cells = [c for c in row if c not in (None, '', ' ')]
    contains_text = any(
        isinstance(c, str) and re.search(r'[A-Za-z]', str(c)) for c in cells
    )
    contains_number = any(is_number(c) for c in cells)
    return contains_text and contains_number
+
def is_short_text_row(row):
    """Detect a short text-only row: at most 2 non-empty cells, all
    non-numeric, whose joined text is under 20 characters."""
    cells = [str(c).strip() for c in row if c not in (None, '', ' ')]
    if not cells:
        return False
    all_text = not any(is_number(c) for c in cells)
    return all_text and len(cells) <= 2 and len(" ".join(cells)) < 20
+
+
+
+
+
+
+
+
def get_number_column_index(columns):
    """Index of the first column whose name looks like a numbering header, else None."""
    return next(
        (i for i, col in enumerate(columns) if has_number_header(col)),
        None,
    )
+
def get_start_end_number(rows, idx):
    """Return (first, last) values of column *idx* as ints, or (None, None).

    (None, None) is returned when ``rows`` is empty, the index is out of
    range, or either cell is not parseable as an integer.
    """
    # Narrowed from a bare `except:` so signals like KeyboardInterrupt and
    # genuine programming errors are no longer swallowed.
    try:
        return int(rows[0][idx]), int(rows[-1][idx])
    except (IndexError, KeyError, TypeError, ValueError):
        return None, None
+
def normalize_number_column(table):
    """Make the numbering column of *table* strictly increasing, in place.

    Locates the numbering column via its header; if there is none the table
    is returned untouched. Cells that are not integers are skipped. When a
    row's value does not advance past the running counter the counter is
    bumped by one, otherwise it jumps to the row's value; each processed
    cell is rewritten with the counter as a string.

    Returns the (mutated) table dict for convenience.
    """
    columns = table["columns"]
    rows = table["rows"]

    num_idx = get_number_column_index(columns)
    if num_idx is None:
        return table

    current = None

    for row in rows:
        # Narrowed from a bare `except:`; only skip cells that genuinely
        # cannot be read as an integer.
        try:
            val = int(row[num_idx])
        except (ValueError, TypeError, IndexError, KeyError):
            continue

        if current is None:
            current = val
        elif val <= current:
            current += 1
        else:
            current = val

        row[num_idx] = str(current)

    return table
diff --git a/app/models/__init__.py b/app/models/__init__.py
old mode 100644
new mode 100755
diff --git a/app/models/base.py b/app/models/base.py
old mode 100644
new mode 100755
diff --git a/app/models/category_model.py b/app/models/category_model.py
old mode 100644
new mode 100755
diff --git a/app/models/classification_model.py b/app/models/classification_model.py
old mode 100644
new mode 100755
diff --git a/app/models/credential_model.py b/app/models/credential_model.py
old mode 100644
new mode 100755
diff --git a/app/models/feedback_model.py b/app/models/feedback_model.py
old mode 100644
new mode 100755
diff --git a/app/models/file_model.py b/app/models/file_model.py
old mode 100644
new mode 100755
diff --git a/app/models/map_access_model.py b/app/models/map_access_model.py
old mode 100644
new mode 100755
diff --git a/app/models/map_projection_system_model.py b/app/models/map_projection_system_model.py
old mode 100644
new mode 100755
diff --git a/app/models/map_source_model.py b/app/models/map_source_model.py
old mode 100644
new mode 100755
diff --git a/app/models/mapset_history_model.py b/app/models/mapset_history_model.py
old mode 100644
new mode 100755
diff --git a/app/models/mapset_model.py b/app/models/mapset_model.py
old mode 100644
new mode 100755
diff --git a/app/models/news_model.py b/app/models/news_model.py
old mode 100644
new mode 100755
diff --git a/app/models/organization_model.py b/app/models/organization_model.py
old mode 100644
new mode 100755
diff --git a/app/models/refresh_token_model.py b/app/models/refresh_token_model.py
old mode 100644
new mode 100755
diff --git a/app/models/regional_model.py b/app/models/regional_model.py
old mode 100644
new mode 100755
diff --git a/app/models/role_model.py b/app/models/role_model.py
old mode 100644
new mode 100755
diff --git a/app/models/user_model.py b/app/models/user_model.py
old mode 100644
new mode 100755
diff --git a/app/repositories/__init__.py b/app/repositories/__init__.py
old mode 100644
new mode 100755
diff --git a/app/repositories/base.py b/app/repositories/base.py
old mode 100644
new mode 100755
diff --git a/app/repositories/category_repository.py b/app/repositories/category_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/classification_repository.py b/app/repositories/classification_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/credential_repository.py b/app/repositories/credential_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/feedback_repository.py b/app/repositories/feedback_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/file_repository.py b/app/repositories/file_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/map_access_repository.py b/app/repositories/map_access_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/map_projection_system_repository.py b/app/repositories/map_projection_system_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/map_source_repository.py b/app/repositories/map_source_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/map_source_usage_repository.py b/app/repositories/map_source_usage_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/mapset_history_repository.py b/app/repositories/mapset_history_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/mapset_repository.py b/app/repositories/mapset_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/news_repository.py b/app/repositories/news_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/organization_repository.py b/app/repositories/organization_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/regional_repository.py b/app/repositories/regional_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/role_repository.py b/app/repositories/role_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/token_repository.py b/app/repositories/token_repository.py
old mode 100644
new mode 100755
diff --git a/app/repositories/user_repository.py b/app/repositories/user_repository.py
old mode 100644
new mode 100755
diff --git a/app/response/res.py b/app/response/res.py
old mode 100644
new mode 100755
diff --git a/app/schemas/__init__.py b/app/schemas/__init__.py
old mode 100644
new mode 100755
diff --git a/app/schemas/base.py b/app/schemas/base.py
old mode 100644
new mode 100755
diff --git a/app/schemas/category_schema.py b/app/schemas/category_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/classification_schema.py b/app/schemas/classification_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/count_schema.py b/app/schemas/count_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/credential_schema.py b/app/schemas/credential_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/error_schema.py b/app/schemas/error_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/feedback_schema.py b/app/schemas/feedback_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/file_schema.py b/app/schemas/file_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/map_access_schema.py b/app/schemas/map_access_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/map_projection_system_schema.py b/app/schemas/map_projection_system_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/map_source_schema.py b/app/schemas/map_source_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/mapset_history_schema.py b/app/schemas/mapset_history_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/mapset_schema.py b/app/schemas/mapset_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/news_schema.py b/app/schemas/news_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/organization_schema.py b/app/schemas/organization_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/regional_schema.py b/app/schemas/regional_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/role_schema.py b/app/schemas/role_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/token_schema.py b/app/schemas/token_schema.py
old mode 100644
new mode 100755
diff --git a/app/schemas/user_schema.py b/app/schemas/user_schema.py
old mode 100644
new mode 100755
diff --git a/app/services/__init__.py b/app/services/__init__.py
old mode 100644
new mode 100755
diff --git a/app/services/auth_service.py b/app/services/auth_service.py
old mode 100644
new mode 100755
diff --git a/app/services/base.py b/app/services/base.py
old mode 100644
new mode 100755
diff --git a/app/services/category_service.py b/app/services/category_service.py
old mode 100644
new mode 100755
diff --git a/app/services/classification_service.py b/app/services/classification_service.py
old mode 100644
new mode 100755
diff --git a/app/services/count_service.py b/app/services/count_service.py
old mode 100644
new mode 100755
diff --git a/app/services/credential_service.py b/app/services/credential_service.py
old mode 100644
new mode 100755
diff --git a/app/services/feedback_service.py b/app/services/feedback_service.py
old mode 100644
new mode 100755
diff --git a/app/services/file_service.py b/app/services/file_service.py
old mode 100644
new mode 100755
diff --git a/app/services/map_access_service.py b/app/services/map_access_service.py
old mode 100644
new mode 100755
diff --git a/app/services/map_projection_system_service.py b/app/services/map_projection_system_service.py
old mode 100644
new mode 100755
diff --git a/app/services/map_source_service.py b/app/services/map_source_service.py
old mode 100644
new mode 100755
diff --git a/app/services/mapset_history_service.py b/app/services/mapset_history_service.py
old mode 100644
new mode 100755
diff --git a/app/services/mapset_service.py b/app/services/mapset_service.py
old mode 100644
new mode 100755
diff --git a/app/services/news_service.py b/app/services/news_service.py
old mode 100644
new mode 100755
diff --git a/app/services/organization_service.py b/app/services/organization_service.py
old mode 100644
new mode 100755
diff --git a/app/services/regional_service.py b/app/services/regional_service.py
old mode 100644
new mode 100755
diff --git a/app/services/role_service.py b/app/services/role_service.py
old mode 100644
new mode 100755
diff --git a/app/services/user_service.py b/app/services/user_service.py
old mode 100644
new mode 100755
diff --git a/app/utils/__init__.py b/app/utils/__init__.py
old mode 100644
new mode 100755
diff --git a/app/utils/encryption.py b/app/utils/encryption.py
old mode 100644
new mode 100755
diff --git a/app/utils/helpers.py b/app/utils/helpers.py
old mode 100644
new mode 100755
diff --git a/app/utils/logger_config.py b/app/utils/logger_config.py
old mode 100644
new mode 100755
diff --git a/app/utils/system.py b/app/utils/system.py
old mode 100644
new mode 100755
diff --git a/docker-compose.yml b/docker-compose.yml
old mode 100644
new mode 100755
diff --git a/environment.env b/environment.env
old mode 100644
new mode 100755
diff --git a/migrations/README b/migrations/README
old mode 100644
new mode 100755
diff --git a/migrations/env.py b/migrations/env.py
old mode 100644
new mode 100755
diff --git a/migrations/script.py.mako b/migrations/script.py.mako
old mode 100644
new mode 100755
diff --git a/migrations/scripts.py b/migrations/scripts.py
old mode 100644
new mode 100755
diff --git a/migrations/versions/20241203_1200_initial_schema.py b/migrations/versions/20241203_1200_initial_schema.py
old mode 100644
new mode 100755
diff --git a/migrations/versions/20241204_0000_seed_initial_data.py b/migrations/versions/20241204_0000_seed_initial_data.py
old mode 100644
new mode 100755
diff --git a/migrations/versions/__init__.py b/migrations/versions/__init__.py
old mode 100644
new mode 100755
diff --git a/poetry.lock b/poetry.lock
old mode 100644
new mode 100755
diff --git a/pyproject.toml b/pyproject.toml
old mode 100644
new mode 100755
diff --git a/run.py b/run.py
old mode 100644
new mode 100755
diff --git a/tests/__init__.py b/tests/__init__.py
old mode 100644
new mode 100755
diff --git a/tests/conftest.py b/tests/conftest.py
old mode 100644
new mode 100755
diff --git a/tests/test_api/__init__.py b/tests/test_api/__init__.py
old mode 100644
new mode 100755
diff --git a/tests/test_services/__init__.py b/tests/test_services/__init__.py
old mode 100644
new mode 100755