From 6c0d8729f7a7bcb03e5aa51b3054710663c00979 Mon Sep 17 00:00:00 2001 From: DmsAnhr Date: Mon, 23 Feb 2026 12:20:42 +0700 Subject: [PATCH] export --- .DS_Store | Bin .env.example | 0 .gitignore | 3 + .pre-commit-config.yaml | 0 .python-version | 0 Dockerfile | 0 README.md | 0 addons.txt | 11 + alembic.ini | 0 app/__init__.py | 0 app/api/dependencies/__init__.py | 0 app/api/dependencies/auth.py | 0 app/api/dependencies/database.py | 0 app/api/dependencies/factory.py | 0 app/api/v1/__init__.py | 0 app/api/v1/routes/__init__.py | 0 app/api/v1/routes/auth_route.py | 0 app/api/v1/routes/category_route.py | 0 app/api/v1/routes/classification_route.py | 0 app/api/v1/routes/count_route.py | 0 app/api/v1/routes/credential_route.py | 0 app/api/v1/routes/feedback_route.py | 0 app/api/v1/routes/file_route.py | 0 app/api/v1/routes/geonetwork_route.py | 0 .../v1/routes/map_projection_system_route.py | 0 app/api/v1/routes/map_source_route.py | 0 app/api/v1/routes/mapset_history_route.py | 0 app/api/v1/routes/mapset_route.py | 0 app/api/v1/routes/news_route.py | 0 app/api/v1/routes/organization_route.py | 0 app/api/v1/routes/regional_route.py | 0 app/api/v1/routes/role_route.py | 0 app/api/v1/routes/user_route.py | 0 app/core/__init__.py | 0 app/core/config.py | 0 app/core/data_types.py | 0 app/core/database.py | 0 app/core/exceptions.py | 0 app/core/minio_client.py | 0 app/core/params.py | 0 app/core/responses.py | 0 app/core/security.py | 0 app/main.py | 0 app/mapset_pipeline/__init__,.py | 0 app/mapset_pipeline/api/router.py | 41 ++ app/mapset_pipeline/api/schemas.py | 17 + app/mapset_pipeline/core/clients/ai_client.py | 49 ++ .../core/processing/analyzer.py | 181 +++++ .../core/processing/geometry_build.py | 466 ++++++++++++ .../core/publication/publish_geonetwork.py | 693 ++++++++++++++++++ .../core/publication/publish_geoserver.py | 300 ++++++++ app/mapset_pipeline/core/readers/__init__.py | 18 + .../core/readers/reader_csv.py | 228 ++++++ .../core/readers/reader_gdb.py | 
75 ++ .../core/readers/reader_mpk.py | 72 ++ .../core/readers/reader_pdf.py | 288 ++++++++ .../core/readers/reader_shp.py | 60 ++ app/mapset_pipeline/data/repository.py | 259 +++++++ app/mapset_pipeline/service.py | 286 ++++++++ app/mapset_pipeline/utils/file_ops.py | 105 +++ app/mapset_pipeline/utils/formatters.py | 43 ++ app/mapset_pipeline/utils/pdf_cleaner.py | 208 ++++++ app/models/__init__.py | 0 app/models/base.py | 0 app/models/category_model.py | 0 app/models/classification_model.py | 0 app/models/credential_model.py | 0 app/models/feedback_model.py | 0 app/models/file_model.py | 0 app/models/map_access_model.py | 0 app/models/map_projection_system_model.py | 0 app/models/map_source_model.py | 0 app/models/mapset_history_model.py | 0 app/models/mapset_model.py | 0 app/models/news_model.py | 0 app/models/organization_model.py | 0 app/models/refresh_token_model.py | 0 app/models/regional_model.py | 0 app/models/role_model.py | 0 app/models/user_model.py | 0 app/repositories/__init__.py | 0 app/repositories/base.py | 0 app/repositories/category_repository.py | 0 app/repositories/classification_repository.py | 0 app/repositories/credential_repository.py | 0 app/repositories/feedback_repository.py | 0 app/repositories/file_repository.py | 0 app/repositories/map_access_repository.py | 0 .../map_projection_system_repository.py | 0 app/repositories/map_source_repository.py | 0 .../map_source_usage_repository.py | 0 app/repositories/mapset_history_repository.py | 0 app/repositories/mapset_repository.py | 0 app/repositories/news_repository.py | 0 app/repositories/organization_repository.py | 0 app/repositories/regional_repository.py | 0 app/repositories/role_repository.py | 0 app/repositories/token_repository.py | 0 app/repositories/user_repository.py | 0 app/response/res.py | 0 app/schemas/__init__.py | 0 app/schemas/base.py | 0 app/schemas/category_schema.py | 0 app/schemas/classification_schema.py | 0 app/schemas/count_schema.py | 0 
app/schemas/credential_schema.py | 0 app/schemas/error_schema.py | 0 app/schemas/feedback_schema.py | 0 app/schemas/file_schema.py | 0 app/schemas/map_access_schema.py | 0 app/schemas/map_projection_system_schema.py | 0 app/schemas/map_source_schema.py | 0 app/schemas/mapset_history_schema.py | 0 app/schemas/mapset_schema.py | 0 app/schemas/news_schema.py | 0 app/schemas/organization_schema.py | 0 app/schemas/regional_schema.py | 0 app/schemas/role_schema.py | 0 app/schemas/token_schema.py | 0 app/schemas/user_schema.py | 0 app/services/__init__.py | 0 app/services/auth_service.py | 0 app/services/base.py | 0 app/services/category_service.py | 0 app/services/classification_service.py | 0 app/services/count_service.py | 0 app/services/credential_service.py | 0 app/services/feedback_service.py | 0 app/services/file_service.py | 0 app/services/map_access_service.py | 0 app/services/map_projection_system_service.py | 0 app/services/map_source_service.py | 0 app/services/mapset_history_service.py | 0 app/services/mapset_service.py | 0 app/services/news_service.py | 0 app/services/organization_service.py | 0 app/services/regional_service.py | 0 app/services/role_service.py | 0 app/services/user_service.py | 0 app/utils/__init__.py | 0 app/utils/encryption.py | 0 app/utils/helpers.py | 0 app/utils/logger_config.py | 0 app/utils/system.py | 0 docker-compose.yml | 0 environment.env | 0 migrations/README | 0 migrations/env.py | 0 migrations/script.py.mako | 0 migrations/scripts.py | 0 .../versions/20241203_1200_initial_schema.py | 0 .../20241204_0000_seed_initial_data.py | 0 migrations/versions/__init__.py | 0 poetry.lock | 0 pyproject.toml | 0 run.py | 0 tests/__init__.py | 0 tests/conftest.py | 0 tests/test_api/__init__.py | 0 tests/test_services/__init__.py | 0 160 files changed, 3403 insertions(+) mode change 100644 => 100755 .DS_Store mode change 100644 => 100755 .env.example mode change 100644 => 100755 .gitignore mode change 100644 => 100755 .pre-commit-config.yaml 
mode change 100644 => 100755 .python-version mode change 100644 => 100755 Dockerfile mode change 100644 => 100755 README.md create mode 100755 addons.txt mode change 100644 => 100755 alembic.ini mode change 100644 => 100755 app/__init__.py mode change 100644 => 100755 app/api/dependencies/__init__.py mode change 100644 => 100755 app/api/dependencies/auth.py mode change 100644 => 100755 app/api/dependencies/database.py mode change 100644 => 100755 app/api/dependencies/factory.py mode change 100644 => 100755 app/api/v1/__init__.py mode change 100644 => 100755 app/api/v1/routes/__init__.py mode change 100644 => 100755 app/api/v1/routes/auth_route.py mode change 100644 => 100755 app/api/v1/routes/category_route.py mode change 100644 => 100755 app/api/v1/routes/classification_route.py mode change 100644 => 100755 app/api/v1/routes/count_route.py mode change 100644 => 100755 app/api/v1/routes/credential_route.py mode change 100644 => 100755 app/api/v1/routes/feedback_route.py mode change 100644 => 100755 app/api/v1/routes/file_route.py mode change 100644 => 100755 app/api/v1/routes/geonetwork_route.py mode change 100644 => 100755 app/api/v1/routes/map_projection_system_route.py mode change 100644 => 100755 app/api/v1/routes/map_source_route.py mode change 100644 => 100755 app/api/v1/routes/mapset_history_route.py mode change 100644 => 100755 app/api/v1/routes/mapset_route.py mode change 100644 => 100755 app/api/v1/routes/news_route.py mode change 100644 => 100755 app/api/v1/routes/organization_route.py mode change 100644 => 100755 app/api/v1/routes/regional_route.py mode change 100644 => 100755 app/api/v1/routes/role_route.py mode change 100644 => 100755 app/api/v1/routes/user_route.py mode change 100644 => 100755 app/core/__init__.py mode change 100644 => 100755 app/core/config.py mode change 100644 => 100755 app/core/data_types.py mode change 100644 => 100755 app/core/database.py mode change 100644 => 100755 app/core/exceptions.py mode change 100644 => 100755 
app/core/minio_client.py mode change 100644 => 100755 app/core/params.py mode change 100644 => 100755 app/core/responses.py mode change 100644 => 100755 app/core/security.py mode change 100644 => 100755 app/main.py create mode 100755 app/mapset_pipeline/__init__,.py create mode 100755 app/mapset_pipeline/api/router.py create mode 100755 app/mapset_pipeline/api/schemas.py create mode 100755 app/mapset_pipeline/core/clients/ai_client.py create mode 100755 app/mapset_pipeline/core/processing/analyzer.py create mode 100755 app/mapset_pipeline/core/processing/geometry_build.py create mode 100755 app/mapset_pipeline/core/publication/publish_geonetwork.py create mode 100755 app/mapset_pipeline/core/publication/publish_geoserver.py create mode 100755 app/mapset_pipeline/core/readers/__init__.py create mode 100755 app/mapset_pipeline/core/readers/reader_csv.py create mode 100755 app/mapset_pipeline/core/readers/reader_gdb.py create mode 100755 app/mapset_pipeline/core/readers/reader_mpk.py create mode 100755 app/mapset_pipeline/core/readers/reader_pdf.py create mode 100755 app/mapset_pipeline/core/readers/reader_shp.py create mode 100755 app/mapset_pipeline/data/repository.py create mode 100755 app/mapset_pipeline/service.py create mode 100755 app/mapset_pipeline/utils/file_ops.py create mode 100755 app/mapset_pipeline/utils/formatters.py create mode 100755 app/mapset_pipeline/utils/pdf_cleaner.py mode change 100644 => 100755 app/models/__init__.py mode change 100644 => 100755 app/models/base.py mode change 100644 => 100755 app/models/category_model.py mode change 100644 => 100755 app/models/classification_model.py mode change 100644 => 100755 app/models/credential_model.py mode change 100644 => 100755 app/models/feedback_model.py mode change 100644 => 100755 app/models/file_model.py mode change 100644 => 100755 app/models/map_access_model.py mode change 100644 => 100755 app/models/map_projection_system_model.py mode change 100644 => 100755 app/models/map_source_model.py 
mode change 100644 => 100755 app/models/mapset_history_model.py mode change 100644 => 100755 app/models/mapset_model.py mode change 100644 => 100755 app/models/news_model.py mode change 100644 => 100755 app/models/organization_model.py mode change 100644 => 100755 app/models/refresh_token_model.py mode change 100644 => 100755 app/models/regional_model.py mode change 100644 => 100755 app/models/role_model.py mode change 100644 => 100755 app/models/user_model.py mode change 100644 => 100755 app/repositories/__init__.py mode change 100644 => 100755 app/repositories/base.py mode change 100644 => 100755 app/repositories/category_repository.py mode change 100644 => 100755 app/repositories/classification_repository.py mode change 100644 => 100755 app/repositories/credential_repository.py mode change 100644 => 100755 app/repositories/feedback_repository.py mode change 100644 => 100755 app/repositories/file_repository.py mode change 100644 => 100755 app/repositories/map_access_repository.py mode change 100644 => 100755 app/repositories/map_projection_system_repository.py mode change 100644 => 100755 app/repositories/map_source_repository.py mode change 100644 => 100755 app/repositories/map_source_usage_repository.py mode change 100644 => 100755 app/repositories/mapset_history_repository.py mode change 100644 => 100755 app/repositories/mapset_repository.py mode change 100644 => 100755 app/repositories/news_repository.py mode change 100644 => 100755 app/repositories/organization_repository.py mode change 100644 => 100755 app/repositories/regional_repository.py mode change 100644 => 100755 app/repositories/role_repository.py mode change 100644 => 100755 app/repositories/token_repository.py mode change 100644 => 100755 app/repositories/user_repository.py mode change 100644 => 100755 app/response/res.py mode change 100644 => 100755 app/schemas/__init__.py mode change 100644 => 100755 app/schemas/base.py mode change 100644 => 100755 app/schemas/category_schema.py mode change 
100644 => 100755 app/schemas/classification_schema.py mode change 100644 => 100755 app/schemas/count_schema.py mode change 100644 => 100755 app/schemas/credential_schema.py mode change 100644 => 100755 app/schemas/error_schema.py mode change 100644 => 100755 app/schemas/feedback_schema.py mode change 100644 => 100755 app/schemas/file_schema.py mode change 100644 => 100755 app/schemas/map_access_schema.py mode change 100644 => 100755 app/schemas/map_projection_system_schema.py mode change 100644 => 100755 app/schemas/map_source_schema.py mode change 100644 => 100755 app/schemas/mapset_history_schema.py mode change 100644 => 100755 app/schemas/mapset_schema.py mode change 100644 => 100755 app/schemas/news_schema.py mode change 100644 => 100755 app/schemas/organization_schema.py mode change 100644 => 100755 app/schemas/regional_schema.py mode change 100644 => 100755 app/schemas/role_schema.py mode change 100644 => 100755 app/schemas/token_schema.py mode change 100644 => 100755 app/schemas/user_schema.py mode change 100644 => 100755 app/services/__init__.py mode change 100644 => 100755 app/services/auth_service.py mode change 100644 => 100755 app/services/base.py mode change 100644 => 100755 app/services/category_service.py mode change 100644 => 100755 app/services/classification_service.py mode change 100644 => 100755 app/services/count_service.py mode change 100644 => 100755 app/services/credential_service.py mode change 100644 => 100755 app/services/feedback_service.py mode change 100644 => 100755 app/services/file_service.py mode change 100644 => 100755 app/services/map_access_service.py mode change 100644 => 100755 app/services/map_projection_system_service.py mode change 100644 => 100755 app/services/map_source_service.py mode change 100644 => 100755 app/services/mapset_history_service.py mode change 100644 => 100755 app/services/mapset_service.py mode change 100644 => 100755 app/services/news_service.py mode change 100644 => 100755 
app/services/organization_service.py mode change 100644 => 100755 app/services/regional_service.py mode change 100644 => 100755 app/services/role_service.py mode change 100644 => 100755 app/services/user_service.py mode change 100644 => 100755 app/utils/__init__.py mode change 100644 => 100755 app/utils/encryption.py mode change 100644 => 100755 app/utils/helpers.py mode change 100644 => 100755 app/utils/logger_config.py mode change 100644 => 100755 app/utils/system.py mode change 100644 => 100755 docker-compose.yml mode change 100644 => 100755 environment.env mode change 100644 => 100755 migrations/README mode change 100644 => 100755 migrations/env.py mode change 100644 => 100755 migrations/script.py.mako mode change 100644 => 100755 migrations/scripts.py mode change 100644 => 100755 migrations/versions/20241203_1200_initial_schema.py mode change 100644 => 100755 migrations/versions/20241204_0000_seed_initial_data.py mode change 100644 => 100755 migrations/versions/__init__.py mode change 100644 => 100755 poetry.lock mode change 100644 => 100755 pyproject.toml mode change 100644 => 100755 run.py mode change 100644 => 100755 tests/__init__.py mode change 100644 => 100755 tests/conftest.py mode change 100644 => 100755 tests/test_api/__init__.py mode change 100644 => 100755 tests/test_services/__init__.py diff --git a/.DS_Store b/.DS_Store old mode 100644 new mode 100755 diff --git a/.env.example b/.env.example old mode 100644 new mode 100755 diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 index 0a19790..161b876 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,6 @@ cython_debug/ # PyPI configuration file .pypirc + + +tests/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml old mode 100644 new mode 100755 diff --git a/.python-version b/.python-version old mode 100644 new mode 100755 diff --git a/Dockerfile b/Dockerfile old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new 
mode 100755 diff --git a/addons.txt b/addons.txt new file mode 100755 index 0000000..74121b4 --- /dev/null +++ b/addons.txt @@ -0,0 +1,11 @@ +pandas = "^3.0.0" +geopandas = "^1.1.2" +fiona = "^1.10.1" +numpy = "^2.4.2" +pdfplumber = "^0.11.9" +py7zr = "^1.1.0" +pyogrio = "^0.12.1" +rapidfuzz = "^3.14.3" +requests = "^2.32.5" +openpyxl = "^3.1.5" +pyarrow = "21.0.0" \ No newline at end of file diff --git a/alembic.ini b/alembic.ini old mode 100644 new mode 100755 diff --git a/app/__init__.py b/app/__init__.py old mode 100644 new mode 100755 diff --git a/app/api/dependencies/__init__.py b/app/api/dependencies/__init__.py old mode 100644 new mode 100755 diff --git a/app/api/dependencies/auth.py b/app/api/dependencies/auth.py old mode 100644 new mode 100755 diff --git a/app/api/dependencies/database.py b/app/api/dependencies/database.py old mode 100644 new mode 100755 diff --git a/app/api/dependencies/factory.py b/app/api/dependencies/factory.py old mode 100644 new mode 100755 diff --git a/app/api/v1/__init__.py b/app/api/v1/__init__.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/__init__.py b/app/api/v1/routes/__init__.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/auth_route.py b/app/api/v1/routes/auth_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/category_route.py b/app/api/v1/routes/category_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/classification_route.py b/app/api/v1/routes/classification_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/count_route.py b/app/api/v1/routes/count_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/credential_route.py b/app/api/v1/routes/credential_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/feedback_route.py b/app/api/v1/routes/feedback_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/file_route.py b/app/api/v1/routes/file_route.py old mode 100644 
new mode 100755 diff --git a/app/api/v1/routes/geonetwork_route.py b/app/api/v1/routes/geonetwork_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/map_projection_system_route.py b/app/api/v1/routes/map_projection_system_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/map_source_route.py b/app/api/v1/routes/map_source_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/mapset_history_route.py b/app/api/v1/routes/mapset_history_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/mapset_route.py b/app/api/v1/routes/mapset_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/news_route.py b/app/api/v1/routes/news_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/organization_route.py b/app/api/v1/routes/organization_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/regional_route.py b/app/api/v1/routes/regional_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/role_route.py b/app/api/v1/routes/role_route.py old mode 100644 new mode 100755 diff --git a/app/api/v1/routes/user_route.py b/app/api/v1/routes/user_route.py old mode 100644 new mode 100755 diff --git a/app/core/__init__.py b/app/core/__init__.py old mode 100644 new mode 100755 diff --git a/app/core/config.py b/app/core/config.py old mode 100644 new mode 100755 diff --git a/app/core/data_types.py b/app/core/data_types.py old mode 100644 new mode 100755 diff --git a/app/core/database.py b/app/core/database.py old mode 100644 new mode 100755 diff --git a/app/core/exceptions.py b/app/core/exceptions.py old mode 100644 new mode 100755 diff --git a/app/core/minio_client.py b/app/core/minio_client.py old mode 100644 new mode 100755 diff --git a/app/core/params.py b/app/core/params.py old mode 100644 new mode 100755 diff --git a/app/core/responses.py b/app/core/responses.py old mode 100644 new mode 100755 diff --git a/app/core/security.py 
# services/file_pipeline/router.py
from fastapi import APIRouter, Depends, File, UploadFile, Form
from .schemas import UploadRequest, PdfRequest
from app.mapset_pipeline.service import handle_file_analysis, process_pdf_file, execute_postgis_ingestion
from app.response.res import successRes, errorRes

router = APIRouter(prefix="/pipeline", tags=["File Pipeline"])


@router.post("/analyze")
async def upload_file(
    file: UploadFile = File(...),
    page: str = Form(""),
    sheet: str = Form(""),
    fileDesc: str = Form(""),
):
    """Analyze an uploaded file and return preview rows, geometry stats and AI metadata.

    Returns successRes with the analysis payload, or errorRes(500) on any failure.
    """
    try:
        data = await handle_file_analysis(file, page, sheet, fileDesc)
        return successRes(data=data)
    except Exception as e:
        return errorRes(message="Upload failed", details=str(e), status_code=500)


@router.post("/analyze/pdf")
async def upload_pdf(payload: PdfRequest):
    """Analyze tabular data extracted from a PDF.

    Fix: renamed from `upload_file`, which shadowed the handler above (F811).
    The route path and request/response contract are unchanged.
    """
    try:
        res = await process_pdf_file(payload)
        return res
    except Exception as e:
        return errorRes(message="Upload failed", details=str(e), status_code=500)


@router.post("/publish")
async def process_to_postgis(payload: UploadRequest):
    """Ingest the analyzed dataset into PostGIS and publish it.

    NOTE(review): user_id is hard-coded to 2 — it should come from the auth
    dependency (see original comment: "user_id bisa diambil dari dependency
    injection auth").
    """
    try:
        data = await execute_postgis_ingestion(payload, user_id=2)
        return successRes(data=data)
    except Exception as e:
        return errorRes(message="Processing failed", details=str(e), status_code=500)
# --- app/mapset_pipeline/api/schemas.py ---
from pydantic import BaseModel
from typing import List, Dict, Any


class PdfRequest(BaseModel):
    """Tabular payload extracted from a PDF, ready for analysis."""
    title: str
    columns: List[str]
    rows: List[List]
    fileName: str
    fileDesc: str


class UploadRequest(BaseModel):
    """Publish request: analyzed rows plus authoring/style information."""
    title: str
    path: str
    rows: List[dict]
    columns: List[str]
    author: Dict[str, Any]
    style: str


# --- app/mapset_pipeline/core/clients/ai_client.py ---
import requests

from app.core.config import GEN_AI_URL

URL = GEN_AI_URL


def generate_metadata(payload: Dict[str, Any]) -> Dict[str, Any]:
    """POST *payload* to the GenAI metadata service and return its JSON body.

    Transport errors are not raised; they are returned as
    ``{"success": False, "error": <message>}`` so callers can degrade gracefully.
    """
    headers = {
        "Content-Type": "application/json",
        # SECURITY(review): hard-coded API key — move to config/secret store.
        "API_KEY": "testsatupeta",
    }

    try:
        response = requests.post(
            f"{URL}",
            json=payload,
            headers=headers,
            # Fix: the original call had no timeout, so a stalled service
            # would hang the request forever.
            timeout=60,
        )

        # response.raise_for_status()  # intentionally disabled in original flow
        return response.json()

    except requests.exceptions.RequestException as e:
        return {
            "success": False,
            "error": str(e)
        }


if __name__ == "__main__":
    # Example payload (demo/manual run only)
    payload = {
        "nama_file_peta": "peta bencana.pdf",
        "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD)",
        "tipe_data_spasial": "Multipolygon",
        "struktur_atribut_data": {},
        "metadata": {
            "judul": "",
            "abstrak": "",
            "tujuan": "",
            "keyword": [],
            "kategori": [],
            "kategori_mapset": ""
        }
    }

    result = generate_metadata(payload)
    print(result)
async def analyze_and_clean_dataframe(df: pd.DataFrame, ext: str, filename: str, fileDesc: str):
    """Main DataFrame processing pipeline.

    1. Detect/build geometry
    2. Validate geometry and compute statistics
    3. Build preview rows and warnings
    4. Generate an AI metadata suggestion (best effort)
    5. Persist the cleaned frame to a temporary parquet file

    Returns the analysis response dict on success, or ``errorRes`` (422) when
    no geometry column could be derived.
    """

    # 1. Geometry detection
    result = detect_and_build_geometry(df, master_polygons=None)

    if not hasattr(result, "geometry") or result.geometry.isna().all():
        result = attach_polygon_geometry_auto(result)

    def normalize_geom_type(geom_type):
        # Collapse Multi* variants to their base type (MultiPolygon -> Polygon).
        if geom_type and geom_type.startswith("Multi"):
            return geom_type.replace("Multi", "")
        return geom_type

    # 2. Geometry-type analysis
    if isinstance(result, gpd.GeoDataFrame) and "geometry" in result.columns:
        geom_types = (
            result.geometry
            .dropna()
            .geom_type
            .apply(normalize_geom_type)
            .unique()
        )
        geom_type = geom_types[0] if len(geom_types) > 0 else "None"
        null_geom = result.geometry.isna().sum()

        print(f"[INFO] Tipe Geometry: {geom_type}")
        print(f"[INFO] Jumlah geometry kosong: {null_geom}")
    else:
        # Fallback: no geometry could be detected at all.
        res = {
            "message": "Tidak menemukan tabel yang relevan atau kolom geometri.",
            "file_type": ext,
            "rows": len(df),
            "columns": len(df.columns),
            "geometry_valid": 0,
            "geometry_empty": 0,
            "geometry_valid_percent": 0,
            "warnings": [],
            "warning_examples": [],
            "preview": []
        }
        # Structured error so the router/service layer can surface it.
        return errorRes(message="Tidak berhasil mencocokan geometry pada tabel.", details=res, status_code=422)

    # 3. Clean values that are not JSON-serializable
    result = result.replace([pd.NA, float('inf'), float('-inf')], None)

    # Validity statistics.
    # Fix: guard the percentage against an empty frame (ZeroDivisionError).
    empty_count = result['geometry'].apply(is_geom_empty).sum()
    valid_count = len(result) - empty_count
    match_percentage = (valid_count / len(result)) * 100 if len(result) else 0.0

    warnings = []
    if empty_count > 0:
        warnings.append(
            f"{empty_count} dari {len(result)} baris tidak memiliki geometry yang valid "
            f"({100 - match_percentage:.2f}% data gagal cocok)."
        )
        # Sample of rows whose geometry failed to match (capped at 500).
        examples = result[result['geometry'].apply(is_geom_empty)].head(500)
        warning_examples = examples.to_dict(orient="records")
    else:
        warning_examples = []

    # Preview data: work on a copy so the WKT conversion does not clobber the
    # frame persisted to parquet below.
    data_df = result.copy()
    if 'geometry' in data_df.columns:
        data_df['geometry'] = data_df['geometry'].apply(
            lambda g: g.wkt if g is not None else None
        )

    preview_data = data_df.to_dict(orient="records")

    # JSON sanitation (numpy scalars -> native python types)
    preview_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in preview_data
    ]
    warning_safe = [
        {k: safe_json(v) for k, v in row.items()} for row in warning_examples
    ]

    # 4. AI metadata suggestion (best effort — failures degrade to {})
    ai_context = {
        "nama_file_peta": filename,
        "nama_opd": "Badan Penanggulangan Bencana Daerah (BPBD) Provinsi Jatim",  # TODO: make dynamic
        "tipe_data_spasial": geom_type,
        "deskripsi_singkat": fileDesc,
        "struktur_atribut_data": {},
    }

    try:
        ai_suggest = generate_metadata(ai_context)
    except Exception as e:
        print(f"[WARNING] Gagal generate metadata AI: {e}")
        ai_suggest = {}

    # 5. Persist to a uniquely-named temporary parquet (thread-safe filename);
    # the synchronous conversion runs off the event loop to avoid blocking.
    tmp_file = generate_unique_filename(folder="tmp", ext="parquet")
    await asyncio.to_thread(dataframe_validation, data_df, tmp_file)

    return {
        "message": "File berhasil dibaca dan dianalisis.",
        "file_name": filename,
        "file_type": ext,
        "rows": int(len(result)),
        "columns": list(map(str, result.columns)),
        "geometry_valid": int(valid_count),
        "geometry_empty": int(empty_count),
        "geometry_valid_percent": float(round(match_percentage, 2)),
        "geometry_type": geom_type,
        "warnings": warnings,
        "warning_rows": warning_safe,
        "preview": preview_safe,
        "metadata_suggest": ai_suggest,
        "tmp_path": tmp_file
    }


async def publish_mapset(table_name: str, job_id: str):
    """Publish *table_name* as a GeoServer layer and register its metadata.

    Raises:
        RuntimeError: wrapping any failure from the publication steps.

    NOTE(review): job-status updates are still disabled and the returned uuid
    is a hard-coded placeholder ("123123") — the real value from
    ``publish_metadata`` is computed but not yet returned.
    """
    try:
        geos_link = publish_layer_to_geoserver(table_name, job_id)

        uuid = await publish_metadata(
            table_name=table_name,
            geoserver_links=geos_link
        )

        # await update_job_status(table_name, "FINISHED", job_id)

        return {
            "geos_link": geos_link["layer_url"],
            # "uuid": uuid
            "uuid": "123123"
        }

    except Exception as e:
        # await update_job_status(table_name, "FAILED", job_id)
        raise RuntimeError(f"Publish layer gagal: {e}") from e
KEC_REF, KAB_REF + +# ============================================================ +# KONFIGURASI DAN KONSTANTA +# ============================================================ + +COLUMN_ALIASES = { + 'desa': ['desa', 'kelurahan', 'desa_kelurahan', 'desa/kelurahan', 'nama_desa', 'nama_kelurahan', 'Desa/Kel'], + 'kecamatan': ['kec', 'kecamatan', 'nama_kec', 'nama_kecamatan'], + 'kabupaten': ['kab', 'kabupaten', 'kota', 'kabupaten_kota', 'kota_kabupaten', 'kab/kota', 'kota/kabupaten', 'kota/kab'] +} + +# ============================================================ +# FUNGSI BANTU ADMINISTRATIF +# ============================================================ + +def find_admin_column(df, aliases): + """Mencari kolom yang paling cocok untuk tiap level admin (desa/kec/kab)""" + matched = {} + for level, alias_list in aliases.items(): + for col in df.columns: + col_norm = col.strip().lower().replace(' ', '_').replace('/', '_') + if any(alias in col_norm for alias in alias_list): + matched[level] = col + break + return matched + + +def detect_smallest_admin_level(df): + """Mendeteksi level administratif terkecil yang ada di DataFrame""" + cols = [c.lower() for c in df.columns] + if any('desa' in c or 'kelurahan' in c for c in cols): + return 'desa' + elif any('kecamatan' in c for c in cols): + return 'kecamatan' + elif any('kab' in c or 'kota' in c for c in cols): + return 'kabupaten' + return None + + +def fuzzy_merge(df, master, left_key, right_key, threshold=85): + """Melakukan fuzzy matching antar nama wilayah""" + matches = df[left_key].apply( + lambda x: process.extractOne(str(x), master[right_key], score_cutoff=threshold) + ) + df['match'] = matches.apply(lambda m: m[0] if m else None) + merged = df.merge(master, left_on='match', right_on=right_key, how='left') + return merged + + + + + +def normalize_name(name: str, level: str = None): + if not isinstance(name, str): + return None + + name = name.strip() + if not name: + return None + + name = 
re.sub(r'\s*\([^)]*\)\s*', '', name) + + raw = name.lower() + raw = re.sub(r'^(desa|kelurahan|kel|dusun|kampung)\s+', '', raw) + raw = re.sub(r'^(kecamatan|kec)\s+', '', raw) + raw = re.sub(r'^(kabupaten|kab\.?|kab)\s+', '', raw) + + if level in ["kabupaten", "kota"]: + raw = re.sub(r'^(kota\s+)', '', raw) + + raw = re.sub(r'[^a-z\s]', '', raw) + raw = re.sub(r'\s+', ' ', raw).strip() + + tokens = raw.split() + + merged_tokens = [] + i = 0 + while i < len(tokens): + if i < len(tokens) - 1: + sim = fuzz.ratio(tokens[i], tokens[i + 1]) + if sim > 75: + merged_tokens.append(tokens[i] + tokens[i + 1]) + i += 2 + continue + merged_tokens.append(tokens[i]) + i += 1 + + cleaned_tokens = [] + prev = None + for tok in merged_tokens: + if prev and fuzz.ratio(prev, tok) > 95: + continue + cleaned_tokens.append(tok) + prev = tok + + raw = " ".join(cleaned_tokens) + formatted = raw.title() + + if level in ["kabupaten", "kota"]: + if "kota" in name.lower(): + if not formatted.startswith("Kota "): + formatted = f"Kota {formatted}" + else: + formatted = formatted.replace("Kota ", "") + + return formatted + + + + +def is_geom_empty(g): + """True jika geometry None, NaN, atau geometry Shapely kosong.""" + if g is None: + return True + if isinstance(g, float) and pd.isna(g): + return True + if isinstance(g, BaseGeometry): + return g.is_empty + return False + + + + + +import math + +def normalize_lon(val, is_lat=False): + if pd.isna(val): + return None + try: + v = float(val) + except: + return None + + av = abs(v) + if av == 0: + return v + + if (-180 <= v <= 180 and not is_lat) or (-90 <= v <= 90 and is_lat): + return v + + for factor in [1, 10, 100, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9]: + nv = v / factor + if (not is_lat and -180 <= nv <= 180) or (is_lat and -90 <= nv <= 90): + return nv + + return None + + + +def normalize_lat(val): + if pd.isna(val): + return None + v = float(val) + av = abs(v) + if av > 1e9: # contoh: -8167413802 (10 digit) + return v / 1e9 + elif av > 1e8: # 
fallback jika ada variasi + return v / 1e8 + else: + return v + + +# ============================================================ +# FUNGSI UTAMA GEOMETRY DETECTION (LAT/LON / PATH) +# ============================================================ +def detect_and_build_geometry(df: pd.DataFrame, master_polygons: gpd.GeoDataFrame = None): + """ + Mendeteksi dan membentuk geometry dari DataFrame. + Bisa dari lat/lon, WKT, atau join ke master polygon (jika disediakan). + """ + + if isinstance(df, gpd.GeoDataFrame): + geom_cols = [ + c for c in df.columns + if re.match(r'^(geometry|geom|the_geom|wkb_geometry)$', c, re.IGNORECASE) + or c.lower().startswith("geom") + or c.lower().endswith("geometry") + ] + # if "geometry" in df.columns and df.geometry.notna().any(): + if geom_cols: + geom_count = df.geometry.notna().sum() + geom_type = list(df.geom_type.unique()) + print(f"[INFO] Detected existing geometry in GeoDataFrame ({geom_count} features, {geom_type}).") + return df + + lat_col = next((c for c in df.columns if re.search(r'\b(lat|latitude|y[_\s]*coord|y$)\b', c.lower())), None) + lon_col = next((c for c in df.columns if re.search(r'\b(lon|long|longitude|x[_\s]*coord|x$)\b', c.lower())), None) + + if lat_col and lon_col: + df[lat_col] = pd.to_numeric(df[lat_col], errors='coerce') + df[lon_col] = pd.to_numeric(df[lon_col], errors='coerce') + + df[lon_col] = df[lon_col].apply(lambda x: normalize_lon(x, is_lat=False)) + df[lat_col] = df[lat_col].apply(normalize_lat) + + gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[lon_col], df[lat_col]), crs="EPSG:4326") + print("[INFO] Geometry dibangun dari kolom lat/lon.") + return gdf + + coord_col = next( + (c for c in df.columns if re.search(r'(geom|geometry|wkt|shp|shape|path|coord)', c.lower())), None + ) + + if coord_col and df[coord_col].notnull().any(): + sample_val = str(df[coord_col].dropna().iloc[0]).strip() + + if sample_val.startswith('['): + def parse_geom(val): + try: + pts = eval(val) + return 
LineString(pts) + except Exception: + return None + df['geometry'] = df[coord_col].apply(parse_geom) + gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326") + print("[INFO] Geometry dibangun dari kolom koordinat/path (list of points).") + return gdf + + elif any(x in sample_val.upper() for x in ["POINT", "LINESTRING", "POLYGON"]): + try: + df['geometry'] = df[coord_col].apply( + lambda g: wkt.loads(g) if isinstance(g, str) and any( + x in g.upper() for x in ["POINT", "LINESTRING", "POLYGON"] + ) else None + ) + gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326") + print("[INFO] Geometry dibangun dari kolom WKT (Point/Line/Polygon/MultiPolygon).") + return gdf + except Exception as e: + print(f"[WARN] Gagal parsing kolom geometry sebagai WKT: {e}") + + + + if master_polygons is not None: + df.columns = df.columns.str.lower().str.strip().str.replace(' ', '_').str.replace('/', '_') + matches = find_admin_column(df, COLUMN_ALIASES) + + if 'desa' in matches: + admin_col = matches['desa'] + merged = df.merge(master_polygons, left_on=admin_col, right_on='nama_desa', how='left') + if merged['geometry'].isna().sum() > 0: + merged = fuzzy_merge(df, master_polygons, admin_col, 'nama_desa') + gdf = gpd.GeoDataFrame(merged, geometry='geometry', crs=master_polygons.crs) + return gdf + + elif 'kecamatan' in matches: + admin_col = matches['kecamatan'] + merged = df.merge(master_polygons, left_on=admin_col, right_on='nama_kecamatan', how='left') + gdf = gpd.GeoDataFrame(merged, geometry='geometry', crs=master_polygons.crs) + return gdf + + elif 'kabupaten' in matches: + admin_col = matches['kabupaten'] + merged = df.merge(master_polygons, left_on=admin_col, right_on='nama_kabupaten', how='left') + gdf = gpd.GeoDataFrame(merged, geometry='geometry', crs=master_polygons.crs) + return gdf + + print("[WARN] Tidak ditemukan geometry (lat/lon, path, atau master).") + return df + + +# def get_reference_polygons(level): +# """Mengambil data batas wilayah 
(MultiPolygon) dari DB referensi""" +# table_map = { +# 'desa': f"{REFERENCE_SCHEMA}.administrasi_ar_keldesa_jatim", +# 'kecamatan': f"{REFERENCE_SCHEMA}.administrasi_ar_kec_jatim", +# 'kabupaten': f"{REFERENCE_SCHEMA}.administrasi_ar_kabkot_jatim" +# } + +# table_name = table_map.get(level) +# if not table_name: +# raise ValueError(f"Tidak ada tabel referensi untuk level '{level}'.") + +# engine = create_engine(REFERENCE_DB_URL) +# query = f"SELECT *, ST_Multi(geom) AS geometry FROM {table_name}" +# gdf = gpd.read_postgis(query, engine, geom_col='geometry') + +# print(f"[INFO] {len(gdf)} data referensi '{level}' berhasil dimuat dari {table_name}.") +# return gdf + + +from functools import lru_cache + +@lru_cache(maxsize=3) +def get_reference_polygons(level): + local_path = f"cache/{level}_ref.parquet" + if os.path.exists(local_path): + print(f"[CACHE] Memuat referensi '{level}' dari file lokal.") + return gpd.read_parquet(local_path) + + print(f"[DB] Mengambil data referensi '{level}' dari database...") + table_map = { + "desa": f"{REFERENCE_SCHEMA}.administrasi_ar_keldesa_jatim", + "kecamatan": f"{REFERENCE_SCHEMA}.administrasi_ar_kec_jatim", + "kabupaten": f"{REFERENCE_SCHEMA}.administrasi_ar_kabkot_jatim" + } + table_name = table_map.get(level) + engine = create_engine(REFERENCE_DB_URL) + query = f"SELECT *, ST_Multi(geom) AS geometry FROM {table_name}" + gdf = gpd.read_postgis(query, engine, geom_col="geometry") + gdf.to_parquet(local_path) + print(f"[CACHE] Disimpan ke {local_path}") + return gdf + + + + + + +# ============================================================ +# Optimize Join +# ============================================================ +def build_join_key(df, cols): + arr = df[cols].astype(str).replace("nan", "", regex=False).to_numpy() + return np.char.add.reduce(np.column_stack( + [arr[:, i] + ("|" if i < len(cols) - 1 else "") for i in range(len(cols))] + ), axis=1) + + +# ============================================================ +# 
# ============================================================
# FUNGSI: AUTO ATTACH POLYGON KE DATAFRAME NON-SPASIAL
# ============================================================
def attach_polygon_geometry_auto(df: pd.DataFrame):
    """Attach a MultiPolygon `geometry` column to a non-spatial DataFrame.

    The reference layer is chosen from the smallest administrative level
    found in the columns (desa > kecamatan > kabupaten). Rows are joined on
    an exact composite key (desa|kec|kab) first, then fuzzy-matched for the
    leftovers, without duplicating rows. Returns the input unchanged when
    the admin columns are missing or incomplete.
    """
    level = detect_smallest_admin_level(df)
    if not level:
        print("[WARN] Tidak ditemukan kolom administratif (desa/kecamatan/kabupaten).")
        return df

    print(f"[INFO] Detected smallest admin level: {level}")
    ref_gdf = get_reference_polygons(level)

    # Locate the admin columns in the incoming data (first match wins).
    desa_col = next((c for c in df.columns if any(x in c.lower() for x in ['desa', 'kelurahan'])), None)
    kec_col = next((c for c in df.columns if 'kec' in c.lower()), None)
    kab_col = next((c for c in df.columns if any(x in c.lower() for x in ['kab', 'kota'])), None)

    # Validate the column combination before attempting a join.
    if desa_col and (not kec_col or not kab_col):
        print("[ERROR] Kolom 'Desa' ditemukan tetapi kolom 'Kecamatan' dan/atau 'Kabupaten' tidak lengkap.")
        print(f"[DEBUG] Ditemukan: Desa={desa_col}, Kec={kec_col}, Kab={kab_col}")
        return df

    if not desa_col and kec_col and not kab_col:
        print("[ERROR] Kolom 'Kecamatan' ditemukan tetapi kolom 'Kabupaten/Kota' tidak ditemukan.")
        print(f"[DEBUG] Ditemukan: Desa={desa_col}, Kec={kec_col}, Kab={kab_col}")
        return df

    if not desa_col and not kec_col and not kab_col:
        print("[WARN] Tidak ditemukan kolom administratif apapun (Desa/Kecamatan/Kabupaten).")
        print(f"[DEBUG] Kolom CSV: {list(df.columns)}")
        return df

    if kab_col and not desa_col and not kec_col:
        print("[INFO] Struktur kolom administratif valid (minimal Kabupaten/Kota ditemukan).")
        print(f"[DEBUG] Ditemukan: Desa={desa_col}, Kec={kec_col}, Kab={kab_col}")

    # Reference-side column names (constants imported at module top).
    desa_ref = DESA_REF
    kec_ref = KEC_REF
    kab_ref = KAB_REF

    # Normalize names on both sides so exact join keys line up.
    # NOTE(review): astype(str) turns NaN cells into the literal "nan",
    # which normalize_name title-cases to "Nan" — confirm this is intended.
    if desa_col is not None:
        df[desa_col] = df[desa_col].astype(str).apply(lambda x: normalize_name(x, "desa"))
    if kec_col is not None:
        df[kec_col] = df[kec_col].astype(str).apply(lambda x: normalize_name(x, "kecamatan"))
    if kab_col is not None:
        df[kab_col] = df[kab_col].astype(str).apply(lambda x: normalize_name(x, "kabupaten"))

    if desa_ref is not None:
        ref_gdf[desa_ref] = ref_gdf[desa_ref].astype(str).apply(lambda x: normalize_name(x, "desa"))
    if kec_ref is not None:
        ref_gdf[kec_ref] = ref_gdf[kec_ref].astype(str).apply(lambda x: normalize_name(x, "kecamatan"))
    if kab_ref is not None:
        ref_gdf[kab_ref] = ref_gdf[kab_ref].astype(str).apply(lambda x: normalize_name(x, "kabupaten"))

    join_cols_df = [col for col in (desa_col, kec_col, kab_col) if col]
    if not join_cols_df:
        print("[ERROR] Tidak ada kolom administratif yang bisa digunakan untuk join key.")
        # FIX: previously execution fell through and crashed later on the
        # missing 'geometry' column; bail out like the other error paths.
        return df

    join_cols_ref = [col for col in (desa_ref, kec_ref, kab_ref) if col]

    # Align both key lists to the same (deepest) admin depth.
    common_depth = min(len(join_cols_df), len(join_cols_ref))
    join_cols_df = join_cols_df[-common_depth:]
    join_cols_ref = join_cols_ref[-common_depth:]

    df["_join_key"] = build_join_key(df, join_cols_df)
    ref_gdf["_join_key"] = build_join_key(ref_gdf, join_cols_ref)

    ref_lookup = ref_gdf[["_join_key", "geometry"]].drop_duplicates(subset=["_join_key"])
    df = df.merge(ref_lookup, how="left", on="_join_key")
    matched = df["geometry"].notna().sum()

    if matched < len(df):
        # Fuzzy fallback for rows the exact join missed.
        ref_dict = dict(zip(ref_lookup["_join_key"], ref_lookup["geometry"]))
        candidate_keys = list(ref_dict.keys())  # FIX: hoisted out of the per-row apply

        def find_fuzzy_geom(row):
            key = row["_join_key"]
            if not isinstance(key, str):
                return None
            # score_cutoff=85 folds the old two-step check
            # (cutoff 80 then ">= 85") into one equivalent threshold.
            match = process.extractOne(
                key, candidate_keys, scorer=fuzz.token_set_ratio, score_cutoff=85
            )
            return ref_dict[match[0]] if match else None

        df.loc[df["geometry"].isna(), "geometry"] = df[df["geometry"].isna()].apply(find_fuzzy_geom, axis=1)

    df = df.drop(columns=["_join_key"], errors="ignore")

    # NOTE(review): CRS is assumed EPSG:4326 even though the reference layer
    # carries its own CRS — confirm the reference data really is 4326.
    return gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")


# ------------------------------------------------------------------
# app/mapset_pipeline/core/publication/publish_geonetwork.py
# ------------------------------------------------------------------
from fastapi import HTTPException
import requests
from sqlalchemy import text
from app.core.config import GEONETWORK_PASS, GEONETWORK_URL, GEONETWORK_USER
from database.connection import engine
from datetime import datetime
from uuid import uuid4
import re


def create_gn_session():
    """Open an authenticated GeoNetwork session.

    Returns (session, xsrf_token); raises when the XSRF cookie is missing.
    """
    session = requests.Session()
    session.auth = (GEONETWORK_USER, GEONETWORK_PASS)

    # GeoNetwork sets the XSRF cookie on any authenticated GET.
    session.get(f"{GEONETWORK_URL}/srv/eng/info?type=me")
    xsrf_token = session.cookies.get("XSRF-TOKEN")

    if not xsrf_token:
        raise Exception("XSRF token missing")

    return session, xsrf_token
def escape_url_params(url: str) -> str:
    """Escape raw '&' characters so the URL is valid inside XML.

    Replaces every '&' that is not already part of '&amp;' with '&amp;'.
    (Reconstructed: the replacement literal was garbled in the source.)
    """
    return re.sub(r'&(?!amp;)', '&amp;', url)


def fix_xml_urls(xml: str) -> str:
    """Escape the URL inside every <gmd:URL>...</gmd:URL> element.

    NOTE(review): the element name was stripped by the source extraction;
    <gmd:URL> is inferred from the ISO 19139 context — confirm against the
    original file.
    """
    def replacer(match):
        original = match.group(1).strip()
        return f"<gmd:URL>{escape_url_params(original)}</gmd:URL>"

    return re.sub(
        r"<gmd:URL>(.*?)</gmd:URL>",
        replacer,
        xml,
        flags=re.DOTALL
    )


async def get_extent(table_name: str):
    """Return the bounding box of public."table_name" as a dict, or None
    when the table has no geometry.

    Currently returns a hard-coded Jawa Timur bbox; the computed extent is
    deliberately ignored (see the commented return below).
    """
    sql = f"""
        SELECT
            ST_XMin(extent), ST_YMin(extent),
            ST_XMax(extent), ST_YMax(extent)
        FROM (
            SELECT ST_Extent(geom) AS extent
            FROM public."{table_name}"
        ) AS box;
    """

    async with engine.connect() as conn:
        # FIX: SQLAlchemy 2.x rejects raw SQL strings; wrap in text().
        result = await conn.execute(text(sql))
        row = result.fetchone()

    if not row or row[0] is None:
        return None

    # Actual computed extent, kept for when the hard-coded bbox is removed:
    # return {"xmin": row[0], "ymin": row[1], "xmax": row[2], "ymax": row[3]}

    # TODO: hard-coded Jawa Timur bounding box overrides the computed extent.
    return {
        "xmin": 110.1372,  # west
        "ymin": -9.3029,   # south
        "xmax": 114.5287,  # east
        "ymax": -5.4819    # north
    }
async def get_author_metadata(table_name: str):
    """Fetch author/organization metadata for a table from the backend schema.

    Raises Exception when no metadata row exists for `table_name`.
    """
    sql = """
        SELECT am.table_title, am.dataset_title, am.dataset_abstract, am.keywords, am.date_created,
               am.organization_name, am.contact_person_name, am.created_at,
               am.contact_email, am.contact_phone, am.geom_type,
               u.organization_id,
               o.address AS organization_address,
               o.email AS organization_email,
               o.phone_number AS organization_phone
        FROM backend.author_metadata AS am
        LEFT JOIN backend.users u ON am.user_id = u.id
        LEFT JOIN backend.organizations o ON u.organization_id = o.id
        WHERE am.table_title = :table
        LIMIT 1
    """

    async with engine.connect() as conn:
        # FIX: SQLAlchemy 2.x requires text() for string SQL with bind params.
        result = await conn.execute(text(sql), {"table": table_name})
        row = result.fetchone()

    if not row:
        raise Exception(f"Tidak ada metadata untuk tabel: {table_name}")

    # Async rows expose ._mapping for dict conversion.
    return dict(row._mapping)


def map_geom_type(gtype):
    """Map a geometry type name (str or list of str) to an ISO 19115
    geometric object code: 'point', 'curve', or 'surface' (default).
    """
    if gtype is None:
        return "surface"

    # A list (e.g. GeoDataFrame.geom_type.unique()) → use its first element.
    if isinstance(gtype, list):
        if not gtype:
            return "surface"
        gtype = gtype[0]

    gtype = str(gtype).lower()

    # FIX: check point/line BEFORE the 'multi' catch-all — previously
    # 'MultiPoint'/'MultiLineString' were misclassified as 'surface'.
    if "point" in gtype:
        return "point"
    if "line" in gtype:
        return "curve"
    if "polygon" in gtype or "multi" in gtype:
        return "surface"

    return "surface"
{meta['organization_phone']} + + + {meta['organization_phone']} + + + + + + + {meta['organization_address']} + + + Surabaya + + + Jawa Timur + + + Indonesia + + + {meta['organization_email']} + + + + + + + + + + + + + + + + + + + + {keywords_xml} + + + + + + + + + + + + Penggunaan data harus mencantumkan sumber: {meta['organization_name']}. + + + + + + + + + + + + 25000 + + + + + + + + + + + + + + + + {extent['xmin']} + {extent['xmax']} + {extent['ymin']} + {extent['ymax']} + + + + + + + + + + true + + + + + + {meta['dataset_title']} + + + + + {meta['created_at'].isoformat()}+07:00 + + + + + + + + {meta['date_created'].year} + + + + + + + + + + + + + {geoserver_links["wms_url"]} + + + DB:POSTGIS + + + {meta["dataset_title"]} + + + {meta["dataset_title"]} + + + + + + + {geoserver_links["wms_url"]} + + + WWW:LINK-1.0-http--link + + + {meta["dataset_title"]} + + + {meta["dataset_title"]} + + + + + + + {geoserver_links["wms_url"]} + + + OGC:WMS + + + {meta["dataset_title"]} + + + + + + + + {geoserver_links["wfs_url"]} + + + OGC:WFS + + + {meta["dataset_title"]} + + + + + + + + + + + + + + + + + + + + Data dihasilkan dari digitasi peta dasar skala 1:25000 menggunakan QGIS. + + + + + + +""" + + +# Geonetwork version 4.4.9.0 +def upload_metadata_to_geonetwork(xml_metadata: str): + # session = requests.Session() + # session.auth = (GEONETWORK_USER, GEONETWORK_PASS) + + # # 1. 
# Tested against GeoNetwork 4.4.9.0
def upload_metadata_to_geonetwork(xml_metadata: str):
    """Upload an XML metadata record to GeoNetwork and publish it.

    Returns the UUID assigned by GeoNetwork; raises ValueError when the
    response contains no UUID.
    """
    session, xsrf_token = create_gn_session()
    headers = {
        'X-XSRF-TOKEN': xsrf_token,
        'Accept': 'application/json'
    }

    GN_API_RECORDS_URL = f"{GEONETWORK_URL}/srv/api/records"

    # GeoNetwork requires a multipart/form-data upload.
    files = {
        'file': ('metadata.xml', xml_metadata, 'application/xml')
    }

    params = {
        "ownerGroup": 1,  # all
        "ownerUser": 1    # admin
    }

    response = session.post(
        GN_API_RECORDS_URL,
        params=params,
        files=files,
        headers=headers,
        cookies=session.cookies.get_dict()
    )
    # FIX: fail fast on an HTTP error instead of parsing an error body as JSON.
    response.raise_for_status()

    # The upload response maps batch ids to lists of created records.
    metadata_infos = response.json().get("metadataInfos", {})
    uuid = None
    for records in metadata_infos.values():
        if records and isinstance(records, list):
            uuid = records[0].get("uuid")
            break
    if not uuid:
        raise ValueError("UUID not found in GeoNetwork response")

    # Make the record publicly viewable.
    publish_record(session, uuid)

    return uuid
async def publish_metadata(table_name: str, geoserver_links: dict):
    """Build, sanitize, and upload the ISO metadata record for a table.

    Returns the GeoNetwork UUID of the created record.
    """
    extent = await get_extent(table_name)
    meta = await get_author_metadata(table_name)

    xml = generate_metadata_xml(
        table_name=meta["dataset_title"],
        meta=meta,
        extent=extent,
        geoserver_links=geoserver_links,
    )

    uuid = upload_metadata_to_geonetwork(fix_xml_urls(xml))
    print(f"[GeoNetwork] Metadata uploaded. UUID = {uuid}")
    return uuid


def publish_record(session, uuid):
    """Grant group 1 ('all') view rights on a record so it becomes public."""
    print('[uuid]', uuid)

    response = session.put(
        f"{GEONETWORK_URL}/srv/api/records/{uuid}/sharing",
        json={
            "clear": True,
            "privileges": [
                {
                    "group": 1,
                    "operations": {"view": True},
                }
            ],
        },
        headers={
            "X-XSRF-TOKEN": session.cookies.get('XSRF-TOKEN'),
            "Accept": "application/json",
            "Content-Type": "application/json",
        },
    )
    response.raise_for_status()


# ------------------------------------------------------------------
# app/mapset_pipeline/core/publication/publish_geoserver.py
# ------------------------------------------------------------------
import requests
import json
import os
from app.core.config import GEOSERVER_URL, GEOSERVER_USER, GEOSERVER_PASS, GEOSERVER_WORKSPACE

# DATASTORE = "postgis"  # per OPD
DATASTORE = "server_lokal"

# Temporary SLD files live under <package root>/style_temp.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MAIN_DIR = os.path.abspath(os.path.join(BASE_DIR, "..", ".."))
SLD_DIR = os.path.join(MAIN_DIR, "style_temp")
def publish_layer_to_geoserver(table: str, job_id: str):
    """Publish a PostGIS table as a GeoServer layer, upload and apply its
    SLD style, and return the WMS/WFS/preview links used by GeoNetwork.

    `job_id` names the temporary SLD file expected at SLD_DIR/{job_id}.sld;
    the file is deleted after a successful upload.
    """
    print(f"[GeoServer] Publish layer + upload SLD: {table}")

    # ==========================
    # 1. Publish Feature Type
    #    (computeDefault=true lets GeoServer derive bbox/attributes)
    # ==========================
    ft_url = (
        f"{GEOSERVER_URL}/rest/workspaces/{GEOSERVER_WORKSPACE}"
        f"/datastores/{DATASTORE}/featuretypes?computeDefault=true"
    )

    payload = {
        "featureType": {
            "name": table,
            "nativeName": table,
            "enabled": True
        }
    }

    ft_resp = requests.post(
        ft_url,
        auth=(GEOSERVER_USER, GEOSERVER_PASS),
        headers={"Content-Type": "application/json"},
        data=json.dumps(payload)
    )
    # FIX: the response was silently ignored; surface failures but keep them
    # non-fatal (republishing an existing layer returns an error here).
    if ft_resp.status_code not in (200, 201):
        print(f"[WARNING] FeatureType publish returned {ft_resp.status_code}: {ft_resp.text[:200]}")
    else:
        print(f"[GeoServer] FeatureType published for: {table}")

    # ==========================================
    # 2. Upload SLD file to GeoServer
    # ==========================================
    sld_file = f"{SLD_DIR}/{job_id}.sld"
    style_name = table  # style name matches the table

    if not os.path.exists(sld_file):
        print(f"[WARNING] SLD file tidak ditemukan: {sld_file}")
    else:
        print(f"[GeoServer] Upload SLD {sld_file}")

        # Workspace-scoped style endpoint.
        style_url = (
            f"{GEOSERVER_URL}/rest/workspaces/"
            f"{GEOSERVER_WORKSPACE}/styles"
        )

        with open(sld_file, "r", encoding="utf-8") as f:
            sld_content = f.read()

        # Strip BOM/leading whitespace: GeoServer rejects SLD bodies that do
        # not start at the XML declaration.
        sld_content = sld_content.lstrip("\ufeff \t\r\n")

        resp = requests.post(
            f"{style_url}?name={style_name}",
            auth=(GEOSERVER_USER, GEOSERVER_PASS),
            headers={"Content-Type": "application/vnd.ogc.sld+xml"},
            data=sld_content.encode("utf-8")
        )

        if resp.status_code not in (200, 201):
            raise Exception(
                f"Upload SLD gagal ({resp.status_code}): {resp.text}"
            )

        print(f"[GeoServer] SLD uploaded: {style_name}")

    # ==========================================
    # 3. Apply SLD to the layer
    # ==========================================
    # NOTE(review): this runs even when the SLD file was missing, relying on
    # a style of the same name already existing on the server — confirm.
    layer_url = f"{GEOSERVER_URL}/rest/layers/{GEOSERVER_WORKSPACE}:{table}"

    payload = {
        "layer": {
            "defaultStyle": {
                "name": style_name,
                "workspace": GEOSERVER_WORKSPACE
            },
            "enabled": True
        }
    }

    requests.put(
        layer_url,
        auth=(GEOSERVER_USER, GEOSERVER_PASS),
        headers={"Content-Type": "application/json"},
        data=json.dumps(payload)
    )

    print(f"[GeoServer] SLD applied as default style for {table}")

    # ==========================================
    # 4. Delete SLD file from local folder
    # ==========================================
    # FIX: guard the unlink — os.remove() previously raised
    # FileNotFoundError whenever the SLD file was absent (see step 2).
    if os.path.exists(sld_file):
        os.remove(sld_file)
        print(f"[CLEANUP] SLD file removed: {sld_file}")

    # ==============================================
    # 5. Reload GeoServer (optional but recommended)
    # ==============================================
    requests.post(
        f"{GEOSERVER_URL}/rest/reload",
        auth=(GEOSERVER_USER, GEOSERVER_PASS)
    )

    # ====================================================
    # 6. Generate GeoServer WMS/WFS links for GeoNetwork
    # ====================================================
    wms_link = (
        f"{GEOSERVER_URL}/{GEOSERVER_WORKSPACE}/wms?"
        f"service=WMS&request=GetMap&layers={GEOSERVER_WORKSPACE}:{table}"
    )
    wfs_link = (
        f"{GEOSERVER_URL}/{GEOSERVER_WORKSPACE}/wfs?"
        f"service=WFS&request=GetFeature&typeName={GEOSERVER_WORKSPACE}:{table}"
    )
    openlayer_url = (
        f"{GEOSERVER_URL}/{GEOSERVER_WORKSPACE}/wms?"
        f"service=WMS"
        f"&version=1.1.0"
        f"&request=GetMap"
        f"&layers={GEOSERVER_WORKSPACE}:{table}"
        f"&styles="
        f"&bbox=110.89528623700005%2C-8.780412043999945%2C116.26994997700001%2C-5.042971664999925"
        f"&width=768"
        f"&height=384"
        f"&srs=EPSG:4326"
        f"&format=application/openlayers"
    )

    return {
        "table": table,
        "style": style_name,
        "wms_url": wms_link,
        "wfs_url": wfs_link,
        "layer_url": openlayer_url
    }
Upload SLD +# # ========================================== +# style_url = f"{GEOSERVER_URL}/rest/styles" + +# with open(chosen_sld, "rb") as sld: +# requests.post( +# f"{style_url}?name={style_name}&workspace={WORKSPACE}", +# auth=(GEOSERVER_USER, GEOSERVER_PASS), +# headers={"Content-Type": "application/vnd.ogc.sld+xml"}, +# data=sld.read() +# ) + +# print(f"[GeoServer] SLD uploaded: {style_name}") + +# # ========================================== +# # 4. Apply SLD ke layer +# # ========================================== +# layer_url = f"{GEOSERVER_URL}/rest/layers/{WORKSPACE}:{table}" + +# payload = { +# "layer": { +# "defaultStyle": { +# "name": style_name, +# "workspace": WORKSPACE +# }, +# "enabled": True +# } +# } + +# requests.put( +# layer_url, +# auth=(GEOSERVER_USER, GEOSERVER_PASS), +# headers={"Content-Type": "application/json"}, +# data=json.dumps(payload) +# ) + +# print(f"[GeoServer] Style '{style_name}' applied to layer '{table}'") + +# # ========================================== +# # 5. Delete table.sld jika ada +# # ========================================== +# if delete_after: +# table_sld.unlink() +# print(f"[CLEANUP] File SLD '{table}.sld' dihapus") + +# # ==================================================== +# # 6. Reload GeoServer (opsional tapi aman) +# # ==================================================== +# requests.post( +# f"{GEOSERVER_URL}/rest/reload", +# auth=(GEOSERVER_USER, GEOSERVER_PASS) +# ) + +# # ==================================================== +# # 7. Generate GeoServer WMS/WFS link untuk GeoNetwork +# # ==================================================== + +# wms_link = ( +# f"{GEOSERVER_URL}/{WORKSPACE}/wms?" +# f"service=WMS&request=GetMap&layers={WORKSPACE}:{table}" +# ) + +# wfs_link = ( +# f"{GEOSERVER_URL}/{WORKSPACE}/wfs?" 
# ------------------------------------------------------------------
# app/mapset_pipeline/core/readers/__init__.py
# ------------------------------------------------------------------
# Re-export the main reader entry points so callers can import them
# directly from the package ('.' = this package).
from .reader_csv import read_csv
from .reader_shp import read_shp
from .reader_gdb import read_gdb
from .reader_mpk import read_mpk
from .reader_pdf import read_pdf, convert_df

# Public API for "from ... import *".
__all__ = [
    "read_csv",
    "read_shp",
    "read_gdb",
    "read_mpk",
    "read_pdf",
    "convert_df"
]


# ------------------------------------------------------------------
# app/mapset_pipeline/core/readers/reader_csv.py
# ------------------------------------------------------------------
import pandas as pd
import re
import csv
import os
from itertools import islice


def detect_header_line(path, max_rows=10):
    """Guess the header row index within the first `max_rows` lines.

    Scores each line by (fraction of alphabetic cells − fraction of numeric
    cells); the highest-scoring line is assumed to be the header.
    """
    with open(path, 'r', encoding='utf-8', errors='ignore') as f:
        # FIX: islice stops at EOF — files shorter than max_rows previously
        # raised (StopIteration escaping a comprehension → RuntimeError).
        lines = list(islice(f, max_rows))

    header_line_idx = 0
    best_score = -1
    for i, line in enumerate(lines):
        cells = re.split(r'[;,|\t]', line.strip())
        alpha_ratio = sum(bool(re.search(r'[A-Za-z]', c)) for c in cells) / max(len(cells), 1)
        digit_ratio = sum(bool(re.search(r'\d', c)) for c in cells) / max(len(cells), 1)
        score = alpha_ratio - digit_ratio
        if score > best_score:
            best_score = score
            header_line_idx = i
    return header_line_idx


def detect_delimiter(path, sample_size=2048):
    """Detect the CSV delimiter via csv.Sniffer on a leading sample.

    Falls back to the first of ',', ';', tab, '|' found in the sample, then
    to ',' as a last resort.
    """
    with open(path, 'r', encoding='utf-8', errors='ignore') as f:
        sample = f.read(sample_size)
    sniffer = csv.Sniffer()
    try:
        dialect = sniffer.sniff(sample)
        return dialect.delimiter
    except Exception:
        for delim in [',', ';', '\t', '|']:
            if delim in sample:
                return delim
    return ','
f: + sample = f.read(sample_size) + sniffer = csv.Sniffer() + try: + dialect = sniffer.sniff(sample) + return dialect.delimiter + except Exception: + for delim in [',', ';', '\t', '|']: + if delim in sample: + return delim + return ',' + + +# def read_csv(path: str, sheet: str = None): +# ext = os.path.splitext(path)[1].lower() + +# try: +# if ext in ['.csv']: +# header_line = detect_header_line(path) +# delimiter = detect_delimiter(path) +# print(f"[INFO] Detected header line: {header_line + 1}, delimiter: '{delimiter}'") + +# df = pd.read_csv( +# path, +# header=header_line, +# sep=delimiter, +# encoding='utf-8', +# low_memory=False, +# thousands=',' +# ) + +# elif ext in ['.xlsx', '.xls']: +# print(f"[INFO] Membaca file Excel: {os.path.basename(path)}") +# xls = pd.ExcelFile(path) +# print(f"[INFO] Ditemukan {len(xls.sheet_names)} sheet: {xls.sheet_names}") + +# if sheet: +# if sheet not in xls.sheet_names: +# raise ValueError(f"Sheet '{sheet}' tidak ditemukan dalam file {os.path.basename(path)}") +# print(f"[INFO] Membaca sheet yang ditentukan: '{sheet}'") +# df = pd.read_excel(xls, sheet_name=sheet, header=0, dtype=str) +# df = df.dropna(how='all').dropna(axis=1, how='all') + +# else: +# print("[INFO] Tidak ada sheet yang ditentukan, mencari sheet paling relevan...") +# best_sheet = None +# best_score = -1 +# best_df = None + +# for sheet_name in xls.sheet_names: +# try: +# temp_df = pd.read_excel(xls, sheet_name=sheet_name, header=0, dtype=str) +# temp_df = temp_df.dropna(how='all').dropna(axis=1, how='all') + +# if len(temp_df) == 0 or len(temp_df.columns) < 2: +# continue + +# # hitung skor relevansi +# text_ratio = temp_df.applymap(lambda x: isinstance(x, str)).sum().sum() / (temp_df.size or 1) +# row_score = len(temp_df) +# score = (row_score * 0.7) + (text_ratio * 100) + +# if score > best_score: +# best_score = score +# best_sheet = sheet_name +# best_df = temp_df + +# except Exception as e: +# print(f"[WARN] Gagal membaca sheet {sheet_name}: {e}") +# 
def detect_header_line(path, max_rows=10):
    """Return the 0-based index of the line most likely to be the header.

    Scores each of the first ``max_rows`` lines by (share of cells that
    contain letters) minus (share of cells that contain digits); header rows
    are mostly alphabetic while data rows are mostly numeric.
    """
    from itertools import islice
    with open(path, 'r', encoding='utf-8', errors='ignore') as f:
        # islice stops quietly at EOF; the previous `next(f)` raised
        # StopIteration for files shorter than max_rows lines.
        lines = list(islice(f, max_rows))
    header_line_idx = 0
    best_score = -1
    for i, line in enumerate(lines):
        # Split on the common delimiter candidates (semicolon, comma, pipe, tab).
        cells = re.split(r'[;,|\t]', line.strip())
        alpha_ratio = sum(bool(re.search(r'[A-Za-z]', c)) for c in cells) / max(len(cells), 1)
        digit_ratio = sum(bool(re.search(r'\d', c)) for c in cells) / max(len(cells), 1)
        score = alpha_ratio - digit_ratio
        if score > best_score:
            best_score = score
            header_line_idx = i
    return header_line_idx


def detect_delimiter(path, sample_size=2048):
    """Guess the delimiter of a delimited text file.

    Tries csv.Sniffer on a small sample first, then falls back to the first
    of the usual candidates present in the sample, and finally to ','.
    """
    with open(path, 'r', encoding='utf-8', errors='ignore') as f:
        sample = f.read(sample_size)
    sniffer = csv.Sniffer()
    try:
        dialect = sniffer.sniff(sample)
        return dialect.delimiter
    except Exception:
        for delim in [',', ';', '\t', '|']:
            if delim in sample:
                return delim
        return ','


def read_csv(path: str, sheet: str = None):
    """Read a tabular file (.csv, .xlsx, .xls) into a cleaned DataFrame.

    For CSV the header line and delimiter are auto-detected; for Excel either
    the requested sheet or the most "relevant" sheet (scored by row count and
    text ratio) is used. Column cleaning (dropping 'Unnamed' columns, numeric
    coercion) never fails the read. Returns an empty DataFrame when nothing
    can be recovered.

    Args:
        path: file location.
        sheet: optional Excel sheet name; ignored for CSV.
    """
    ext = os.path.splitext(path)[1].lower()
    df = pd.DataFrame()  # default result

    try:
        # --- FILE READING ---
        if ext in ['.csv']:
            header_line = detect_header_line(path)
            delimiter = detect_delimiter(path)
            print(f"[INFO] Detected header line: {header_line + 1}, delimiter: '{delimiter}'")

            df = pd.read_csv(
                path,
                header=header_line,
                sep=delimiter,
                encoding='utf-8',
                low_memory=False,
                thousands=','
            )

        elif ext in ['.xlsx', '.xls']:
            print(f"[INFO] Membaca file Excel: {os.path.basename(path)}")
            # openpyxl only understands .xlsx; for legacy .xls let pandas
            # pick the appropriate engine (xlrd).
            excel_engine = 'openpyxl' if ext == '.xlsx' else None
            xls = pd.ExcelFile(path, engine=excel_engine)
            print(f"[INFO] Ditemukan {len(xls.sheet_names)} sheet: {xls.sheet_names}")

            if sheet:
                if sheet not in xls.sheet_names:
                    raise ValueError(f"Sheet '{sheet}' tidak ditemukan.")
                print(f"[INFO] Membaca sheet yang ditentukan: '{sheet}'")
                # The ExcelFile already carries the engine; re-passing
                # engine= here makes pandas raise ValueError.
                df = pd.read_excel(xls, sheet_name=sheet, header=0, dtype=str)
                df = df.dropna(how='all').dropna(axis=1, how='all')
            else:
                print("[INFO] Tidak ada sheet yang ditentukan, mencari sheet paling relevan...")
                best_sheet = None
                best_score = -1
                best_df = None

                for sheet_name in xls.sheet_names:
                    try:
                        temp_df = pd.read_excel(xls, sheet_name=sheet_name, header=0, dtype=str)
                        temp_df = temp_df.dropna(how='all').dropna(axis=1, how='all')

                        if len(temp_df) == 0 or len(temp_df.columns) < 2:
                            continue

                        # Relevance score: mostly row count, plus share of text cells.
                        text_ratio = temp_df.applymap(lambda x: isinstance(x, str)).sum().sum() / (temp_df.size or 1)
                        row_score = len(temp_df)
                        score = (row_score * 0.7) + (text_ratio * 100)

                        if score > best_score:
                            best_score = score
                            best_sheet = sheet_name
                            best_df = temp_df
                    except Exception as e:
                        print(f"[WARN] Gagal membaca sheet {sheet_name}: {e}")
                        continue

                if best_df is not None:
                    print(f"[INFO] Sheet terpilih: '{best_sheet}' dengan skor {best_score:.2f}")
                    df = best_df
                else:
                    raise ValueError("Tidak ada sheet valid yang dapat dibaca.")
        else:
            raise ValueError("Format file tidak dikenali (hanya .csv, .xlsx, .xls)")

        # --- CLEANING (kept separate so numeric-conversion problems never
        # fail a successful read) ---
        if not df.empty:
            df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed')]
            df.columns = [str(c).strip() for c in df.columns]
            df = df.dropna(how='all')

            for col in df.columns:
                try:
                    # Treat a column as numeric if any cell (commas removed)
                    # looks like a plain number.
                    if df[col].astype(str).str.replace(',', '', regex=False).str.match(r'^-?\d+(\.\d+)?$').any():
                        clean_col = df[col].astype(str).str.replace(',', '', regex=False)
                        # coerce: broken cells become NaN instead of crashing
                        df[col] = pd.to_numeric(clean_col, errors='coerce')
                except Exception as ex:
                    # Leave the column as string/object and move on.
                    print(f"[WARN] Gagal konversi numerik pada kolom '{col}': {ex}")
                    pass

        return df

    except Exception as e:
        print(f"[WARN] Gagal membaca file utama ({e}).")

        # Only fall back to the default CSV reader for text-like inputs;
        # never force-read an Excel file as CSV.
        if ext in ['.csv', '.txt']:
            print("[INFO] Mencoba fallback ke default CSV reader...")
            try:
                return pd.read_csv(path, encoding='utf-8', low_memory=False, thousands=',')
            except Exception as e2:
                print(f"[ERROR] Fallback CSV juga gagal: {e2}")

        print("[ERROR] Tidak dapat memulihkan pembacaan file Excel.")
        return pd.DataFrame()
# ---------------------------------------------------------------------------
# app/mapset_pipeline/core/readers/reader_gdb.py
# ---------------------------------------------------------------------------

import geopandas as gpd
import fiona
import zipfile
import tempfile
import os
import shutil

def read_gdb(zip_path: str, layer: str = None):
    """Read one layer from an ESRI File Geodatabase packed in a ZIP archive.

    Accepts either a ZIP containing a *.gdb folder, or a ZIP with loose
    *.gdbtable files (a synthetic .gdb folder is rebuilt around them).

    Args:
        zip_path: path to the ZIP archive.
        layer: optional layer name; defaults to the first available layer.

    Returns:
        geopandas.GeoDataFrame (EPSG:4326 assumed when no CRS is present).

    Raises:
        ValueError: not a ZIP, no GDB content, no readable layer, or a
        layer-read failure.
    """
    if not zip_path.lower().endswith(".zip"):
        raise ValueError("File GDB harus berupa ZIP yang berisi folder .gdb atau file .gdbtable")

    tmpdir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(tmpdir)

        # macOS zip tools add a __MACOSX shadow folder; discard it.
        macosx_path = os.path.join(tmpdir, "__MACOSX")
        if os.path.exists(macosx_path):
            shutil.rmtree(macosx_path)

        # Locate a *.gdb directory anywhere in the extracted tree.
        gdb_folders = []
        for root, dirs, _ in os.walk(tmpdir):
            for d in dirs:
                if d.lower().endswith(".gdb"):
                    gdb_folders.append(os.path.join(root, d))

        if not gdb_folders:
            # No .gdb folder: maybe the archive holds loose .gdbtable files;
            # rebuild a synthetic .gdb folder around them.
            gdbtable_files = []
            for root, _, files in os.walk(tmpdir):
                for f in files:
                    if f.lower().endswith(".gdbtable"):
                        gdbtable_files.append(os.path.join(root, f))

            if gdbtable_files:
                first_folder = os.path.dirname(gdbtable_files[0])
                base_name = os.path.basename(first_folder)
                gdb_folder_path = os.path.join(tmpdir, f"{base_name}.gdb")
                os.makedirs(gdb_folder_path, exist_ok=True)

                for fpath in os.listdir(first_folder):
                    if ".gdb" in fpath.lower():
                        shutil.move(os.path.join(first_folder, fpath), os.path.join(gdb_folder_path, fpath))

                gdb_folders.append(gdb_folder_path)
            else:
                raise ValueError("Tidak ditemukan folder .gdb atau file .gdbtable di dalam ZIP")

        gdb_path = gdb_folders[0]
        layers = fiona.listlayers(gdb_path)

        chosen_layer = layer or (layers[0] if layers else None)
        if not chosen_layer:
            raise ValueError("Tidak ada layer GDB yang bisa dibaca.")

        print(f"[DEBUG] Membaca layer: {chosen_layer}")

        try:
            gdf = gpd.read_file(gdb_path, layer=chosen_layer)
        except Exception as e:
            raise ValueError(f"Gagal membaca layer dari GDB: {e}")

        if gdf.crs is None:
            # No CRS in the source; assume WGS84 lat/lon.
            gdf.set_crs("EPSG:4326", inplace=True)

        return gdf
    finally:
        # Previously the temp dir leaked when e.g. fiona.listlayers raised;
        # a finally block guarantees cleanup on every path.
        shutil.rmtree(tmpdir, ignore_errors=True)
# ---------------------------------------------------------------------------
# app/mapset_pipeline/core/readers/reader_mpk.py
#
# Heavy GIS dependencies (geopandas, py7zr, pyogrio) are imported lazily
# inside the functions so that merely importing this module stays cheap.
# ---------------------------------------------------------------------------

import os
import tempfile
from io import BytesIO


def find_data_source(extract_dir: str):
    """Locate the dataset inside *extract_dir*.

    A *.gdb folder anywhere in the tree wins over any loose *.shp file;
    otherwise the first *.shp found (in os.walk order) is returned.

    Raises:
        ValueError: when neither a .gdb folder nor a .shp file exists.
    """
    first_shp = None
    # Single walk instead of two: remember the first .shp, return
    # immediately on the first .gdb (same result, half the I/O).
    for root, dirs, files in os.walk(extract_dir):
        for d in dirs:
            if d.lower().endswith(".gdb"):
                return os.path.join(root, d)
        if first_shp is None:
            for f in files:
                if f.lower().endswith(".shp"):
                    first_shp = os.path.join(root, f)
                    break
    if first_shp is not None:
        return first_shp
    raise ValueError("Tidak ditemukan data source yang didukung (.gdb atau .shp).")


def get_main_layer(gdb_path: str):
    """Pick the main layer name of a geodatabase (.gdb).

    Skips ArcGIS attachment tables (*__ATTACH); falls back to the first
    layer when only attachments exist.

    Raises:
        ValueError: when the layer list cannot be read or is empty.
    """
    import pyogrio  # lazy: heavy optional dependency

    try:
        layers = pyogrio.list_layers(gdb_path)
        for layer in layers:
            if not layer[0].lower().endswith("__attach"):
                return layer[0]
        if layers:
            return layers[0][0]
        raise ValueError(f"Tidak ada layer utama yang valid di {gdb_path}")
    except Exception as e:
        raise ValueError(f"Gagal membaca daftar layer GDB: {e}")


def read_mpk(path: str):
    """Extract an ArcGIS Map Package (.mpk, a 7z archive) and load its data.

    Returns:
        geopandas.GeoDataFrame re-projected to EPSG:4326.

    Raises:
        ValueError: empty/corrupt archive, unsupported content or missing CRS.
    """
    import geopandas as gpd            # lazy: heavy optional dependencies
    from py7zr import SevenZipFile

    with open(path, "rb") as f:
        mpk_bytes = f.read()

    if not mpk_bytes:
        raise ValueError("File MPK kosong atau tidak valid.")

    with tempfile.TemporaryDirectory() as tempdir:
        try:
            with SevenZipFile(BytesIO(mpk_bytes), mode="r") as z:
                z.extractall(path=tempdir)
        except Exception as e:
            raise ValueError(f"File MPK rusak atau tidak valid: {e}")

        src_path = find_data_source(tempdir)

        if src_path.lower().endswith(".gdb"):
            layer_name = get_main_layer(src_path)
            gdf = gpd.read_file(src_path, layer=layer_name)
        else:
            gdf = gpd.read_file(src_path)

        if gdf.crs is None:
            raise ValueError("CRS tidak terdeteksi. Pastikan file memiliki informasi proyeksi (.prj).")

        gdf = gdf.to_crs(epsg=4326)

        print(f"[INFO] Berhasil membaca {len(gdf)} fitur")
        return gdf
def detect_header_rows(rows):
    """Split raw extracted table rows into (header_rows, body_rows).

    The first row that looks like data — mixed text+numbers, digit ratio
    above 0.3, a pure row-number cell, or a jump from a digit-free row to a
    digit-bearing one — marks the start of the body. Short text rows in the
    header zone are kept only when followed by a digit-free row (likely a
    multi-line header continuation).

    Returns:
        tuple[list, list]: (header rows, body rows); ([], []) for no input.
    """
    if not rows:
        # BUG FIX: callers unpack two values (`head, body = ...`); the old
        # `return []` raised "not enough values to unpack" on empty input.
        return [], []

    ratios = [row_ratio(r) for r in rows]
    body_start_index = None

    for i in range(1, len(rows)):
        row = rows[i]
        if has_mixed_text_and_numbers(row):
            body_start_index = i
            break
        if ratios[i] > 0.3:
            body_start_index = i
            break
        if any(isinstance(c, str) and re.match(r'^\d+$', c.strip()) for c in row):
            body_start_index = i
            break
        if ratios[i - 1] == 0 and ratios[i] > 0:
            body_start_index = i
            break

    if body_start_index is None:
        # No data-looking row found: everything is header.
        body_start_index = len(rows)

    potential_headers = rows[:body_start_index]
    body_filtered = rows[body_start_index:]
    header_filtered = []
    for idx, row in enumerate(potential_headers):
        if is_short_text_row(row):
            if idx + 1 < len(potential_headers) and ratios[idx + 1] == 0:
                header_filtered.append(row)
            else:
                continue
        else:
            header_filtered.append(row)

    return header_filtered, body_filtered


def merge_multiline_header(header_rows):
    """Collapse a multi-row header into a single list of column names.

    For each column, the last non-empty cell wins; embedded newlines become
    spaces and empty names are dropped.
    """
    final_header = []
    for col in zip(*header_rows):
        val = next((v for v in reversed(col) if v and str(v).strip()), '')
        val = str(val).replace('\n', ' ').strip()
        final_header.append(val)
    final_header = [v for v in final_header if v not in ['', None]]
    return final_header
# ---------------------------------------------------------------------------
# app/mapset_pipeline/core/readers/reader_pdf.py (continued)
# ---------------------------------------------------------------------------

import re
import pdfplumber
import pandas as pd
from app.mapset_pipeline.utils.pdf_cleaner import get_number_column_index, get_start_end_number, normalize_number_column, row_ratio, has_mixed_text_and_numbers, is_short_text_row, parse_page_selection, filter_geo_admin_column, cleaning_column
from services.upload_file.upload_exceptions import PDFReadError
from utils.logger_config import setup_logger

logger = setup_logger(__name__)


def merge_parsed_table(tables):
    """Re-join tables that were split across PDF pages.

    A table whose row-number column starts at 1 (or has no number column)
    is a "root"; a fragment whose numbering continues exactly where a root
    with identical columns stopped is appended to that root.
    """
    roots = []
    fragments = []

    # STEP 1: classify into roots and fragments.
    for table in tables:
        num_idx = get_number_column_index(table["columns"])
        if num_idx is None:
            roots.append(table)
            continue

        start_no, _ = get_start_end_number(table["rows"], num_idx)
        if start_no == 1:
            roots.append(table)
        else:
            fragments.append(table)

    # STEP 2: attach each fragment to the first matching root.
    for frag in fragments:
        frag_idx = get_number_column_index(frag["columns"])
        f_start, _ = get_start_end_number(frag["rows"], frag_idx)

        for root in roots:
            if root["columns"] != frag["columns"]:
                continue

            root_idx = get_number_column_index(root["columns"])
            _, r_end = get_start_end_number(root["rows"], root_idx)

            if f_start == r_end + 1:
                root["rows"].extend(frag["rows"])
                break  # a fragment may attach to only one root

    return roots


def read_pdf(path: str, page: str):
    """Semi-automatically extract tables from a PDF using pdfplumber.

    Pipeline:
      1. open the PDF and resolve the page selection (e.g. "1,3-5");
      2. detect and extract raw tables per selected page (tables with fewer
         than 3 rows are skipped);
      3. split header/body rows (detect_header_rows) and merge multi-line
         headers (merge_multiline_header);
      4. clean body columns (cleaning_column), drop geo-admin columns
         (filter_geo_admin_column), stitch split tables back together
         (merge_parsed_table) and normalise the row-number column.

    Args:
        path: PDF file location.
        page: page selection string such as "1", "2-4" or "1,3-5";
              defaults to "1" when empty.

    Returns:
        list[dict]: tables as {"title", "columns", "rows"}.

    Raises:
        PDFReadError: on any read/parse failure (HTTP 422 semantics).
    """
    try:
        pdf_path = path
        selected_page = page if page else "1"
        tables_data = []

        with pdfplumber.open(pdf_path) as pdf:
            total_pages = len(pdf.pages)
            selected_pages = parse_page_selection(selected_page, total_pages)

            logger.info(f"[INFO] Total Halaman PDF: {total_pages}")
            logger.info(f"[INFO] Total Halaman yang dipilih: {len(selected_pages)}")
            logger.info(f"[INFO] Halaman yang dipilih untuk dibaca: {selected_pages}")

            for page_num in selected_pages:
                pdf_page = pdf.pages[page_num - 1]
                tables = pdf_page.find_tables()
                logger.info(f"\n\n[INFO] Halaman {page_num}: {len(tables)} tabel terdeteksi")

                # NOTE: title extraction via extract_text_lines was removed;
                # it was unreliable for landscape pages.
                for i, t in enumerate(tables, start=1):
                    table = t.extract()
                    if len(table) > 2:
                        print(f"[TBL] tabel : {i} - halaman {page_num}")
                        tables_data.append({"page": f"halaman {page_num} - {i}", "table": table})

        logger.info(f"\nTotal tabel terbaca: {len(tables_data)}\n")

        header_only, body_only, page_info = [], [], []
        for tbl in tables_data:
            head, body = detect_header_rows(tbl["table"])
            header_only.append(head)
            body_only.append(body)
            page_info.append(tbl["page"])

        clean_header = [merge_multiline_header(h) for h in header_only]
        clean_body = []

        for i, raw_body in enumerate(body_only):
            # Drop empty cells, then fit the body to the cleaned header.
            con_body = [[cell for cell in row if cell not in (None, '')] for row in raw_body]
            cleaned = cleaning_column(clean_header[i], [con_body])
            clean_body.append(cleaned[0])

        parsed = []
        # `page_label` (not `page`) so the function parameter is not shadowed.
        for cols, rows, page_label in zip(clean_header, clean_body, page_info):
            parsed.append({
                "title": page_label,
                "columns": cols,
                "rows": rows
            })

        clean_parsed = filter_geo_admin_column(parsed)
        merge_parsed = merge_parsed_table(clean_parsed)

        logger.info(f"\nTotal tabel valid: {len(merge_parsed)}\n")

        ordered_tables = [normalize_number_column(t) for t in merge_parsed]
        return ordered_tables

    except Exception as e:
        raise PDFReadError(f"Gagal membaca PDF: {e}", code=422)
def convert_df(payload):
    """Turn a {"columns", "rows"[, "title"]} table payload into a DataFrame.

    Validates that both keys exist, are lists, and that every row has as
    many cells as there are columns. The optional title is stored in
    ``df.attrs["title"]``.

    Raises:
        PDFReadError: (code 400) when the payload is malformed.
    """
    try:
        if "columns" not in payload or "rows" not in payload:
            raise ValueError("Payload tidak memiliki key 'columns' atau 'rows'.")

        if not isinstance(payload["columns"], list):
            raise TypeError("'columns' harus berupa list.")
        if not isinstance(payload["rows"], list):
            raise TypeError("'rows' harus berupa list.")

        for i, row in enumerate(payload["rows"]):
            if len(row) != len(payload["columns"]):
                raise ValueError(f"Jumlah elemen di baris ke-{i} tidak sesuai jumlah kolom.")

        df = pd.DataFrame(payload["rows"], columns=payload["columns"])

        if "title" in payload:
            # Keep the table title as DataFrame metadata.
            df.attrs["title"] = payload["title"]

        return df

    except Exception as e:
        raise PDFReadError(f"Gagal konversi payload ke DataFrame: {e}", code=400)

# ---------------------------------------------------------------------------
# app/mapset_pipeline/core/readers/reader_shp.py
# ---------------------------------------------------------------------------

import zipfile
import tempfile
import os
import shutil

def read_shp(path: str):
    """Read a shapefile (bare .shp or inside a ZIP) into a GeoDataFrame.

    When the geometry column is empty, the features are rebuilt from the raw
    fiona records. EPSG:4326 is assumed when no CRS (.prj) is present.

    Raises:
        ValueError: empty path, no .shp inside the ZIP, or a read failure.
    """
    import geopandas as gpd            # lazy: heavy GIS dependencies
    import fiona
    from shapely.geometry import shape

    if not path:
        raise ValueError("Path shapefile tidak boleh kosong.")

    tmpdir = None
    shp_path = None
    try:
        if path.lower().endswith(".zip"):
            tmpdir = tempfile.mkdtemp()
            with zipfile.ZipFile(path, "r") as zip_ref:
                zip_ref.extractall(tmpdir)

            shp_files = []
            for root, _, files in os.walk(tmpdir):
                for f in files:
                    if f.lower().endswith(".shp"):
                        shp_files.append(os.path.join(root, f))

            if not shp_files:
                raise ValueError("Tidak ditemukan file .shp di dalam ZIP.")
            shp_path = shp_files[0]
            print(f"[DEBUG] Membaca shapefile: {os.path.basename(shp_path)}")
        else:
            shp_path = path

        try:
            gdf = gpd.read_file(shp_path)
        except Exception as e:
            raise ValueError(f"Gagal membaca shapefile: {e}")

        if "geometry" not in gdf.columns or gdf.geometry.is_empty.all():
            print("[WARN] Geometry kosong. Mencoba membangun ulang dari fitur mentah...")

            with fiona.open(shp_path) as src:
                features = []
                for feat in src:
                    geom = shape(feat["geometry"]) if feat["geometry"] else None
                    props = feat["properties"]
                    props["geometry"] = geom
                    features.append(props)

                gdf = gpd.GeoDataFrame(features, geometry="geometry", crs=src.crs)

        if gdf.crs is None:
            # Assume WGS84 when the .prj file is missing.
            gdf.set_crs("EPSG:4326", inplace=True)

        return gdf
    finally:
        # Previously the temp dir leaked on every raise path above; a
        # finally block guarantees cleanup.
        if tmpdir and os.path.exists(tmpdir):
            shutil.rmtree(tmpdir, ignore_errors=True)
# ---------------------------------------------------------------------------
# app/mapset_pipeline/data/repository.py
# ---------------------------------------------------------------------------

import os
import json
import asyncio
import pandas as pd
from shapely import wkt, wkb
from shapely.geometry import MultiPolygon, MultiLineString
from sqlalchemy import text
from sqlalchemy.exc import SQLAlchemyError

# Project database connection
from database.connection import engine
from app.mapset_pipeline.utils.formatters import str_to_date


async def generate_unique_table_name(base_name: str) -> str:
    """Return a table name derived from *base_name* that does not collide
    with an existing relation, appending _2, _3, ... until a free name is
    found."""
    slug = base_name.lower().replace(" ", "_").replace("-", "_")
    candidate = slug
    suffix = 2

    async with engine.connect() as conn:
        while True:
            # to_regclass() yields NULL when the relation does not exist
            # in the current search path.
            lookup = await conn.execute(
                text("SELECT to_regclass(:tname)"),
                {"tname": candidate}
            )
            if lookup.scalar() is None:
                return candidate
            candidate = f"{slug}_{suffix}"
            suffix += 1
async def insert_parquet_to_postgis(filename: str, table_name: str):
    """
    Read a temporary parquet file from ./tmp, clean the rows, and bulk-COPY
    them into a new PostGIS table via the shared asyncpg pool.

    Steps: upper-case column names; parse the GEOM column (WKT or WKB),
    repair invalid geometries with buffer(0), promote POLYGON/LINESTRING to
    their MULTI variants; create the table with all-TEXT attributes; COPY the
    rows; then ALTER the geom column to a typed geometry(…, 4326) and add a
    GIST index. The temp file is deleted on success.

    Returns:
        dict with "table_name", "row_count" and "geom_type".

    Raises:
        FileNotFoundError: temp parquet missing.
        ValueError: no GEOM column, or no valid rows after cleaning.
    """
    from main import db_pool
    file_path = os.path.join("tmp", filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File temp {file_path} tidak ditemukan")

    try:
        loop = asyncio.get_running_loop()
        # Parquet read is CPU/IO bound; run it off the event loop.
        df = await loop.run_in_executor(None, pd.read_parquet, file_path)

        # 1. CLEAN COLUMN NAMES
        df.columns = [str(col).strip().upper() for col in df.columns]

        # NOTE(review): this rename is a no-op ("GEOM" -> "GEOM"); presumably
        # a leftover from an earlier alias - confirm and remove.
        if "GEOM" in df.columns:
            df.rename(columns={"GEOM": "GEOM"}, inplace=True)

        if "GEOM" not in df.columns:
            raise ValueError("Kolom GEOM tidak ditemukan dalam Parquet")

        # 2. PREPARE DATA ROWS
        clean_rows = []
        geom_types = set()

        # All attribute columns except the geometry itself.
        attr_columns = [col for col in df.columns if col != "GEOM"]

        for row in df.itertuples(index=False):
            # --- geometry handling ---
            raw_geom = getattr(row, "GEOM", None)
            if not raw_geom: continue

            try:
                geom = None
                if isinstance(raw_geom, str):
                    geom = wkt.loads(raw_geom)
                elif isinstance(raw_geom, bytes):
                    geom = wkb.loads(raw_geom)

                if not geom: continue

                # Repair invalid geometry (classic buffer(0) trick).
                if not geom.is_valid:
                    geom = geom.buffer(0)

                # Force MULTI variants so the column type is uniform.
                gtype = geom.geom_type.upper()
                if gtype == "POLYGON": geom = MultiPolygon([geom])
                elif gtype == "LINESTRING": geom = MultiLineString([geom])

                geom_types.add(geom.geom_type)

                # EWKT with explicit SRID 4326.
                ewkt = f"SRID=4326;{geom.wkt}"

            except Exception:
                continue  # skip rows with broken geometries

            # --- attributes: everything forced to string (or NULL) ---
            # NOTE(review): itertuples may rename columns that are not valid
            # identifiers, making getattr(row, col) return None - verify
            # attribute column names are identifier-safe.
            row_data = []
            for col in attr_columns:
                val = getattr(row, col, None)
                if val is not None:
                    row_data.append(str(val))
                else:
                    row_data.append(None)

            row_data.append(ewkt)
            clean_rows.append(tuple(row_data))

        if not clean_rows:
            raise ValueError("Data valid kosong setelah pemrosesan geometry")

        # 3. DATABASE OPERATIONS
        # "GEOM" acts as a sentinel when no geometry type was collected.
        final_geom_type = list(geom_types)[0].upper() if geom_types else "GEOM"
        if "MULTI" not in final_geom_type and final_geom_type != "GEOM":
            final_geom_type = "MULTI" + final_geom_type

        # A. CREATE TABLE - all attributes as TEXT first, geom as TEXT too
        # (it is retyped to a real geometry column afterwards).
        # NOTE(review): table_name is interpolated into DDL; it comes from
        # generate_unique_table_name's slugging - confirm it can never carry
        # SQL metacharacters.
        col_defs = [f'"{col}" TEXT' for col in attr_columns]

        create_sql = f"""
            CREATE TABLE {table_name} (
                _id SERIAL PRIMARY KEY,
                {', '.join(col_defs)},
                geom TEXT
            );
        """

        async with db_pool.acquire() as conn:
            # Create table
            await conn.execute(create_sql)

            # B. COPY data (bulk insert); asyncpg quotes column names itself.
            target_cols = attr_columns + ['geom']
            await conn.copy_records_to_table(
                table_name,
                records=clean_rows,
                columns=target_cols
            )

            # C. retype the geom column and add a spatial index.
            alter_sql = f"""
                ALTER TABLE {table_name}
                ALTER COLUMN geom TYPE geometry({final_geom_type}, 4326)
                USING ST_Force2D(geom::geometry)::geometry({final_geom_type}, 4326);

                CREATE INDEX idx_{table_name}_geom ON {table_name} USING GIST (geom);
            """
            await conn.execute(alter_sql)

        print(f"[SUCCESS] Upload {len(clean_rows)} baris ke tabel {table_name}.")

        # Remove the temp file after success (best effort).
        try:
            os.remove(file_path)
        except OSError:
            pass

        return {
            "table_name": table_name,
            "row_count": len(clean_rows),
            "geom_type": final_geom_type
        }

    except Exception as e:
        print(f"[ERROR] Processing parquet to DB: {e}")
        raise e
async def save_author_metadata(payload_author: dict, table_name: str, dataset_title: str,
                               geom_types: list, row_count: int, user_id: int):
    """
    Persist author metadata and dataset information into
    backend.author_metadata.

    Args:
        payload_author: author form fields (abstract, keywords,
            topicCategory, dateCreated, status, organization, contactName,
            contactEmail, contactPhone).
        table_name: physical table name (stored as table_title).
        dataset_title: human-readable dataset title.
        geom_types: geometry type names, stored as a JSON array.
        row_count: number of geometries ingested.
        user_id: owner of the upload.
    """
    query = text("""
        INSERT INTO backend.author_metadata (
            table_title,
            dataset_title,
            dataset_abstract,
            keywords,
            topic_category,
            date_created,
            dataset_status,
            organization_name,
            contact_person_name,
            contact_email,
            contact_phone,
            geom_type,
            user_id,
            process,
            geometry_count
        ) VALUES (
            :table_title,
            :dataset_title,
            :dataset_abstract,
            :keywords,
            :topic_category,
            :date_created,
            :dataset_status,
            :organization_name,
            :contact_person_name,
            :contact_email,
            :contact_phone,
            :geom_type,
            :user_id,
            :process,
            :geometry_count
        )
    """)

    params = {
        "table_title": table_name,
        "dataset_title": dataset_title,
        "dataset_abstract": payload_author.get("abstract"),
        "keywords": payload_author.get("keywords"),
        # topicCategory arrives as a list; stored comma-separated.
        "topic_category": ", ".join(payload_author.get("topicCategory", [])),
        "date_created": str_to_date(payload_author.get("dateCreated")),
        "dataset_status": payload_author.get("status"),
        "organization_name": payload_author.get("organization"),
        "contact_person_name": payload_author.get("contactName"),
        "contact_email": payload_author.get("contactEmail"),
        "contact_phone": payload_author.get("contactPhone"),
        "geom_type": json.dumps(geom_types),
        "user_id": user_id,
        # Pipeline stage marker read by the cleansing step.
        "process": 'CLEANSING',
        "geometry_count": row_count
    }

    # engine.begin() commits on success, rolls back on error.
    async with engine.begin() as conn:
        await conn.execute(query, params)
async def call_cleansing_procedure(table_name: str):
    """Run the geometry-cleansing stored procedure for *table_name*.

    Returns:
        str: "done" on success.

    Raises:
        RuntimeError: when the database call fails (so the service layer
        knows the cleansing step did not complete).
    """
    try:
        print(f"[INFO] Memulai cleansing database untuk tabel: {table_name}")

        async with engine.begin() as conn:
            # Safe, parameter-bound procedure call.
            await conn.execute(
                text("CALL pr_cleansing_satupeta_polygon(:table_name, NULL);"),
                {"table_name": table_name}
            )

    except SQLAlchemyError as e:
        print(f"[ERROR] Cleansing database gagal: {e}")
        raise RuntimeError(f"Database cleansing failed: {str(e)}")

    print(f"[SUCCESS] Cleansing selesai untuk tabel: {table_name}")
    return "done"

# ---------------------------------------------------------------------------
# app/mapset_pipeline/service.py
# ---------------------------------------------------------------------------

import os
import shutil
import pandas as pd
from fastapi import UploadFile, HTTPException
from typing import Optional

# --- Internal modules ---
from .api.schemas import UploadRequest, PdfRequest
from .core.processing.analyzer import analyze_and_clean_dataframe, publish_mapset
from .core.readers import (
    read_csv,
    read_shp,
    read_gdb,
    read_mpk,
    read_pdf,
    convert_df
)
from .data.repository import (
    generate_unique_table_name,
    insert_parquet_to_postgis,
    save_author_metadata,
    call_cleansing_procedure
)

from app.mapset_pipeline.utils.file_ops import (
    detect_zip_type,
    generate_job_id,
)
from app.mapset_pipeline.utils.formatters import (
    save_xml_to_sld,
)

# --- Legacy/external modules ---
from app.core.config import UPLOAD_FOLDER, MAX_FILE_MB, GEONETWORK_URL
from utils.logger_config import log_activity
async def handle_file_analysis(
    file: UploadFile,
    page: Optional[str] = "",
    sheet: Optional[str] = "",
    fileDesc: Optional[str] = ""
):
    """
    Orchestrator for the /upload endpoint.

    1. Persist the uploaded file to disk (size-checked against MAX_FILE_MB).
    2. Route to the reader matching the file extension.
    3. Run the analysis/cleaning processor on the resulting DataFrame.
    4. Always remove the raw uploaded file afterwards.

    Raises:
        HTTPException: 413 (too large), 400 (unsupported type / bad ZIP),
        422 (no valid table), 500 (unexpected reader/processor failure).
    """
    fname = file.filename
    ext = os.path.splitext(fname)[1].lower()

    # 1. Validate & persist. The whole upload is read in memory for the
    # size check - acceptable while uploads are bounded by MAX_FILE_MB.
    contents = await file.read()
    size_mb = len(contents) / (1024 * 1024)
    if size_mb > MAX_FILE_MB:
        raise HTTPException(status_code=413, detail="Ukuran File Terlalu Besar")

    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    # NOTE(review): fname is used verbatim in the path - confirm the upload
    # layer sanitises it against path traversal.
    tmp_path = UPLOAD_FOLDER / fname

    with open(tmp_path, "wb") as f:
        f.write(contents)

    df = None
    try:
        # 2. Reader routing per extension.
        print(f"[INFO] Processing file type: {ext}")

        if ext == ".csv":
            df = read_csv(str(tmp_path))
        elif ext in (".xlsx", ".xls"):
            # read_csv also handles Excel workbooks (.xls previously fell
            # through to "Unsupported file type").
            df = read_csv(str(tmp_path), sheet)
        elif ext == ".mpk":
            df = read_mpk(str(tmp_path))
        elif ext == ".pdf":
            # PDF extraction may yield zero, one or many tables.
            tbl = read_pdf(tmp_path, page)
            if len(tbl) == 0:
                return {
                    "message": "Tidak ditemukan tabel valid pada halaman yang dipilih",
                    "tables": {},
                    "file_type": ext
                }
            elif len(tbl) > 1:
                return {
                    "message": "File berhasil dibaca, ditemukan banyak tabel.",
                    "tables": tbl,
                    "file_type": ext
                }
            else:
                df = convert_df(tbl[0])
        elif ext == ".zip":
            zip_type = detect_zip_type(str(tmp_path))
            if zip_type == "shp":
                df = read_shp(str(tmp_path))
            elif zip_type == "gdb":
                df = read_gdb(str(tmp_path))
            else:
                raise HTTPException(status_code=400, detail="ZIP file tidak mengandung SHP / GDB valid.")
        else:
            raise HTTPException(status_code=400, detail="Unsupported file type")

        if df is None or (hasattr(df, "empty") and df.empty):
            raise HTTPException(status_code=422, detail="File berhasil dibaca, tetapi tidak ditemukan tabel valid")

        # 3. Analysis / cleaning pipeline.
        result_analysis = await analyze_and_clean_dataframe(df, ext, fname, fileDesc)
        return result_analysis

    except HTTPException:
        # Preserve deliberate status codes (400/422) instead of collapsing
        # them into a generic 500 via the broad handler below.
        raise
    except Exception as e:
        print(f"[ERROR] handle_file_analysis: {e}")
        raise HTTPException(status_code=500, detail=str(e))

    finally:
        # 4. Remove the raw upload; the processor's temp parquet lives on
        # until the frontend sends the ingest request.
        if tmp_path.exists():
            try:
                os.remove(tmp_path)
            except Exception:
                pass


async def process_pdf_file(payload: PdfRequest):
    """Convert a user-selected PDF table payload into a DataFrame and run
    the same analysis pipeline as a regular upload.

    Raises:
        HTTPException: 422 when the payload holds no valid table, 500 on
        unexpected failures.
    """
    try:
        # The payload dict matches the format convert_df expects
        # ({"columns", "rows"[, "title"]}).
        df = convert_df(payload.model_dump())

        if df is None or (hasattr(df, "empty") and df.empty):
            raise HTTPException(status_code=422, detail="Tidak ada tabel valid dalam PDF")

        return await analyze_and_clean_dataframe(
            df, '.pdf', payload.fileName, payload.fileDesc
        )
    except HTTPException:
        raise  # keep the intended 422 rather than rewrapping as 500
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
Persiapan Data (JSON -> DataFrame -> Parquet) + # Kita perlu save ke parquet karena repository insert_parquet_to_postgis membaca file + # Ini juga memisahkan memory load antara API dan DB Process + df = pd.DataFrame(payload.rows) + + # Upper case columns + df.columns = [col.upper() for col in df.columns] + + # Rename Geometry jika perlu (standarisasi input dari frontend) + if "GEOMETRY" in df.columns: + df.rename(columns={"GEOMETRY": "GEOM"}, inplace=True) + + # Simpan ke file temp untuk diproses repository + temp_parquet_name = f"{job_id}.parquet" + temp_parquet_path = os.path.join("tmp", temp_parquet_name) + os.makedirs("tmp", exist_ok=True) + + # Save parquet (gunakan engine pyarrow atau fastparquet) + df.to_parquet(temp_parquet_path, index=False) + + # 3. Insert ke PostGIS + # Fungsi ini akan membaca file parquet tadi, membersihkan geom, dan copy ke DB + db_result = await insert_parquet_to_postgis(temp_parquet_name, table_name) + + # 4. Simpan Metadata + # Ambil list geom type dan row count dari hasil insert DB (lebih akurat) + final_geom_types = [db_result['geom_type']] # Disederhanakan jadi list + row_count = db_result['row_count'] + + await save_author_metadata( + payload_author=payload.author, + table_name=table_name, + dataset_title=payload.title, + geom_types=final_geom_types, + row_count=row_count, + user_id=user_id + ) + + # 5. Logging Activity + await log_activity( + user_id=user_id, + action_type="UPLOAD", + action_title=f"Upload dataset {table_name}", + details={"table_name": table_name, "rows": row_count} + ) + + # 6. 
Post-Processing (External APIs) + result = { + "job_id": job_id, + "job_status": "wait", + "table_name": table_name, + "status": "success", + "message": f"Tabel '{table_name}' berhasil dibuat.", + "total_rows": row_count, + "geometry_type": final_geom_types, + "crs": payload.author.get("crs", "EPSG:4326"), + "metadata_uuid": "" + } + + # Save Style (SLD) + save_xml_to_sld(payload.style, job_id) + + # CLEANSING WITH QUERY + try: + cleansing_status = await call_cleansing_procedure(table_name) + except Exception as e: + cleansing_status = "failed" + print(f"Cleansing warning: {e}") + result['job_status'] = cleansing_status + + # Publish Layer (Geoserver/Geonetwork) + publish_info = await publish_mapset(table_name, job_id) + result['metadata_uuid'] = publish_info.get('uuid', '') + + # 7. Upload to Main Portal (Mapset Integration) + mapset_payload = { + "name": payload.title, + "description": payload.author.get("abstract"), + "scale": "1:25000", + # ID Hardcoded sesuai kode asli (pertimbangkan pindah ke config/env) + 'projection_system_id': '0196c746-d1ba-7f1c-9706-5df738679cc7', + "category_id": payload.author.get("mapsetCategory"), + "data_status": "sementara", + 'classification_id': '01968b4b-d3f9-76c9-888c-ee887ac31ce4', + 'producer_id': '01968b54-0000-7a67-bd10-975b8923b93e', + "layer_type": final_geom_types[0], + 'source_id': ['019c03ef-35e1-738b-858d-871dc7d1e4d6'], + "layer_url": publish_info.get('geos_link', ''), + "metadata_url": f"{GEONETWORK_URL}/srv/eng/catalog.search#/metadata/{publish_info.get('uuid', '')}", + "coverage_level": "provinsi", + "coverage_area": "kabupaten", + "data_update_period": "Tahunan", + "data_version": "2026", + "is_popular": False, + "is_active": True, + 'regional_id': '01968b53-a910-7a67-bd10-975b8923b92e', + "notes": "Mapset baru dibuat", + "status_validation": "on_verification", + } + + # await upload_to_main(mapset_payload) + + return result + + except Exception as e: + # Error Handling & Logging + await log_activity( + 
user_id=user_id, + action_type="ERROR", + action_title="Upload gagal", + details={"error": str(e)} + ) + print(f"[ERROR] execute_postgis_ingestion: {e}") + # Re-raise sebagai HTTP Exception agar router mengembalikan 500 yang rapi + raise HTTPException(status_code=500, detail=str(e)) + + diff --git a/app/mapset_pipeline/utils/file_ops.py b/app/mapset_pipeline/utils/file_ops.py new file mode 100755 index 0000000..7029ef2 --- /dev/null +++ b/app/mapset_pipeline/utils/file_ops.py @@ -0,0 +1,105 @@ +import os +import uuid +import zipfile +import geopandas as gpd +from shapely import wkt +from shapely.errors import ShapelyError +from datetime import datetime + + +def detect_zip_type(zip_path: str) -> str: + with zipfile.ZipFile(zip_path, "r") as zip_ref: + files = zip_ref.namelist() + + if any(f.lower().endswith(".gdb/") or ".gdb/" in f.lower() for f in files): + return "gdb" + + if any(f.lower().endswith(ext) for ext in [".gdbtable", ".gdbtablx", ".gdbindexes", ".spx"] for f in files): + return "gdb" + + if any(f.lower().endswith(".shp") for f in files): + return "shp" + + return "unknown" + + +def generate_unique_filename(folder="tmp", ext="parquet", digits=6): + os.makedirs(folder, exist_ok=True) + while True: + file_id = file_id = uuid.uuid4().int + filename = f"{folder}/{file_id}.{ext}" + + if not os.path.exists(filename): + return filename + + +def generate_job_id(user_id: str) -> str: + timestamp = datetime.now().strftime("%Y%m%d%H%M%S") + return f"{user_id}_{timestamp}" + + +def dataframe_validation(df_input, tmp_file): + """ + Fungsi ini berjalan di thread terpisah (CPU bound). + Melakukan validasi, cleaning, dan export ke parquet. + """ + # 1. 
def dataframe_validation(df_input, tmp_file):
    """Validate, clean and export a WKT-bearing DataFrame to Parquet.

    Runs in a separate thread (CPU bound). Parses the ``geometry`` column
    (WKT strings) into shapely geometries, drops invalid/empty rows,
    repairs light topology errors, then writes a GeoDataFrame to
    *tmp_file* as Parquet.

    Returns:
        The number of valid rows written.

    Raises:
        ValueError if no valid spatial rows remain after parsing.
    """
    # Work on a copy so the caller's DataFrame is untouched.
    export_df = df_input.copy()

    # ------------------------------------------------------------------
    # STAGE 1: SAFE WKT LOADING
    # ------------------------------------------------------------------
    def safe_load_wkt(raw):
        if not isinstance(raw, str):
            return None
        try:
            return wkt.loads(raw)
        # Bug fix: the original caught ``(ShapelyError, Exception)`` —
        # the Exception member subsumes ShapelyError, so the tuple was
        # redundant. Kept deliberately broad: any parse failure => None.
        except Exception:
            return None

    export_df["geom"] = export_df["geometry"].apply(safe_load_wkt)

    # ------------------------------------------------------------------
    # STAGE 2: FILTER NULL & INVALID GEOMETRY
    # ------------------------------------------------------------------
    # Drop rows whose WKT failed to parse (None).
    export_df = export_df[export_df["geom"].notnull()]
    # (Removed a debug print that dumped the whole DataFrame to stdout.)
    if export_df.empty:
        raise ValueError("Tidak ada data spasial valid yang ditemukan.")

    # Promote to a GeoDataFrame on the parsed column.
    export_df = gpd.GeoDataFrame(export_df, geometry="geom")

    # ------------------------------------------------------------------
    # STAGE 3: FIX TOPOLOGY
    # ------------------------------------------------------------------
    # buffer(0) is the standard GIS trick for repairing light topology
    # defects (e.g. self-intersecting polygons).
    export_df["geom"] = export_df["geom"].apply(
        lambda g: g.buffer(0) if not g.is_valid else g
    )

    # Drop geometries that became empty after repair (rare, but safe).
    export_df = export_df[~export_df["geom"].is_empty]

    # ------------------------------------------------------------------
    # STAGE 4: FINALISE (CRS & RENAME)
    # ------------------------------------------------------------------
    export_df = export_df.drop(columns=["geometry"])  # old WKT string column
    export_df = export_df.set_crs("EPSG:4326", allow_override=True)

    # Uppercase attribute columns (stripping stray spaces, " ID " -> "ID");
    # the 'geom' column stays lowercase.
    export_df = export_df.rename(
        columns=lambda c: str(c).strip().upper() if c != "geom" else c
    )

    export_df.to_parquet(tmp_file)

    return len(export_df)


def safe_json(value):
    """Coerce numpy/pandas/shapely scalars to JSON-serializable types.

    Geometries are converted to their WKT string form; NaN/NaT become None.
    """
    if isinstance(value, (np.int64, np.int32)):
        return int(value)
    if isinstance(value, (np.float64, np.float32)):
        return float(value)
    if isinstance(value, pd.Timestamp):
        return value.isoformat()
    if isinstance(value, shapely_base.BaseGeometry):
        return str(value)  # WKT string
    # NOTE(review): pd.isna on list-like values returns an array (ambiguous
    # truth value) — assumes scalars only; confirm against callers.
    if pd.isna(value):
        return None
    return value
def str_to_date(raw_date: str):
    """Parse a 'YYYY-MM-DD' string into a ``datetime.date``.

    Returns None for empty/None input or any unparseable value; parse
    failures are printed as a warning, never raised.
    """
    if raw_date:
        try:
            return datetime.strptime(raw_date, "%Y-%m-%d").date()
        except Exception as e:
            print("[WARNING] Tidak bisa parse dateCreated:", e)
    return None


def save_xml_to_sld(xml_string, filename):
    """Write an SLD (XML) style document to ``style_temp/<filename>.sld``.

    Returns the path of the written file.
    """
    folder_path = 'style_temp'
    os.makedirs(folder_path, exist_ok=True)

    # Bug fix: the original f-string had no placeholder, so the
    # ``filename`` argument was ignored and every job overwrote the
    # same style file.
    file_path = os.path.join(folder_path, f"{filename}.sld")

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(xml_string)

    return file_path


# Keywords that mark a column heading as geographic / administrative.
geo_admin_keywords = [
    'lat', 'lon', 'long', 'latitude', 'longitude', 'koordinat', 'geometry', 'geometri',
    'desa', 'kelurahan', 'kel', 'kecamatan', 'kabupaten', 'kab', 'kota', 'provinsi',
    'lokasi', 'region', 'area', 'zone', 'boundary', 'batas'
]

def normalize_text(text):
    """Lowercase, strip punctuation (except '/') and collapse whitespace."""
    text = text.lower()
    text = re.sub(r'[^a-z0-9/ ]+', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

def generate_combined_patterns(keywords):
    """Build 'a / b' and 'b / a' regex patterns for every keyword pair."""
    combos = list(itertools.combinations(keywords, 2))
    patterns = []
    for a, b in combos:
        patterns.append(rf'{a}\s*/\s*{b}')
        patterns.append(rf'{b}\s*/\s*{a}')
    return patterns

# Pre-computed once at import time; used by contains_geo_admin_keywords.
combined_patterns = generate_combined_patterns(geo_admin_keywords)

def contains_geo_admin_keywords(text):
    """True if *text* contains a geo/admin keyword (alone or as 'a/b')."""
    text_clean = normalize_text(text)
    if len(text_clean) < 3:
        return False

    for pattern in combined_patterns:
        if re.search(pattern, text_clean):
            return True

    # Single keywords must be delimited (start/end, space, '/', '_' or '-')
    # to avoid substring false positives.
    for kw in geo_admin_keywords:
        if re.search(rf'(^|[\s/_-]){kw}([\s/_-]|$)', text_clean):
            return True

    return False

def filter_geo_admin_column(tables):
    """Keep only tables with at least one geo/admin column heading."""
    filtered = []
    for table in tables:
        found = any(contains_geo_admin_keywords(col) for col in table['columns'])
        if found:
            filtered.append(table)
    return filtered
"no","no.","nomor","nomor urut","no urut","No","Nomor","No Urut","Index", + "ID","Sr No","S/N","SN","Sl No" +] + +def has_number_header(header): + header_text = header + return any(keyword in header_text for keyword in NUMBER_HEADER_KEYWORDS) + +def is_numbering_column(col_values): + numeric_like = 0 + total = 0 + for v in col_values: + if not v or not isinstance(v, str): + continue + total += 1 + if re.fullmatch(r"0*\d{1,3}", v.strip()): + numeric_like += 1 + return total > 0 and (numeric_like / total) > 0.6 + +def is_numeric_value(v): + if v is None: + return False + if isinstance(v, (int, float)): + return True + if isinstance(v, str) and re.fullmatch(r"0*\d{1,3}", v.strip()): + return True + return False + +def cleaning_column(headers, bodies): + cleaned_bodies = [] + + for header, body in zip(headers, bodies): + if not body: + cleaned_bodies.append(body) + continue + + header_has_number = has_number_header(header) + first_col = [row[0] for row in body if row and len(row) > 0] + first_col_is_numbering = is_numbering_column(first_col) + + if not header_has_number and first_col_is_numbering: + new_body = [] + for row in body: + if not row: + continue + first_val = row[0] + if is_numeric_value(first_val) and len(row) > 1: + new_body.append(row[1:]) + else: + new_body.append(row) + body = new_body + + header_len = len(headers) + filtered_body = [row for row in body if len(row) == header_len] + + cleaned_bodies.append(filtered_body) + + return cleaned_bodies + +def parse_page_selection(selectedPage: str, total_pages: int): + if not selectedPage: + return list(range(1, total_pages + 1)) + + pages = set() + parts = re.split(r'[,\s]+', selectedPage.strip()) + + for part in parts: + if '-' in part: + try: + start, end = map(int, part.split('-')) + pages.update(range(start, end + 1)) + except ValueError: + continue + else: + try: + pages.add(int(part)) + except ValueError: + continue + + valid_pages = [p for p in sorted(pages) if 1 <= p <= total_pages] + return 
def is_number(s):
    """True if *s* looks numeric after stripping thousand/decimal marks.

    Note: this also accepts strings like '1.2.3' because every ',' and
    '.' is removed before the digit check.
    """
    if s is None:
        return False
    s = str(s).strip().replace(',', '').replace('.', '')
    return s.isdigit()

def row_ratio(row):
    """Fraction of a row's non-empty cells that are numeric (0 if empty)."""
    non_empty = [c for c in row if c not in (None, '', ' ')]
    if not non_empty:
        return 0
    num_count = sum(is_number(c) for c in non_empty)
    return num_count / len(non_empty)

def has_mixed_text_and_numbers(row):
    """True if the row contains both alphabetic text and numeric cells."""
    non_empty = [c for c in row if c not in (None, '', ' ')]
    has_text = any(isinstance(c, str) and re.search(r'[A-Za-z]', c) for c in non_empty)
    has_num = any(is_number(c) for c in non_empty)
    return has_text and has_num

def is_short_text_row(row):
    """Detect a short text-only row (≤2 cells, <20 chars joined)."""
    non_empty = [str(c).strip() for c in row if c not in (None, '', ' ')]
    if not non_empty:
        return False
    text_only = all(not is_number(c) for c in non_empty)
    joined = " ".join(non_empty)
    return text_only and len(non_empty) <= 2 and len(joined) < 20


def get_number_column_index(columns):
    """Index of the first column whose heading is a number heading,
    or None if there is none."""
    for i, col in enumerate(columns):
        if has_number_header(col):
            return i
    return None

def get_start_end_number(rows, idx):
    """Return (first, last) values of column *idx* as ints, or (None, None)
    when the column is empty or non-numeric."""
    try:
        start_no = int(rows[0][idx])
        end_no = int(rows[-1][idx])
        return start_no, end_no
    # Bug fix: narrowed from a bare ``except:`` (which also swallowed
    # KeyboardInterrupt/SystemExit) to the errors int()/indexing raise.
    except (ValueError, TypeError, IndexError):
        return None, None

def normalize_number_column(table):
    """Repair the row-number column of an extracted table in place.

    Walks the numbering column and forces it to be strictly increasing:
    a value that does not advance is replaced by previous + 1.
    Returns the (mutated) table.
    """
    columns = table["columns"]
    rows = table["rows"]

    num_idx = get_number_column_index(columns)
    if num_idx is None:
        return table

    current = None

    for row in rows:
        try:
            val = int(row[num_idx])
        # Bug fix: narrowed from a bare ``except:`` — skip only cells
        # that genuinely fail int conversion or are missing.
        except (ValueError, TypeError, IndexError):
            continue

        if current is None:
            current = val
        else:
            if val <= current:
                current += 1
            else:
                current = val

        row[num_idx] = str(current)

    return table
a/app/models/classification_model.py b/app/models/classification_model.py old mode 100644 new mode 100755 diff --git a/app/models/credential_model.py b/app/models/credential_model.py old mode 100644 new mode 100755 diff --git a/app/models/feedback_model.py b/app/models/feedback_model.py old mode 100644 new mode 100755 diff --git a/app/models/file_model.py b/app/models/file_model.py old mode 100644 new mode 100755 diff --git a/app/models/map_access_model.py b/app/models/map_access_model.py old mode 100644 new mode 100755 diff --git a/app/models/map_projection_system_model.py b/app/models/map_projection_system_model.py old mode 100644 new mode 100755 diff --git a/app/models/map_source_model.py b/app/models/map_source_model.py old mode 100644 new mode 100755 diff --git a/app/models/mapset_history_model.py b/app/models/mapset_history_model.py old mode 100644 new mode 100755 diff --git a/app/models/mapset_model.py b/app/models/mapset_model.py old mode 100644 new mode 100755 diff --git a/app/models/news_model.py b/app/models/news_model.py old mode 100644 new mode 100755 diff --git a/app/models/organization_model.py b/app/models/organization_model.py old mode 100644 new mode 100755 diff --git a/app/models/refresh_token_model.py b/app/models/refresh_token_model.py old mode 100644 new mode 100755 diff --git a/app/models/regional_model.py b/app/models/regional_model.py old mode 100644 new mode 100755 diff --git a/app/models/role_model.py b/app/models/role_model.py old mode 100644 new mode 100755 diff --git a/app/models/user_model.py b/app/models/user_model.py old mode 100644 new mode 100755 diff --git a/app/repositories/__init__.py b/app/repositories/__init__.py old mode 100644 new mode 100755 diff --git a/app/repositories/base.py b/app/repositories/base.py old mode 100644 new mode 100755 diff --git a/app/repositories/category_repository.py b/app/repositories/category_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/classification_repository.py 
b/app/repositories/classification_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/credential_repository.py b/app/repositories/credential_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/feedback_repository.py b/app/repositories/feedback_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/file_repository.py b/app/repositories/file_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/map_access_repository.py b/app/repositories/map_access_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/map_projection_system_repository.py b/app/repositories/map_projection_system_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/map_source_repository.py b/app/repositories/map_source_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/map_source_usage_repository.py b/app/repositories/map_source_usage_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/mapset_history_repository.py b/app/repositories/mapset_history_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/mapset_repository.py b/app/repositories/mapset_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/news_repository.py b/app/repositories/news_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/organization_repository.py b/app/repositories/organization_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/regional_repository.py b/app/repositories/regional_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/role_repository.py b/app/repositories/role_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/token_repository.py b/app/repositories/token_repository.py old mode 100644 new mode 100755 diff --git a/app/repositories/user_repository.py b/app/repositories/user_repository.py old mode 
100644 new mode 100755 diff --git a/app/response/res.py b/app/response/res.py old mode 100644 new mode 100755 diff --git a/app/schemas/__init__.py b/app/schemas/__init__.py old mode 100644 new mode 100755 diff --git a/app/schemas/base.py b/app/schemas/base.py old mode 100644 new mode 100755 diff --git a/app/schemas/category_schema.py b/app/schemas/category_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/classification_schema.py b/app/schemas/classification_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/count_schema.py b/app/schemas/count_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/credential_schema.py b/app/schemas/credential_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/error_schema.py b/app/schemas/error_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/feedback_schema.py b/app/schemas/feedback_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/file_schema.py b/app/schemas/file_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/map_access_schema.py b/app/schemas/map_access_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/map_projection_system_schema.py b/app/schemas/map_projection_system_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/map_source_schema.py b/app/schemas/map_source_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/mapset_history_schema.py b/app/schemas/mapset_history_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/mapset_schema.py b/app/schemas/mapset_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/news_schema.py b/app/schemas/news_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/organization_schema.py b/app/schemas/organization_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/regional_schema.py b/app/schemas/regional_schema.py old mode 100644 new mode 100755 diff --git 
a/app/schemas/role_schema.py b/app/schemas/role_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/token_schema.py b/app/schemas/token_schema.py old mode 100644 new mode 100755 diff --git a/app/schemas/user_schema.py b/app/schemas/user_schema.py old mode 100644 new mode 100755 diff --git a/app/services/__init__.py b/app/services/__init__.py old mode 100644 new mode 100755 diff --git a/app/services/auth_service.py b/app/services/auth_service.py old mode 100644 new mode 100755 diff --git a/app/services/base.py b/app/services/base.py old mode 100644 new mode 100755 diff --git a/app/services/category_service.py b/app/services/category_service.py old mode 100644 new mode 100755 diff --git a/app/services/classification_service.py b/app/services/classification_service.py old mode 100644 new mode 100755 diff --git a/app/services/count_service.py b/app/services/count_service.py old mode 100644 new mode 100755 diff --git a/app/services/credential_service.py b/app/services/credential_service.py old mode 100644 new mode 100755 diff --git a/app/services/feedback_service.py b/app/services/feedback_service.py old mode 100644 new mode 100755 diff --git a/app/services/file_service.py b/app/services/file_service.py old mode 100644 new mode 100755 diff --git a/app/services/map_access_service.py b/app/services/map_access_service.py old mode 100644 new mode 100755 diff --git a/app/services/map_projection_system_service.py b/app/services/map_projection_system_service.py old mode 100644 new mode 100755 diff --git a/app/services/map_source_service.py b/app/services/map_source_service.py old mode 100644 new mode 100755 diff --git a/app/services/mapset_history_service.py b/app/services/mapset_history_service.py old mode 100644 new mode 100755 diff --git a/app/services/mapset_service.py b/app/services/mapset_service.py old mode 100644 new mode 100755 diff --git a/app/services/news_service.py b/app/services/news_service.py old mode 100644 new mode 100755 diff --git 
a/app/services/organization_service.py b/app/services/organization_service.py old mode 100644 new mode 100755 diff --git a/app/services/regional_service.py b/app/services/regional_service.py old mode 100644 new mode 100755 diff --git a/app/services/role_service.py b/app/services/role_service.py old mode 100644 new mode 100755 diff --git a/app/services/user_service.py b/app/services/user_service.py old mode 100644 new mode 100755 diff --git a/app/utils/__init__.py b/app/utils/__init__.py old mode 100644 new mode 100755 diff --git a/app/utils/encryption.py b/app/utils/encryption.py old mode 100644 new mode 100755 diff --git a/app/utils/helpers.py b/app/utils/helpers.py old mode 100644 new mode 100755 diff --git a/app/utils/logger_config.py b/app/utils/logger_config.py old mode 100644 new mode 100755 diff --git a/app/utils/system.py b/app/utils/system.py old mode 100644 new mode 100755 diff --git a/docker-compose.yml b/docker-compose.yml old mode 100644 new mode 100755 diff --git a/environment.env b/environment.env old mode 100644 new mode 100755 diff --git a/migrations/README b/migrations/README old mode 100644 new mode 100755 diff --git a/migrations/env.py b/migrations/env.py old mode 100644 new mode 100755 diff --git a/migrations/script.py.mako b/migrations/script.py.mako old mode 100644 new mode 100755 diff --git a/migrations/scripts.py b/migrations/scripts.py old mode 100644 new mode 100755 diff --git a/migrations/versions/20241203_1200_initial_schema.py b/migrations/versions/20241203_1200_initial_schema.py old mode 100644 new mode 100755 diff --git a/migrations/versions/20241204_0000_seed_initial_data.py b/migrations/versions/20241204_0000_seed_initial_data.py old mode 100644 new mode 100755 diff --git a/migrations/versions/__init__.py b/migrations/versions/__init__.py old mode 100644 new mode 100755 diff --git a/poetry.lock b/poetry.lock old mode 100644 new mode 100755 diff --git a/pyproject.toml b/pyproject.toml old mode 100644 new mode 100755 diff --git 
a/run.py b/run.py old mode 100644 new mode 100755 diff --git a/tests/__init__.py b/tests/__init__.py old mode 100644 new mode 100755 diff --git a/tests/conftest.py b/tests/conftest.py old mode 100644 new mode 100755 diff --git a/tests/test_api/__init__.py b/tests/test_api/__init__.py old mode 100644 new mode 100755 diff --git a/tests/test_services/__init__.py b/tests/test_services/__init__.py old mode 100644 new mode 100755