fixing cleansing data
This commit is contained in:
parent
421d0cf90b
commit
14852b6648
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -1,5 +1,8 @@
|
||||||
|
.env
|
||||||
test_pg.py
|
test_pg.py
|
||||||
cleansing_service.py
|
cleansing_service.py
|
||||||
|
postgis_metadata.py
|
||||||
|
database.py
|
||||||
|
|
||||||
__pycache__/
|
__pycache__/
|
||||||
data/
|
data/
|
||||||
13
core/config.py
Normal file
13
core/config.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import os
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
HOST = os.getenv("host")
|
||||||
|
PORT = os.getenv("port")
|
||||||
|
DB = os.getenv("db")
|
||||||
|
USER = os.getenv("user")
|
||||||
|
PWD = os.getenv("pwd")
|
||||||
|
SCHEMA = os.getenv("schema")
|
||||||
|
GEOM_COL = os.getenv("geom_col")
|
||||||
|
|
||||||
21
database.py
21
database.py
|
|
@ -1,21 +0,0 @@
|
||||||
POSTGIS = {
|
|
||||||
"host": "192.168.60.24",
|
|
||||||
"port": "5432",
|
|
||||||
"db": "test_postgis",
|
|
||||||
"user": "postgres",
|
|
||||||
"password": "12345"
|
|
||||||
}
|
|
||||||
|
|
||||||
def build_uri(table_name: str) -> str:
|
|
||||||
return (
|
|
||||||
f"dbname='{POSTGIS['db']}' "
|
|
||||||
f"host='{POSTGIS['host']}' "
|
|
||||||
f"port='{POSTGIS['port']}' "
|
|
||||||
f"user='{POSTGIS['user']}' "
|
|
||||||
f"password='{POSTGIS['password']}' "
|
|
||||||
f"sslmode=disable "
|
|
||||||
f"table=\"public\".\"{table_name}\" "
|
|
||||||
f"key='_id'"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,598 +1,181 @@
|
||||||
from qgis.core import (
|
from qgis.core import (
|
||||||
|
QgsDataSourceUri,
|
||||||
QgsFeature,
|
QgsFeature,
|
||||||
QgsVectorLayer,
|
QgsVectorLayer,
|
||||||
QgsVectorLayerExporter,
|
QgsVectorLayerExporter,
|
||||||
QgsVectorFileWriter
|
QgsVectorFileWriter,
|
||||||
|
QgsWkbTypes
|
||||||
)
|
)
|
||||||
import processing
|
import processing
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
from database import build_uri
|
from core.config import HOST,PORT,DB,USER,PWD,SCHEMA,GEOM_COL
|
||||||
|
|
||||||
def load_layer(table_name: str):
|
def load_layer(table_name: str):
|
||||||
uri = build_uri(table_name)
|
uri = QgsDataSourceUri()
|
||||||
print('uri', uri)
|
uri.setConnection(HOST, PORT, DB, USER, PWD)
|
||||||
layer = QgsVectorLayer(uri, table_name, "postgres")
|
uri.setDataSource(SCHEMA, table_name, GEOM_COL, "", "_id")
|
||||||
|
|
||||||
|
layer = QgsVectorLayer(uri.uri(), table_name, "postgres")
|
||||||
|
|
||||||
print("Layer valid:", layer.isValid())
|
print("Layer valid:", layer.isValid())
|
||||||
return layer
|
return layer
|
||||||
|
|
||||||
|
|
||||||
# def cleansing_layer(layer: QgsVectorLayer) -> Dict:
|
|
||||||
|
|
||||||
# summary = {
|
|
||||||
# "total_features_before": layer.featureCount(),
|
|
||||||
# "invalid_geometries_before": 0,
|
|
||||||
# "invalid_geometries_fixed": 0,
|
|
||||||
# "duplicates_removed": 0,
|
|
||||||
# "sliver_removed": 0,
|
|
||||||
# "holes_removed": 0
|
|
||||||
# }
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 1. IDENTIFY INVALID GEOMETRY
|
|
||||||
# # ========================================================
|
|
||||||
# invalid_ids = []
|
|
||||||
# for f in layer.getFeatures():
|
|
||||||
# if not f.geometry().isGeosValid():
|
|
||||||
# invalid_ids.append(f.id())
|
|
||||||
|
|
||||||
# summary["invalid_geometries_before"] = len(invalid_ids)
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 2. FIX GEOMETRIES
|
|
||||||
# # ========================================================
|
|
||||||
# fixed = processing.run(
|
|
||||||
# "native:fixgeometries",
|
|
||||||
# {
|
|
||||||
# "INPUT": layer,
|
|
||||||
# "OUTPUT": "memory:"
|
|
||||||
# }
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# summary["invalid_geometries_fixed"] = len(invalid_ids)
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 3. ENSURE MULTIPOLYGON
|
|
||||||
# # ========================================================
|
|
||||||
# multipolygon = processing.run(
|
|
||||||
# "native:collect",
|
|
||||||
# {
|
|
||||||
# "INPUT": fixed,
|
|
||||||
# "OUTPUT": "memory:"
|
|
||||||
# }
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 4. REMOVE DUPLICATE ROWS
|
|
||||||
# # ========================================================
|
|
||||||
# all_fields = [f.name() for f in multipolygon.fields()]
|
|
||||||
# print("Detecting key fields:", all_fields)
|
|
||||||
|
|
||||||
# key_fields = None
|
|
||||||
|
|
||||||
# # (1) Prefer 'id'
|
|
||||||
# if "id" in all_fields:
|
|
||||||
# key_fields = ["id"]
|
|
||||||
|
|
||||||
# # (2) Else pick first integer field
|
|
||||||
# if key_fields is None:
|
|
||||||
# int_cols = [
|
|
||||||
# f.name() for f in multipolygon.fields()
|
|
||||||
# if f.typeName().lower() in ["int", "integer", "bigint"]
|
|
||||||
# ]
|
|
||||||
# if int_cols:
|
|
||||||
# key_fields = [int_cols[0]]
|
|
||||||
|
|
||||||
# # (3) Else use all fields
|
|
||||||
# if key_fields is None:
|
|
||||||
# key_fields = all_fields
|
|
||||||
|
|
||||||
# print("Using key field:", key_fields)
|
|
||||||
|
|
||||||
# dedup = processing.run(
|
|
||||||
# "native:removeduplicatesbyattribute",
|
|
||||||
# {
|
|
||||||
# "INPUT": multipolygon,
|
|
||||||
# "FIELDS": key_fields,
|
|
||||||
# "METHOD": 0,
|
|
||||||
# "OUTPUT": "memory:"
|
|
||||||
# }
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# summary["duplicates_removed"] = (
|
|
||||||
# multipolygon.featureCount() - dedup.featureCount()
|
|
||||||
# )
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 5. REMOVE DUPLICATE VERTICES
|
|
||||||
# # ========================================================
|
|
||||||
# no_dup_vertices = processing.run(
|
|
||||||
# "native:removeduplicatevertices",
|
|
||||||
# {
|
|
||||||
# "INPUT": dedup,
|
|
||||||
# "VERTICES": 0, # remove exact duplicates
|
|
||||||
# "OUTPUT": "memory:"
|
|
||||||
# }
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 6. FIX SRID (REPROJECT IF NEEDED)
|
|
||||||
# # ========================================================
|
|
||||||
# # Force SRID to 4326
|
|
||||||
# reprojected = processing.run(
|
|
||||||
# "native:reprojectlayer",
|
|
||||||
# {
|
|
||||||
# "INPUT": no_dup_vertices,
|
|
||||||
# "TARGET_CRS": "EPSG:4326",
|
|
||||||
# "OUTPUT": "memory:"
|
|
||||||
# }
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 7. REMOVE SLIVER POLYGONS (< 1 m²)
|
|
||||||
# # ========================================================
|
|
||||||
# # Filter polygons with area < 1 (threshold bisa kamu ubah)
|
|
||||||
# slivers = processing.run(
|
|
||||||
# "native:extractbyexpression",
|
|
||||||
# {
|
|
||||||
# "INPUT": reprojected,
|
|
||||||
# "EXPRESSION": "$area < 1",
|
|
||||||
# "OUTPUT": "memory:"
|
|
||||||
# }
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# summary["sliver_removed"] = slivers.featureCount()
|
|
||||||
|
|
||||||
# # Keep only polygons with area >= 1
|
|
||||||
# no_sliver = processing.run(
|
|
||||||
# "native:extractbyexpression",
|
|
||||||
# {
|
|
||||||
# "INPUT": reprojected,
|
|
||||||
# "EXPRESSION": "$area >= 1",
|
|
||||||
# "OUTPUT": "memory:"
|
|
||||||
# }
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 8. REMOVE TINY HOLES (< 1 m²)
|
|
||||||
# # ========================================================
|
|
||||||
# no_holes = processing.run(
|
|
||||||
# "native:deleteholes",
|
|
||||||
# {
|
|
||||||
# "INPUT": no_sliver,
|
|
||||||
# "MIN_AREA": 1, # minimum area of hole to keep
|
|
||||||
# "OUTPUT": "memory:"
|
|
||||||
# }
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# summary["holes_removed"] = 0 # can't count holes easily in PyQGIS
|
|
||||||
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 9. TRIM STRING FIELDS (ATTRIBUTE CLEANSING)
|
|
||||||
# # ========================================================
|
|
||||||
# trimmed = processing.run(
|
|
||||||
# "qgis:refactorfields",
|
|
||||||
# {
|
|
||||||
# "INPUT": no_holes,
|
|
||||||
# "FIELDS_MAPPING": [
|
|
||||||
# {
|
|
||||||
# "expression": f"trim(\"{field.name()}\")"
|
|
||||||
# if field.typeName().lower() in ["text", "varchar"]
|
|
||||||
# else f"\"{field.name()}\"",
|
|
||||||
# "name": field.name(),
|
|
||||||
# "type": field.type(),
|
|
||||||
# "length": field.length(),
|
|
||||||
# "precision": field.precision()
|
|
||||||
# }
|
|
||||||
# for field in no_holes.fields()
|
|
||||||
# ],
|
|
||||||
# "OUTPUT": "memory:"
|
|
||||||
# }
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # RETURN CLEANED LAYER
|
|
||||||
# # ========================================================
|
|
||||||
# return {
|
|
||||||
# "summary": summary,
|
|
||||||
# "clean_layer": trimmed
|
|
||||||
# }
|
|
||||||
|
|
||||||
|
|
||||||
# def cleansing_layer(layer: QgsVectorLayer) -> Dict:
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # INITIAL STATE
|
|
||||||
# # ========================================================
|
|
||||||
# print("\n========== START CLEANSING ==========")
|
|
||||||
# print("Step 0: Load Layer")
|
|
||||||
# print(" - Valid:", layer.isValid())
|
|
||||||
# print(" - Feature Count:", layer.featureCount())
|
|
||||||
|
|
||||||
# summary = {
|
|
||||||
# "step0_features": layer.featureCount(),
|
|
||||||
# "step1_invalid_before": 0,
|
|
||||||
# "step2_after_fix": 0,
|
|
||||||
# "step3_after_multipolygon": 0,
|
|
||||||
# "step4_duplicates_removed": 0,
|
|
||||||
# "step5_after_remove_vertices": 0,
|
|
||||||
# "step6_after_srid": 0,
|
|
||||||
# "step7_sliver_removed": 0,
|
|
||||||
# "step8_after_deleteholes": 0
|
|
||||||
# }
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 1. VALIDATE GEOMETRY
|
|
||||||
# # ========================================================
|
|
||||||
# print("\nStep 1: Identify invalid geometries")
|
|
||||||
|
|
||||||
# invalid_ids = []
|
|
||||||
# for f in layer.getFeatures():
|
|
||||||
# if not f.geometry().isGeosValid():
|
|
||||||
# invalid_ids.append(f.id())
|
|
||||||
|
|
||||||
# summary["step1_invalid_before"] = len(invalid_ids)
|
|
||||||
|
|
||||||
# print(" - Invalid geometries found:", len(invalid_ids))
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 2. FIX GEOMETRIES
|
|
||||||
# # ========================================================
|
|
||||||
# print("\nStep 2: Fix geometries")
|
|
||||||
# fixed = processing.run(
|
|
||||||
# "native:fixgeometries",
|
|
||||||
# {"INPUT": layer, "OUTPUT": "memory:"}
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# print(" - Valid:", fixed.isValid())
|
|
||||||
# print(" - Features after fix:", fixed.featureCount())
|
|
||||||
# summary["step2_after_fix"] = fixed.featureCount()
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 3. ENSURE MULTIPOLYGON
|
|
||||||
# # ========================================================
|
|
||||||
# print("\nStep 3: Ensure MULTIPOLYGON")
|
|
||||||
# multipolygon = processing.run(
|
|
||||||
# "native:collect",
|
|
||||||
# {"INPUT": fixed, "OUTPUT": "memory:"}
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# print(" - Valid:", multipolygon.isValid())
|
|
||||||
# print(" - Features:", multipolygon.featureCount())
|
|
||||||
# summary["step3_after_multipolygon"] = multipolygon.featureCount()
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 4. REMOVE DUPLICATE ROWS
|
|
||||||
# # ========================================================
|
|
||||||
# print("\nStep 4: Remove duplicate rows")
|
|
||||||
|
|
||||||
# all_fields = [f.name() for f in multipolygon.fields()]
|
|
||||||
# print(" - All fields:", all_fields)
|
|
||||||
|
|
||||||
# key_fields = None
|
|
||||||
|
|
||||||
# if "id" in all_fields:
|
|
||||||
# key_fields = ["id"]
|
|
||||||
# else:
|
|
||||||
# int_cols = [
|
|
||||||
# f.name() for f in multipolygon.fields()
|
|
||||||
# if f.typeName().lower() in ["int", "integer", "bigint"]
|
|
||||||
# ]
|
|
||||||
# if int_cols:
|
|
||||||
# key_fields = [int_cols[0]]
|
|
||||||
# else:
|
|
||||||
# key_fields = all_fields
|
|
||||||
|
|
||||||
# print(" - Using duplicate key:", key_fields)
|
|
||||||
|
|
||||||
# dedup = processing.run(
|
|
||||||
# "native:removeduplicatesbyattribute",
|
|
||||||
# {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"}
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# duplicates_removed = multipolygon.featureCount() - dedup.featureCount()
|
|
||||||
# summary["step4_duplicates_removed"] = duplicates_removed
|
|
||||||
|
|
||||||
# print(" - Features before:", multipolygon.featureCount())
|
|
||||||
# print(" - Features after:", dedup.featureCount())
|
|
||||||
# print(" - Duplicates removed:", duplicates_removed)
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 5. REMOVE DUPLICATE VERTICES
|
|
||||||
# # ========================================================
|
|
||||||
# print("\nStep 5: Remove duplicate vertices")
|
|
||||||
|
|
||||||
# no_dup_vertices = processing.run(
|
|
||||||
# "native:removeduplicatevertices",
|
|
||||||
# {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"}
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# print(" - Features:", no_dup_vertices.featureCount())
|
|
||||||
# summary["step5_after_remove_vertices"] = no_dup_vertices.featureCount()
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 6. FIX SRID / REPROJECT
|
|
||||||
# # ========================================================
|
|
||||||
# print("\nStep 6: Reproject (Fix SRID to EPSG:4326)")
|
|
||||||
|
|
||||||
# reprojected = processing.run(
|
|
||||||
# "native:reprojectlayer",
|
|
||||||
# {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"}
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# print(" - Features:", reprojected.featureCount())
|
|
||||||
# summary["step6_after_srid"] = reprojected.featureCount()
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 7. REMOVE SLIVER POLYGONS (< 1 m2)
|
|
||||||
# # ========================================================
|
|
||||||
# print("\nStep 7: Remove sliver polygons (<1 m²)")
|
|
||||||
|
|
||||||
# slivers = processing.run(
|
|
||||||
# "native:extractbyexpression",
|
|
||||||
# {"INPUT": reprojected, "EXPRESSION": "$area < 1", "OUTPUT": "memory:"}
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# summary["step7_sliver_removed"] = slivers.featureCount()
|
|
||||||
# print(" - Slivers found:", slivers.featureCount())
|
|
||||||
|
|
||||||
# no_sliver = processing.run(
|
|
||||||
# "native:extractbyexpression",
|
|
||||||
# {"INPUT": reprojected, "EXPRESSION": "$area >= 1", "OUTPUT": "memory:"}
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# print(" - Features left after removing slivers:", no_sliver.featureCount())
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # 8. REMOVE TINY HOLES (< 1 m2)
|
|
||||||
# # ========================================================
|
|
||||||
# print("\nStep 8: Remove tiny holes")
|
|
||||||
|
|
||||||
# no_holes = processing.run(
|
|
||||||
# "native:deleteholes",
|
|
||||||
# {"INPUT": no_sliver, "MIN_AREA": 1, "OUTPUT": "memory:"}
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# print(" - Features:", no_holes.featureCount())
|
|
||||||
# summary["step8_after_deleteholes"] = no_holes.featureCount()
|
|
||||||
|
|
||||||
# # ========================================================
|
|
||||||
# # FINISH (TRIM ATTRIBUTES)
|
|
||||||
# # ========================================================
|
|
||||||
# print("\nFinal Step: Trim string fields")
|
|
||||||
|
|
||||||
# trimmed = processing.run(
|
|
||||||
# "qgis:refactorfields",
|
|
||||||
# {
|
|
||||||
# "INPUT": no_holes,
|
|
||||||
# "FIELDS_MAPPING": [
|
|
||||||
# {
|
|
||||||
# "expression": f"trim(\"{field.name()}\")"
|
|
||||||
# if field.typeName().lower() in ["text", "varchar"]
|
|
||||||
# else f"\"{field.name()}\"",
|
|
||||||
# "name": field.name(),
|
|
||||||
# "type": field.type(),
|
|
||||||
# "length": field.length(),
|
|
||||||
# "precision": field.precision()
|
|
||||||
# }
|
|
||||||
# for field in no_holes.fields()
|
|
||||||
# ],
|
|
||||||
# "OUTPUT": "memory:"
|
|
||||||
# }
|
|
||||||
# )["OUTPUT"]
|
|
||||||
|
|
||||||
# print(" - Final feature count:", trimmed.featureCount())
|
|
||||||
# print("========== CLEANSING DONE ==========\n")
|
|
||||||
|
|
||||||
# return {
|
|
||||||
# "summary": summary,
|
|
||||||
# "clean_layer": trimmed
|
|
||||||
# }
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# self-intersection
|
# self-intersection
|
||||||
def cleansing_layer(layer: QgsVectorLayer) -> Dict:
|
def cleansing_layer(layer: QgsVectorLayer) -> Dict:
|
||||||
|
|
||||||
# ========================================================
|
|
||||||
# INITIAL STATE
|
|
||||||
# ========================================================
|
|
||||||
print("\n========== START CLEANSING ==========")
|
print("\n========== START CLEANSING ==========")
|
||||||
print("Step 0: Load Layer")
|
print("Step 0: Load Layer")
|
||||||
print(" - Valid:", layer.isValid())
|
print(" - Valid:", layer.isValid())
|
||||||
print(" - Feature Count:", layer.featureCount())
|
print(" - Feature Count:", layer.featureCount())
|
||||||
|
print(" - type:", layer.geometryType())
|
||||||
|
|
||||||
summary = {
|
summary = {
|
||||||
"step0_features": layer.featureCount(),
|
"features": layer.featureCount(),
|
||||||
"step1_invalid_before": 0,
|
"invalid_before": 0,
|
||||||
"step1_5_self_intersections": 0,
|
"after_fixgeometries": 0,
|
||||||
"step2_after_fix": 0,
|
"after_fix": 0,
|
||||||
"step3_after_multipolygon": 0,
|
"after_multipolygon": 0,
|
||||||
"step4_duplicates_removed": 0,
|
"duplicates_removed": 0,
|
||||||
"step5_after_remove_vertices": 0,
|
"after_remove_vertices": 0,
|
||||||
"step6_after_srid": 0,
|
"after_srid": 0,
|
||||||
"step7_sliver_removed": 0,
|
"sliver_removed": 0,
|
||||||
"step8_after_deleteholes": 0
|
"after_deleteholes": 0,
|
||||||
|
"valid_after": 0
|
||||||
}
|
}
|
||||||
|
|
||||||
# ========================================================
|
# 1. Geometry validity check
|
||||||
# 1. VALIDATE GEOMETRY
|
print("\nStep 1: Geometry validity check (QGIS native)")
|
||||||
# ========================================================
|
validity = processing.run(
|
||||||
print("\nStep 1: Identify invalid geometries")
|
"qgis:checkvalidity",
|
||||||
|
{
|
||||||
|
"INPUT_LAYER": layer,
|
||||||
|
"METHOD": 2, # GEOS
|
||||||
|
"IGNORE_RING_SELF_INTERSECTION": False,
|
||||||
|
"VALID_OUTPUT": "memory:",
|
||||||
|
"INVALID_OUTPUT": "memory:",
|
||||||
|
"ERROR_OUTPUT": "memory:"
|
||||||
|
}
|
||||||
|
)
|
||||||
|
invalid_layer = validity["INVALID_OUTPUT"]
|
||||||
|
error_table = validity["ERROR_OUTPUT"]
|
||||||
|
invalid_count = invalid_layer.featureCount()
|
||||||
|
summary["invalid_before"] = invalid_count
|
||||||
|
print(" - Invalid geometries found:", invalid_count)
|
||||||
|
print(" - Total error messages:", error_table.featureCount())
|
||||||
|
|
||||||
invalid_ids = []
|
# 1.1 Fix invalid geometries
|
||||||
for f in layer.getFeatures():
|
# print("\nStep 1.1: Fix invalid geometries (FixGeometries)")
|
||||||
if not f.geometry().isGeosValid():
|
# fixed_pre = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
|
||||||
invalid_ids.append(f.id())
|
# summary["after_fixgeometries"] = fixed_pre.featureCount()
|
||||||
|
# print(" - Features after FixGeometries:", fixed_pre.featureCount())
|
||||||
|
# layer = fixed_pre
|
||||||
|
|
||||||
summary["step1_invalid_before"] = len(invalid_ids)
|
# 2. Fix geometries (again)
|
||||||
print(" - Invalid geometries found:", len(invalid_ids))
|
|
||||||
|
|
||||||
# ========================================================
|
|
||||||
# 1.5 DETECT GEOMETRY ERRORS (MANUAL GEOS VALIDATION)
|
|
||||||
# ========================================================
|
|
||||||
print("\nStep 1.5: Detect geometry errors (universal GEOS-safe method)")
|
|
||||||
|
|
||||||
errors = []
|
|
||||||
|
|
||||||
for f in layer.getFeatures():
|
|
||||||
geom = f.geometry()
|
|
||||||
if not geom.isGeosValid():
|
|
||||||
# Kita hanya tandai invalid (tanpa reason)
|
|
||||||
errors.append(f.id())
|
|
||||||
|
|
||||||
summary["step1_5_geometry_errors"] = len(errors)
|
|
||||||
|
|
||||||
print(" - Geometry errors detected:", len(errors))
|
|
||||||
print(" - Invalid feature IDs (first 10):", errors[:10])
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ========================================================
|
|
||||||
# 1.6 FIX INVALID GEOMETRIES (Native FixGeometries)
|
|
||||||
# ========================================================
|
|
||||||
print("\nStep 1.6: Fix invalid geometries (FixGeometries)")
|
|
||||||
|
|
||||||
fixed_pre = processing.run(
|
|
||||||
"native:fixgeometries",
|
|
||||||
{"INPUT": layer, "OUTPUT": "memory:"}
|
|
||||||
)["OUTPUT"]
|
|
||||||
|
|
||||||
summary["step1_6_after_fixgeometries"] = fixed_pre.featureCount()
|
|
||||||
|
|
||||||
print(" - Features after FixGeometries:", fixed_pre.featureCount())
|
|
||||||
|
|
||||||
layer = fixed_pre
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ========================================================
|
|
||||||
# 2. FIX GEOMETRIES (INCLUDES SELF-INTERSECTION FIX)
|
|
||||||
# ========================================================
|
|
||||||
print("\nStep 2: Fix geometries (including self-intersections)")
|
print("\nStep 2: Fix geometries (including self-intersections)")
|
||||||
|
fixed = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
|
||||||
fixed = processing.run(
|
|
||||||
"native:fixgeometries",
|
|
||||||
{"INPUT": layer, "OUTPUT": "memory:"}
|
|
||||||
)["OUTPUT"]
|
|
||||||
|
|
||||||
print(" - Valid after fix:", fixed.isValid())
|
print(" - Valid after fix:", fixed.isValid())
|
||||||
print(" - Features after fix:", fixed.featureCount())
|
print(" - Features after fix:", fixed.featureCount())
|
||||||
summary["step2_after_fix"] = fixed.featureCount()
|
summary["after_fix"] = fixed.featureCount()
|
||||||
|
|
||||||
# ========================================================
|
# ========================================================
|
||||||
# 3. ENSURE MULTIPOLYGON
|
# 3. ENSURE MULTIPOLYGON (LTR compatible!!)
|
||||||
# ========================================================
|
# ========================================================
|
||||||
print("\nStep 3: Ensure MULTIPOLYGON")
|
print("\nStep 3: Ensure MULTIPOLYGON (LTR-safe method)")
|
||||||
|
|
||||||
multipolygon = processing.run(
|
# Step 3.1: Split multipart geometries into single parts (for a clean result)
|
||||||
"native:collect",
|
singleparts = processing.run(
|
||||||
|
"native:multiparttosingleparts",
|
||||||
{"INPUT": fixed, "OUTPUT": "memory:"}
|
{"INPUT": fixed, "OUTPUT": "memory:"}
|
||||||
)["OUTPUT"]
|
)["OUTPUT"]
|
||||||
|
|
||||||
|
print(" - After multiparttosingleparts:", singleparts.featureCount())
|
||||||
|
|
||||||
|
# Step 3.2: Promote all polygons to multipolygon
|
||||||
|
multipolygon = processing.run(
|
||||||
|
"native:promotetomulti",
|
||||||
|
{"INPUT": singleparts, "OUTPUT": "memory:"}
|
||||||
|
)["OUTPUT"]
|
||||||
|
|
||||||
|
print(" - After promotetomulti:", multipolygon.featureCount())
|
||||||
print(" - Valid:", multipolygon.isValid())
|
print(" - Valid:", multipolygon.isValid())
|
||||||
print(" - Features:", multipolygon.featureCount())
|
|
||||||
summary["step3_after_multipolygon"] = multipolygon.featureCount()
|
|
||||||
|
|
||||||
# ========================================================
|
summary["after_multipolygon"] = multipolygon.featureCount()
|
||||||
# 4. REMOVE DUPLICATE ROWS
|
|
||||||
# ========================================================
|
|
||||||
|
# 4. Remove duplicate rows
|
||||||
print("\nStep 4: Remove duplicate rows")
|
print("\nStep 4: Remove duplicate rows")
|
||||||
|
|
||||||
all_fields = [f.name() for f in multipolygon.fields()]
|
all_fields = [f.name() for f in multipolygon.fields()]
|
||||||
print(" - All fields:", all_fields)
|
print(" - All fields:", all_fields)
|
||||||
|
|
||||||
if "id" in all_fields:
|
if "id" in all_fields:
|
||||||
key_fields = ["id"]
|
key_fields = ["id"]
|
||||||
else:
|
else:
|
||||||
int_cols = [
|
int_cols = [f.name() for f in multipolygon.fields() if f.typeName().lower() in ["int", "integer", "bigint"]]
|
||||||
f.name() for f in multipolygon.fields()
|
|
||||||
if f.typeName().lower() in ["int", "integer", "bigint"]
|
|
||||||
]
|
|
||||||
key_fields = [int_cols[0]] if int_cols else all_fields
|
key_fields = [int_cols[0]] if int_cols else all_fields
|
||||||
|
|
||||||
print(" - Using duplicate key:", key_fields)
|
print(" - Using duplicate key:", key_fields)
|
||||||
|
dedup = processing.run("native:removeduplicatesbyattribute", {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"})["OUTPUT"]
|
||||||
dedup = processing.run(
|
|
||||||
"native:removeduplicatesbyattribute",
|
|
||||||
{"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"}
|
|
||||||
)["OUTPUT"]
|
|
||||||
|
|
||||||
duplicates_removed = multipolygon.featureCount() - dedup.featureCount()
|
duplicates_removed = multipolygon.featureCount() - dedup.featureCount()
|
||||||
summary["step4_duplicates_removed"] = duplicates_removed
|
summary["duplicates_removed"] = duplicates_removed
|
||||||
|
|
||||||
print(" - Features before:", multipolygon.featureCount())
|
print(" - Features before:", multipolygon.featureCount())
|
||||||
print(" - Features after:", dedup.featureCount())
|
print(" - Features after:", dedup.featureCount())
|
||||||
print(" - Duplicates removed:", duplicates_removed)
|
print(" - Duplicates removed:", duplicates_removed)
|
||||||
|
|
||||||
# ========================================================
|
# 5. Remove duplicate vertices
|
||||||
# 5. REMOVE DUPLICATE VERTICES
|
|
||||||
# ========================================================
|
|
||||||
print("\nStep 5: Remove duplicate vertices")
|
print("\nStep 5: Remove duplicate vertices")
|
||||||
|
no_dup_vertices = processing.run("native:removeduplicatevertices", {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"})["OUTPUT"]
|
||||||
no_dup_vertices = processing.run(
|
|
||||||
"native:removeduplicatevertices",
|
|
||||||
{"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"}
|
|
||||||
)["OUTPUT"]
|
|
||||||
|
|
||||||
print(" - Features:", no_dup_vertices.featureCount())
|
print(" - Features:", no_dup_vertices.featureCount())
|
||||||
summary["step5_after_remove_vertices"] = no_dup_vertices.featureCount()
|
summary["after_remove_vertices"] = no_dup_vertices.featureCount()
|
||||||
|
|
||||||
# ========================================================
|
print("\nStep 5.5: Check input CRS before reprojection")
|
||||||
# 6. FIX SRID / REPROJECT
|
input_crs = no_dup_vertices.crs()
|
||||||
# ========================================================
|
if input_crs.isValid():
|
||||||
print("\nStep 6: Reproject (Fix SRID to EPSG:4326)")
|
print(" - Input CRS:", input_crs.authid())
|
||||||
|
print(" - CRS description:", input_crs.description())
|
||||||
|
else:
|
||||||
|
print(" - CRS INVALID or UNDEFINED")
|
||||||
|
|
||||||
reprojected = processing.run(
|
# 6. REPROJECT to EPSG:4326 (NOTE: EPSG:4326 is a geographic CRS in degrees, not a metric one; use a projected CRS such as the local UTM zone before any area-based ops)
|
||||||
"native:reprojectlayer",
|
print("\nStep 6: Reproject layer to EPSG:4326 for metric area calculations")
|
||||||
{"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"}
|
# EPSG:4326 is a geographic CRS (units are degrees), not a metric one; prefer the local UTM zone if you know it
|
||||||
)["OUTPUT"]
|
final_proj = processing.run("native:reprojectlayer", {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"})["OUTPUT"]
|
||||||
|
print(" - Features after reproject:", final_proj.featureCount())
|
||||||
|
summary["after_srid"] = final_proj.featureCount()
|
||||||
|
|
||||||
print(" - Features:", reprojected.featureCount())
|
|
||||||
summary["step6_after_srid"] = reprojected.featureCount()
|
|
||||||
|
|
||||||
# ========================================================
|
# 7. Remove sliver polygons based on metric area (< 1 m^2)
|
||||||
# 7. REMOVE SLIVER POLYGONS (< 1 m2)
|
# print("\nStep 7: Remove sliver polygons (<1 m²)")
|
||||||
# ========================================================
|
# # use $area now because layer is in meters (EPSG:3857)
|
||||||
print("\nStep 7: Remove sliver polygons (<1 m²)")
|
# slivers = processing.run("native:extractbyexpression", {"INPUT": reprojected, "EXPRESSION": "$area < 1", "OUTPUT": "memory:"})["OUTPUT"]
|
||||||
|
# summary["sliver_removed"] = slivers.featureCount()
|
||||||
|
# print(" - Slivers found:", slivers.featureCount())
|
||||||
|
# no_sliver = processing.run(
|
||||||
|
# "native:extractbyexpression",
|
||||||
|
# {
|
||||||
|
# "INPUT": reprojected,
|
||||||
|
# "EXPRESSION": "geometry IS NOT NULL AND $area >= 1",
|
||||||
|
# "OUTPUT": "memory:"
|
||||||
|
# }
|
||||||
|
# )["OUTPUT"]
|
||||||
|
# print(" - Features left after removing slivers:", no_sliver.featureCount())
|
||||||
|
|
||||||
slivers = processing.run(
|
# # 8. Remove tiny holes (<1 m^2) — still in metric CRS
|
||||||
"native:extractbyexpression",
|
# print("\nStep 8: Remove tiny holes (<1 m²)")
|
||||||
{"INPUT": reprojected, "EXPRESSION": "$area < 1", "OUTPUT": "memory:"}
|
# no_holes = processing.run("native:deleteholes", {"INPUT": no_sliver, "MIN_AREA": 1, "OUTPUT": "memory:"})["OUTPUT"]
|
||||||
)["OUTPUT"]
|
# print(" - Features after delete holes:", no_holes.featureCount())
|
||||||
|
# summary["after_deleteholes"] = no_holes.featureCount()
|
||||||
|
|
||||||
summary["step7_sliver_removed"] = slivers.featureCount()
|
# # Reproject BACK to EPSG:4326 for downstream (GeoServer/PostGIS target)
|
||||||
print(" - Slivers found:", slivers.featureCount())
|
# print("\nStep 9: Reproject back to EPSG:4326")
|
||||||
|
# final_proj = processing.run("native:reprojectlayer", {"INPUT": no_holes, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"})["OUTPUT"]
|
||||||
|
# print(" - Features:", final_proj.featureCount())
|
||||||
|
|
||||||
no_sliver = processing.run(
|
# Final: Trim string fields
|
||||||
"native:extractbyexpression",
|
|
||||||
{"INPUT": reprojected, "EXPRESSION": "$area >= 1", "OUTPUT": "memory:"}
|
|
||||||
)["OUTPUT"]
|
|
||||||
|
|
||||||
print(" - Features left after removing slivers:", no_sliver.featureCount())
|
|
||||||
|
|
||||||
# ========================================================
|
|
||||||
# 8. REMOVE TINY HOLES (< 1 m2)
|
|
||||||
# ========================================================
|
|
||||||
print("\nStep 8: Remove tiny holes")
|
|
||||||
|
|
||||||
no_holes = processing.run(
|
|
||||||
"native:deleteholes",
|
|
||||||
{"INPUT": no_sliver, "MIN_AREA": 1, "OUTPUT": "memory:"}
|
|
||||||
)["OUTPUT"]
|
|
||||||
|
|
||||||
print(" - Features:", no_holes.featureCount())
|
|
||||||
summary["step8_after_deleteholes"] = no_holes.featureCount()
|
|
||||||
|
|
||||||
# ========================================================
|
|
||||||
# FINAL: TRIM STRING FIELDS
|
|
||||||
# ========================================================
|
|
||||||
print("\nFinal Step: Trim string fields")
|
print("\nFinal Step: Trim string fields")
|
||||||
|
|
||||||
trimmed = processing.run(
|
trimmed = processing.run(
|
||||||
"qgis:refactorfields",
|
"qgis:refactorfields",
|
||||||
{
|
{
|
||||||
"INPUT": no_holes,
|
"INPUT": final_proj,
|
||||||
"FIELDS_MAPPING": [
|
"FIELDS_MAPPING": [
|
||||||
{
|
{
|
||||||
"expression": f"trim(\"{field.name()}\")"
|
"expression": f"trim(\"{field.name()}\")"
|
||||||
|
|
@ -603,19 +186,98 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict:
|
||||||
"length": field.length(),
|
"length": field.length(),
|
||||||
"precision": field.precision()
|
"precision": field.precision()
|
||||||
}
|
}
|
||||||
for field in no_holes.fields()
|
for field in final_proj.fields()
|
||||||
],
|
],
|
||||||
|
"KEEP_GEOMETRY": True, # <--- WAJIB
|
||||||
"OUTPUT": "memory:"
|
"OUTPUT": "memory:"
|
||||||
}
|
}
|
||||||
)["OUTPUT"]
|
)["OUTPUT"]
|
||||||
|
|
||||||
|
|
||||||
|
valid_after = 0
|
||||||
|
for f in trimmed.getFeatures():
|
||||||
|
if f.geometry() is not None and f.geometry().isGeosValid():
|
||||||
|
valid_after += 1
|
||||||
|
summary["valid_after"] = valid_after
|
||||||
|
|
||||||
print(" - Final feature count:", trimmed.featureCount())
|
print(" - Final feature count:", trimmed.featureCount())
|
||||||
print("========== CLEANSING DONE ==========\n")
|
print("========== CLEANSING DONE ==========\n")
|
||||||
|
|
||||||
return {
|
return {"summary": summary, "clean_layer": trimmed}
|
||||||
"summary": summary,
|
|
||||||
"clean_layer": trimmed
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def cleansing_points(layer: QgsVectorLayer) -> dict:
    """Run the point cleaning pipeline on ``layer``.

    Steps, in order: validity check, geometry fix, duplicate-geometry
    removal, reprojection to EPSG:4326, trimming of string attributes and
    a final count of features that still carry a geometry.

    Args:
        layer: Input vector layer to clean.

    Returns:
        A dict with two keys: ``"summary"`` (feature counts recorded after
        each step) and ``"clean_layer"`` (the output layer of the last
        processing step).
    """
    print("\n=== POINT CLEANING PIPELINE ===")

    summary = {
        "features_before": layer.featureCount(),
        "invalid_before": 0,
        "after_fix": 0,
        "after_dedup": 0,
        "after_reproject": 0,
        "valid_after": 0,
    }

    # 1. Check validity (point layers are expected to report no errors,
    #    the count is still recorded for the summary).
    validity = processing.run(
        "qgis:checkvalidity",
        {
            "INPUT_LAYER": layer,
            "METHOD": 2,
            "VALID_OUTPUT": "memory:",
            "INVALID_OUTPUT": "memory:",
            "ERROR_OUTPUT": "memory:",
        },
    )
    invalid = validity["INVALID_OUTPUT"].featureCount()
    summary["invalid_before"] = invalid
    print("- Invalid points:", invalid)

    # 2. Fix geometries (safe)
    fixed = processing.run(
        "native:fixgeometries",
        {"INPUT": layer, "OUTPUT": "memory:"},
    )["OUTPUT"]
    summary["after_fix"] = fixed.featureCount()

    # 3. Remove duplicate geometries.
    #    The algorithm id is "native:deleteduplicategeometries"; the previous
    #    id "native:removedduplicategeometries" is not a registered algorithm.
    dedup = processing.run(
        "native:deleteduplicategeometries",
        {"INPUT": fixed, "OUTPUT": "memory:"},
    )["OUTPUT"]
    summary["after_dedup"] = dedup.featureCount()

    # 4. Reproject
    reproject = processing.run(
        "native:reprojectlayer",
        {"INPUT": dedup, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"},
    )["OUTPUT"]
    summary["after_reproject"] = reproject.featureCount()

    # 5. Trim string fields; every other field is copied through unchanged.
    #    "string" is accepted next to "text"/"varchar" because intermediate
    #    layers may report that type name for string fields
    #    (NOTE(review): confirm against the providers used in production).
    text_type_names = {"text", "varchar", "string", "char", "character varying"}
    fields_mapping = []
    for field in reproject.fields():
        quoted_name = f"\"{field.name()}\""
        is_text = field.typeName().lower() in text_type_names
        fields_mapping.append(
            {
                "expression": f"trim({quoted_name})" if is_text else quoted_name,
                "name": field.name(),
                "type": field.type(),
                "length": field.length(),
                "precision": field.precision(),
            }
        )

    trimmed = processing.run(
        "qgis:refactorfields",
        {
            "INPUT": reproject,
            "FIELDS_MAPPING": fields_mapping,
            "KEEP_GEOMETRY": True,
            "OUTPUT": "memory:",
        },
    )["OUTPUT"]

    # 6. Count features that still have a geometry. hasGeometry() is used
    #    because feature.geometry() returns a geometry object even when the
    #    feature has none, so comparing it to None never filters anything.
    summary["valid_after"] = sum(
        1 for feature in trimmed.getFeatures() if feature.hasGeometry()
    )

    return {"summary": summary, "clean_layer": trimmed}
|
||||||
|
|
|
||||||
160
main.py
160
main.py
|
|
@ -1,8 +1,20 @@
|
||||||
from fastapi import FastAPI, BackgroundTasks
|
from fastapi import FastAPI, BackgroundTasks
|
||||||
from qgis_bootstrap import start_qgis
|
import psycopg2
|
||||||
|
import requests
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
from qgis_bootstrap import start_qgis
|
||||||
# from cleansing_service import load_layer, cleansing_layer
|
# from cleansing_service import load_layer, cleansing_layer
|
||||||
from full_cleansing_service import load_layer, cleansing_layer
|
from full_cleansing_service import load_layer, cleansing_layer
|
||||||
|
from qgis.core import (
|
||||||
|
QgsVectorLayer,
|
||||||
|
QgsVectorLayerExporter,
|
||||||
|
QgsDataSourceUri,
|
||||||
|
QgsProviderRegistry,
|
||||||
|
QgsCoordinateReferenceSystem
|
||||||
|
)
|
||||||
|
from qgis.PyQt.QtCore import QByteArray
|
||||||
|
from core.config import HOST,PORT,DB,USER,PWD,SCHEMA,GEOM_COL
|
||||||
|
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
|
|
@ -67,70 +79,116 @@ def run_clean_table(table_name: str, job_id: str):
|
||||||
"status": "FINISHED"
|
"status": "FINISHED"
|
||||||
}
|
}
|
||||||
|
|
||||||
import requests
|
|
||||||
requests.post(
|
requests.post(
|
||||||
"http://backend-utama:8000/jobs/callback",
|
"http://localhost:8000/jobs/callback",
|
||||||
json=callback_payload
|
json=callback_payload
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f"=== Cleansing selesai untuk tabel: {table_name} ===\n")
|
print(f"=== Cleansing selesai untuk tabel: {table_name} ===\n")
|
||||||
|
|
||||||
|
def to_python(v):
    """Convert an attribute value into a plain Python object.

    Args:
        v: Attribute value as returned by a feature; may be ``None``, a
            Qt wrapper object, or already a native Python value.

    Returns:
        ``None`` when ``v`` is ``None`` or reports itself as null through
        ``isNull()``; the result of the first available converter method
        (``toPyObject``, ``toPyDateTime``, ``toPyDate``, ``toPyTime``) when
        ``v`` exposes one; otherwise ``v`` unchanged.
    """
    # Null
    if v is None:
        return None

    # Empty / null Qt value (anything exposing isNull())
    if hasattr(v, "isNull") and v.isNull():
        return None

    # Convert Qt wrappers to native Python values. The date/time converters
    # are looked up by name so that Qt date objects are turned into values a
    # database driver can adapt; objects without these methods fall through.
    for converter_name in ("toPyObject", "toPyDateTime", "toPyDate", "toPyTime"):
        if hasattr(v, converter_name):
            return getattr(v, converter_name)()

    # Fallback: already a native value
    return v
|
||||||
|
|
||||||
|
def _postgres_column_type(type_name):
    """Map a layer field type name to the PostgreSQL column type to create."""
    lowered = type_name.lower()
    if "int" in lowered:
        # 64-bit integer fields would overflow a 4-byte INTEGER column.
        if any(token in lowered for token in ("64", "int8", "big")):
            return "BIGINT"
        return "INTEGER"
    if "double" in lowered or "float" in lowered or "real" in lowered:
        return "DOUBLE PRECISION"
    return "TEXT"


def save_to_postgis(layer, table_name):
    """Replace a PostGIS table with the fields and features of ``layer``.

    The target table ``SCHEMA.table_name`` is dropped (``CASCADE``) if it
    exists, recreated from the layer's fields plus one geometry column named
    ``GEOM_COL``, and filled with every feature of the layer. All statements
    run in one transaction: on any error the transaction is rolled back and
    the exception is re-raised, and the connection is always closed.

    Args:
        layer: Layer providing the CRS, fields and features to write.
        table_name: Name of the table to (re)create inside ``SCHEMA``.

    Raises:
        Exception: Whatever the database driver or the layer iteration
            raises; nothing is swallowed.
    """
    # Local imports keep this block self-contained.
    from psycopg2 import sql
    from qgis.core import QgsWkbTypes

    schema = SCHEMA
    geom_col = GEOM_COL

    srid = int(layer.crs().postgisSrid())
    fields = layer.fields()

    # Use the layer's own geometry type instead of a hard-coded MultiPolygon
    # so point and line layers can be stored as well. Names PostGIS does not
    # accept as a type modifier fall back to the generic "Geometry".
    wkb_type = layer.wkbType()
    geom_type = QgsWkbTypes.displayString(wkb_type)
    if (
        not geom_type
        or geom_type in ("Unknown", "NoGeometry")
        or geom_type.endswith("25D")
    ):
        geom_type = "Geometry"
    promote_to_multi = QgsWkbTypes.isMultiType(wkb_type)

    # Identifiers are composed with psycopg2.sql so that schema, table and
    # column names are always quoted/escaped by the driver.
    table_ident = sql.SQL(".").join(
        [sql.Identifier(schema), sql.Identifier(table_name)]
    )

    attribute_fields = [f for f in fields if f.name() != geom_col]
    attribute_columns = [f.name().replace(" ", "_") for f in attribute_fields]

    column_defs = [
        sql.SQL("{} {}").format(
            sql.Identifier(column),
            sql.SQL(_postgres_column_type(field.typeName())),
        )
        for column, field in zip(attribute_columns, attribute_fields)
    ]
    # geom_type comes from the QGIS type name and srid is an int, so it is
    # safe to embed them as raw SQL in the type modifier.
    column_defs.append(
        sql.SQL("{} {}").format(
            sql.Identifier(geom_col),
            sql.SQL(f"geometry({geom_type},{srid})"),
        )
    )

    drop_sql = sql.SQL("DROP TABLE IF EXISTS {} CASCADE").format(table_ident)
    create_sql = sql.SQL("CREATE TABLE {} ({})").format(
        table_ident, sql.SQL(",").join(column_defs)
    )

    # One placeholder per attribute, then the geometry built from WKB with
    # the layer SRID set explicitly.
    value_exprs = [sql.Placeholder() for _ in attribute_columns]
    value_exprs.append(
        sql.SQL("ST_GeomFromWKB({}, {})").format(
            sql.Placeholder(), sql.Literal(srid)
        )
    )
    insert_sql = sql.SQL("INSERT INTO {} ({}) VALUES ({})").format(
        table_ident,
        sql.SQL(",").join(
            [sql.Identifier(column) for column in attribute_columns]
            + [sql.Identifier(geom_col)]
        ),
        sql.SQL(",").join(value_exprs),
    )

    count = 0
    conn = psycopg2.connect(
        dbname=DB,
        host=HOST,
        port=PORT,
        user=USER,
        password=PWD,
    )
    try:
        with conn.cursor() as cur:
            cur.execute(drop_sql)
            cur.execute(create_sql)

            for feat in layer.getFeatures():
                row = [
                    to_python(value)
                    for field, value in zip(fields, feat.attributes())
                    if field.name() != geom_col
                ]

                if feat.hasGeometry():
                    geom = feat.geometry()
                    # A multi-part column rejects single-part geometries.
                    if promote_to_multi and not geom.isMultipart():
                        geom.convertToMultiType()
                    row.append(psycopg2.Binary(bytes(geom.asWkb())))
                else:
                    # Features without geometry are stored with a NULL geom.
                    row.append(None)

                cur.execute(insert_sql, row)
                count += 1

        conn.commit()
    except Exception:
        # Undo the DROP/CREATE and partial inserts before propagating.
        conn.rollback()
        raise
    finally:
        conn.close()

    print(f"[DB] Inserted features: {count}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ os.environ["QT_QPA_PLATFORM"] = "offscreen"
|
||||||
sys.path.append(f"{QGIS_PREFIX}/python")
|
sys.path.append(f"{QGIS_PREFIX}/python")
|
||||||
sys.path.append(f"{QGIS_PREFIX}/python/plugins")
|
sys.path.append(f"{QGIS_PREFIX}/python/plugins")
|
||||||
|
|
||||||
from qgis.core import QgsApplication
|
from qgis.core import QgsApplication, QgsProviderRegistry
|
||||||
from qgis.analysis import QgsNativeAlgorithms
|
from qgis.analysis import QgsNativeAlgorithms
|
||||||
|
|
||||||
import processing
|
import processing
|
||||||
|
|
@ -29,52 +29,7 @@ def start_qgis():
|
||||||
|
|
||||||
# === WAJIB: initialize processing ===
|
# === WAJIB: initialize processing ===
|
||||||
Processing.initialize()
|
Processing.initialize()
|
||||||
|
QgsProviderRegistry.instance()
|
||||||
qgs.processingRegistry().addProvider(QgsNativeAlgorithms())
|
qgs.processingRegistry().addProvider(QgsNativeAlgorithms())
|
||||||
|
|
||||||
return qgs
|
return qgs
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# DEPLOYMENT
|
|
||||||
# import os
|
|
||||||
# import sys
|
|
||||||
|
|
||||||
# # QGIS environment
|
|
||||||
# os.environ["QGIS_PREFIX_PATH"] = "/usr"
|
|
||||||
# os.environ["QGIS_HOME"] = "/usr"
|
|
||||||
|
|
||||||
# os.environ["PROJ_LIB"] = "/usr/share/proj"
|
|
||||||
# os.environ["GDAL_DATA"] = "/usr/share/gdal"
|
|
||||||
# os.environ["QT_PLUGIN_PATH"] = "/usr/lib/x86_64-linux-gnu/qt5/plugins"
|
|
||||||
|
|
||||||
# os.environ["QT_QPA_PLATFORM"] = "offscreen"
|
|
||||||
|
|
||||||
# # QGIS Python plugins (THIS IS THE MISSING PART)
|
|
||||||
# sys.path.append("/usr/share/qgis/python")
|
|
||||||
# sys.path.append("/usr/share/qgis/python/plugins")
|
|
||||||
|
|
||||||
# # Python modules (from system)
|
|
||||||
# sys.path.append("/usr/lib/python3/dist-packages")
|
|
||||||
# sys.path.append("/usr/lib/python3/dist-packages/qgis")
|
|
||||||
|
|
||||||
|
|
||||||
# from qgis.core import QgsApplication
|
|
||||||
# from qgis.analysis import QgsNativeAlgorithms
|
|
||||||
# import processing
|
|
||||||
# from processing.core.Processing import Processing
|
|
||||||
|
|
||||||
# def start_qgis():
|
|
||||||
# qgs = QgsApplication([], False)
|
|
||||||
# qgs.initQgis()
|
|
||||||
# Processing.initialize()
|
|
||||||
# qgs.processingRegistry().addProvider(QgsNativeAlgorithms())
|
|
||||||
# return qgs
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user