2025-11-25 08:33:38 +00:00
|
|
|
from qgis.core import (
|
2025-11-29 04:44:08 +00:00
|
|
|
QgsDataSourceUri,
|
2025-11-26 07:18:46 +00:00
|
|
|
QgsFeature,
|
2025-11-25 08:33:38 +00:00
|
|
|
QgsVectorLayer,
|
|
|
|
|
QgsVectorLayerExporter,
|
2025-11-29 04:44:08 +00:00
|
|
|
QgsVectorFileWriter,
|
|
|
|
|
QgsWkbTypes
|
2025-11-25 08:33:38 +00:00
|
|
|
)
|
|
|
|
|
import processing
|
|
|
|
|
from typing import Dict
|
2025-12-01 03:02:48 +00:00
|
|
|
from config import HOST,PORT,DB,USER,PWD,SCHEMA,GEOM_COL
|
2025-11-25 08:33:38 +00:00
|
|
|
|
|
|
|
|
def load_layer(table_name: str):
    """Open a table from the configured PostgreSQL database as a vector layer.

    Connection settings (HOST, PORT, DB, USER, PWD), the schema and the
    geometry column all come from the ``config`` module.

    Args:
        table_name: Name of the table inside ``SCHEMA``; also used as the
            display name of the returned layer.

    Returns:
        The ``QgsVectorLayer`` built with the "postgres" provider. Its
        validity is only printed, not enforced, so callers may receive an
        invalid layer.
    """
    source = QgsDataSourceUri()
    source.setConnection(HOST, PORT, DB, USER, PWD)
    # Arguments: schema, table, geometry column, SQL filter (none), key column.
    source.setDataSource(SCHEMA, table_name, GEOM_COL, "", "_id")

    pg_layer = QgsVectorLayer(source.uri(), table_name, "postgres")
    print("Layer valid:", pg_layer.isValid())
    return pg_layer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Polygon cleansing pipeline (repairs invalid geometries such as self-intersections)
|
|
|
|
|
def cleansing_layer(layer: QgsVectorLayer) -> Dict:
    """Run the polygon cleansing pipeline on ``layer`` and report step counts.

    Every step is executed through ``processing.run`` with a ``memory:``
    output, so the input layer itself is not modified. Active steps, in order:

    1. Geometry validity check (count only, nothing is filtered out).
    2. Fix geometries.
    3. Promote geometries to multipart.
    4. Remove duplicate rows by attribute.
    5. Remove duplicate vertices.
    6. Reproject to EPSG:4326.
    7. Trim string attribute values.

    Args:
        layer: Source vector layer to clean.

    Returns:
        A dict with two keys: ``"summary"`` (per-step counters) and
        ``"clean_layer"`` (the final in-memory layer). The summary keys
        ``after_fixgeometries``, ``sliver_removed`` and ``after_deleteholes``
        belong to steps that are currently disabled and therefore stay 0.
    """
    print("\n========== START CLEANSING ==========")
    print("Step 0: Load Layer")
    print(" - Valid:", layer.isValid())
    print(" - Feature Count:", layer.featureCount())
    print(" - type:", layer.geometryType())

    summary = {
        "features": layer.featureCount(),
        "invalid_before": 0,
        "after_fixgeometries": 0,
        "after_fix": 0,
        "after_multipolygon": 0,
        "duplicates_removed": 0,
        "after_remove_vertices": 0,
        "after_srid": 0,
        "sliver_removed": 0,
        "after_deleteholes": 0,
        "valid_after": 0,
    }

    # 1. Geometry validity check (diagnostic only; results are just counted)
    print("\nStep 1: Geometry validity check (QGIS native)")
    validity = processing.run(
        "qgis:checkvalidity",
        {
            "INPUT_LAYER": layer,
            "METHOD": 2,  # GEOS
            "IGNORE_RING_SELF_INTERSECTION": False,
            "VALID_OUTPUT": "memory:",
            "INVALID_OUTPUT": "memory:",
            "ERROR_OUTPUT": "memory:",
        },
    )
    invalid_layer = validity["INVALID_OUTPUT"]
    error_table = validity["ERROR_OUTPUT"]
    invalid_count = invalid_layer.featureCount()
    summary["invalid_before"] = invalid_count
    print(" - Invalid geometries found:", invalid_count)
    print(" - Total error messages:", error_table.featureCount())

    # NOTE: a separate "Step 1.1" FixGeometries pre-pass used to live here; it
    # is disabled, which is why summary["after_fixgeometries"] remains 0.

    # 2. Fix geometries
    print("\nStep 2: Fix geometries (including self-intersections)")
    fixed = processing.run(
        "native:fixgeometries",
        {"INPUT": layer, "OUTPUT": "memory:"},
    )["OUTPUT"]
    # isValid() reports whether the output *layer* is usable, not whether
    # every geometry in it is valid.
    print(" - Valid after fix:", fixed.isValid())
    print(" - Features after fix:", fixed.featureCount())
    summary["after_fix"] = fixed.featureCount()

    # ========================================================
    # 3. ENSURE MULTIPOLYGON (LTR compatible)
    # ========================================================
    print("\nStep 3: Ensure MULTIPOLYGON (LTR-safe method)")
    # Step 3.1 (split multiparts into single parts first) is disabled.
    # Step 3.2: promote every polygon to a multipolygon.
    multipolygon = processing.run(
        "native:promotetomulti",
        {"INPUT": fixed, "OUTPUT": "memory:"},
    )["OUTPUT"]
    print(" - After promotetomulti:", multipolygon.featureCount())
    print(" - Valid:", multipolygon.isValid())
    summary["after_multipolygon"] = multipolygon.featureCount()

    # 4. Remove duplicate rows
    print("\nStep 4: Remove duplicate rows")
    all_fields = [f.name() for f in multipolygon.fields()]
    print(" - All fields:", all_fields)
    if "id" in all_fields:
        key_fields = ["id"]
    else:
        # Fall back to the first integer-typed column, or to the full
        # attribute set when no such column is found.
        int_cols = [
            f.name()
            for f in multipolygon.fields()
            if f.typeName().lower() in ["int", "integer", "bigint"]
        ]
        key_fields = [int_cols[0]] if int_cols else all_fields
    print(" - Using duplicate key:", key_fields)
    dedup = processing.run(
        "native:removeduplicatesbyattribute",
        {
            "INPUT": multipolygon,
            "FIELDS": key_fields,
            "METHOD": 0,
            "OUTPUT": "memory:",
        },
    )["OUTPUT"]
    duplicates_removed = multipolygon.featureCount() - dedup.featureCount()
    summary["duplicates_removed"] = duplicates_removed
    print(" - Features before:", multipolygon.featureCount())
    print(" - Features after:", dedup.featureCount())
    print(" - Duplicates removed:", duplicates_removed)

    # 5. Remove duplicate vertices
    print("\nStep 5: Remove duplicate vertices")
    # NOTE(review): confirm that "VERTICES" is a parameter this algorithm
    # actually reads; it is passed through unchanged from the original code.
    no_dup_vertices = processing.run(
        "native:removeduplicatevertices",
        {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"},
    )["OUTPUT"]
    print(" - Features:", no_dup_vertices.featureCount())
    summary["after_remove_vertices"] = no_dup_vertices.featureCount()

    print("\nStep 5.5: Check input CRS before reprojection")
    input_crs = no_dup_vertices.crs()
    if input_crs.isValid():
        print(" - Input CRS:", input_crs.authid())
        print(" - CRS description:", input_crs.description())
    else:
        print(" - CRS INVALID or UNDEFINED")

    # 6. Reproject to EPSG:4326, the CRS of the final output.
    # EPSG:4326 is a geographic CRS (degrees), NOT a metric one. The former
    # log message claimed "metric area calculations", which was misleading.
    print("\nStep 6: Reproject layer to EPSG:4326")
    final_proj = processing.run(
        "native:reprojectlayer",
        {
            "INPUT": no_dup_vertices,
            "TARGET_CRS": "EPSG:4326",
            "OUTPUT": "memory:",
        },
    )["OUTPUT"]
    print(" - Features after reproject:", final_proj.featureCount())
    summary["after_srid"] = final_proj.featureCount()

    # NOTE: steps 7-9 (drop sliver polygons < 1 m^2, delete holes < 1 m^2,
    # reproject back to EPSG:4326) are disabled, so summary["sliver_removed"]
    # and summary["after_deleteholes"] remain 0. If they are re-enabled they
    # must run on a layer in a projected, metre-based CRS (e.g. the local UTM
    # zone), because $area thresholds in metres are meaningless in degrees.

    # Final: Trim string fields
    print("\nFinal Step: Trim string fields")
    # NOTE(review): the layer here is an intermediate memory layer, where
    # string columns can be reported with the type name "string" instead of
    # the PostgreSQL names, so those names are accepted as well.
    string_type_names = ("text", "varchar", "string", "char", "bpchar")
    fields_mapping = []
    for field in final_proj.fields():
        # Double any embedded double quote so the column reference stays a
        # valid QGIS expression.
        column_ref = '"{}"'.format(field.name().replace('"', '""'))
        is_text = field.typeName().lower() in string_type_names
        fields_mapping.append(
            {
                "expression": f"trim({column_ref})" if is_text else column_ref,
                "name": field.name(),
                "type": field.type(),
                "length": field.length(),
                "precision": field.precision(),
            }
        )
    trimmed = processing.run(
        "qgis:refactorfields",
        {
            "INPUT": final_proj,
            "FIELDS_MAPPING": fields_mapping,
            "KEEP_GEOMETRY": True,  # marked as mandatory by the original author
            "OUTPUT": "memory:",
        },
    )["OUTPUT"]

    # Count features whose geometry exists and passes the GEOS validity test.
    # QgsFeature.geometry() never returns None, so hasGeometry() is the real
    # "is there a geometry" check.
    valid_after = sum(
        1
        for feat in trimmed.getFeatures()
        if feat.hasGeometry() and feat.geometry().isGeosValid()
    )
    summary["valid_after"] = valid_after

    print(" - Final feature count:", trimmed.featureCount())
    print("========== CLEANSING DONE ==========\n")

    return {"summary": summary, "clean_layer": trimmed}
|
2025-11-25 08:33:38 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-11-29 04:44:08 +00:00
|
|
|
def cleansing_points(layer: QgsVectorLayer) -> Dict:
    """Run the point cleansing pipeline on ``layer`` and report step counts.

    Every step is executed through ``processing.run`` with a ``memory:``
    output, so the input layer itself is not modified. Steps, in order:
    validity check (count only), fix geometries, delete duplicate geometries,
    reproject to EPSG:4326, trim string attribute values, and a final count
    of features that still carry a non-empty geometry.

    Args:
        layer: Source point layer to clean.

    Returns:
        A dict with two keys: ``"summary"`` (per-step counters) and
        ``"clean_layer"`` (the final in-memory layer).
    """
    print("\n=== POINT CLEANING PIPELINE ===")

    summary = {
        "features_before": layer.featureCount(),
        "invalid_before": 0,
        "after_fix": 0,
        "after_dedup": 0,
        "after_reproject": 0,
        "valid_after": 0,
    }

    # 1. Check validity (diagnostic only; per the original author this is
    #    expected to report no errors for point geometries).
    validity = processing.run(
        "qgis:checkvalidity",
        {
            "INPUT_LAYER": layer,
            "METHOD": 2,
            "VALID_OUTPUT": "memory:",
            "INVALID_OUTPUT": "memory:",
            "ERROR_OUTPUT": "memory:",
        },
    )
    invalid = validity["INVALID_OUTPUT"].featureCount()
    summary["invalid_before"] = invalid
    print("- Invalid points:", invalid)

    # 2. Fix geometries (safe)
    fixed = processing.run(
        "native:fixgeometries",
        {"INPUT": layer, "OUTPUT": "memory:"},
    )["OUTPUT"]
    summary["after_fix"] = fixed.featureCount()

    # 3. Remove features with duplicate geometries (identical coordinates).
    # The id used previously, "native:removedduplicategeometries", is not a
    # registered algorithm; the correct id is below.
    dedup = processing.run(
        "native:deleteduplicategeometries",
        {"INPUT": fixed, "OUTPUT": "memory:"},
    )["OUTPUT"]
    summary["after_dedup"] = dedup.featureCount()

    # 4. Reproject
    reproject = processing.run(
        "native:reprojectlayer",
        {"INPUT": dedup, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"},
    )["OUTPUT"]
    summary["after_reproject"] = reproject.featureCount()

    # 5. Trim string fields
    # NOTE(review): the layer here is an intermediate memory layer, where
    # string columns can be reported with the type name "string" instead of
    # the PostgreSQL names, so those names are accepted as well.
    string_type_names = ("text", "varchar", "string", "char", "bpchar")
    fields_mapping = []
    for field in reproject.fields():
        # Double any embedded double quote so the column reference stays a
        # valid QGIS expression.
        column_ref = '"{}"'.format(field.name().replace('"', '""'))
        is_text = field.typeName().lower() in string_type_names
        fields_mapping.append(
            {
                "expression": f"trim({column_ref})" if is_text else column_ref,
                "name": field.name(),
                "type": field.type(),
                "length": field.length(),
                "precision": field.precision(),
            }
        )
    trimmed = processing.run(
        "qgis:refactorfields",
        {
            "INPUT": reproject,
            "FIELDS_MAPPING": fields_mapping,
            "KEEP_GEOMETRY": True,
            "OUTPUT": "memory:",
        },
    )["OUTPUT"]

    # 6. Simple validity check for points: count features that still carry a
    # non-empty geometry. QgsFeature.geometry() never returns None, so the
    # former "is not None" test counted every feature, including those with a
    # null geometry.
    valid_after = sum(
        1
        for feat in trimmed.getFeatures()
        if feat.hasGeometry() and not feat.geometry().isEmpty()
    )
    summary["valid_after"] = valid_after

    return {"summary": summary, "clean_layer": trimmed}
|