From 1b7f6ab5ea374ae58f65d394fcc917b2bbd108d6 Mon Sep 17 00:00:00 2001 From: DmsAnhr Date: Mon, 22 Dec 2025 15:24:03 +0700 Subject: [PATCH] update cleansing flow --- full_cleansing_service.py | 219 ++++++++++++++++++++++++++++++-------- 1 file changed, 175 insertions(+), 44 deletions(-) diff --git a/full_cleansing_service.py b/full_cleansing_service.py index fad6b7b..6ba2597 100644 --- a/full_cleansing_service.py +++ b/full_cleansing_service.py @@ -45,13 +45,133 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict: "valid_after": 0 } - # 1. Geometry validity check - print("\nStep 1: Geometry validity check (QGIS native)") + # # 1. Geometry validity check + # print("\nStep 1: Geometry validity check (QGIS native)") + # validity = processing.run( + # "qgis:checkvalidity", + # { + # "INPUT_LAYER": layer, + # "METHOD": 2, # GEOS + # "IGNORE_RING_SELF_INTERSECTION": False, + # "VALID_OUTPUT": "memory:", + # "INVALID_OUTPUT": "memory:", + # "ERROR_OUTPUT": "memory:" + # } + # ) + # invalid_layer = validity["INVALID_OUTPUT"] + # error_table = validity["ERROR_OUTPUT"] + # invalid_count = invalid_layer.featureCount() + # summary["invalid_before"] = invalid_count + # print(" - Invalid geometries found:", invalid_count) + # print(" - Total error messages:", error_table.featureCount()) + + # # 1.1 Fix invalid geometries + # # print("\nStep 1.1: Fix invalid geometries (FixGeometries)") + # # fixed_pre = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"] + # # summary["after_fixgeometries"] = fixed_pre.featureCount() + # # print(" - Features after FixGeometries:", fixed_pre.featureCount()) + # # layer = fixed_pre + + # # 2. 
Fix geometries (again) + # print("\nStep 2: Fix geometries (including self-intersections)") + # fixed = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"] + # print(" - Valid after fix:", fixed.isValid()) + # print(" - Features after fix:", fixed.featureCount()) + # summary["after_fix"] = fixed.featureCount() + + # # ======================================================== + # # 3. ENSURE MULTIPOLYGON (LTR compatible!!) + # # ======================================================== + # print("\nStep 3: Ensure MULTIPOLYGON (LTR-safe method)") + + # # Step 3.1: Pecah multiparts → single (agar bersih) + # singleparts = processing.run( + # "native:multiparttosingleparts", + # {"INPUT": fixed, "OUTPUT": "memory:"} + # )["OUTPUT"] + + # print(" - After multiparttosingleparts:", singleparts.featureCount()) + + # # Step 3.2: Promote semua polygon → multipolygon + # multipolygon = processing.run( + # "native:promotetomulti", + # {"INPUT": fixed, "OUTPUT": "memory:"} + # )["OUTPUT"] + + # print(" - After promotetomulti:", multipolygon.featureCount()) + # print(" - Valid:", multipolygon.isValid()) + + # summary["after_multipolygon"] = multipolygon.featureCount() + + + # # 4. 
Remove duplicate rows + # print("\nStep 4: Remove duplicate rows") + # all_fields = [f.name() for f in multipolygon.fields()] + # print(" - All fields:", all_fields) + # if "id" in all_fields: + # key_fields = ["id"] + # else: + # int_cols = [f.name() for f in multipolygon.fields() if f.typeName().lower() in ["int", "integer", "bigint"]] + # key_fields = [int_cols[0]] if int_cols else all_fields + # print(" - Using duplicate key:", key_fields) + # dedup = processing.run("native:removeduplicatesbyattribute", {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"})["OUTPUT"] + # duplicates_removed = multipolygon.featureCount() - dedup.featureCount() + # summary["duplicates_removed"] = duplicates_removed + # print(" - Features before:", multipolygon.featureCount()) + # print(" - Features after:", dedup.featureCount()) + # print(" - Duplicates removed:", duplicates_removed) + + # # 5. Remove duplicate vertices + # print("\nStep 5: Remove duplicate vertices") + # no_dup_vertices = processing.run("native:removeduplicatevertices", {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"})["OUTPUT"] + # print(" - Features:", no_dup_vertices.featureCount()) + # summary["after_remove_vertices"] = no_dup_vertices.featureCount() + + # print("\nStep 5.5: Check input CRS before reprojection") + # input_crs = no_dup_vertices.crs() + # if input_crs.isValid(): + # print(" - Input CRS:", input_crs.authid()) + # print(" - CRS description:", input_crs.description()) + # else: + # print(" - CRS INVALID or UNDEFINED") + + # # 6. 
REPROJECT to metric CRS BEFORE any area-based ops (use a local UTM zone; EPSG:4326 is geographic, not metric) + # print("\nStep 6: Reproject layer to EPSG:4326 for metric area calculations") + # # NOTE: EPSG:4326 is geographic (degrees), not metric; choose a local UTM zone when areas must be in metres + # final_proj = processing.run("native:reprojectlayer", {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"})["OUTPUT"] + # print(" - Features after reproject:", final_proj.featureCount()) + # summary["after_srid"] = final_proj.featureCount() + + + + # ======================================================== + # 1. REPROJECT FIRST (Step 6 moved to Step 1) + # ======================================================== + print("\nStep 1: Reproject layer to EPSG:4326 (formerly Step 6)") + input_crs = layer.crs() + if input_crs.isValid(): + print(" - Original CRS:", input_crs.authid()) + print(" - Description:", input_crs.description()) + else: + print(" - Original CRS INVALID or UNDEFINED") + + reprojected = processing.run( + "native:reprojectlayer", + {"INPUT": layer, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"} + )["OUTPUT"] + + print(" - Features after reprojection:", reprojected.featureCount()) + summary["after_reproject"] = reprojected.featureCount() + + # ======================================================== + # 2. 
Geometry validity check + # ======================================================== + print("\nStep 2: Geometry validity check (QGIS native)") validity = processing.run( "qgis:checkvalidity", { - "INPUT_LAYER": layer, - "METHOD": 2, # GEOS + "INPUT_LAYER": reprojected, + "METHOD": 2, "IGNORE_RING_SELF_INTERSECTION": False, "VALID_OUTPUT": "memory:", "INVALID_OUTPUT": "memory:", @@ -65,34 +185,33 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict: print(" - Invalid geometries found:", invalid_count) print(" - Total error messages:", error_table.featureCount()) - # 1.1 Fix invalid geometries - # print("\nStep 1.1: Fix invalid geometries (FixGeometries)") - # fixed_pre = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"] - # summary["after_fixgeometries"] = fixed_pre.featureCount() - # print(" - Features after FixGeometries:", fixed_pre.featureCount()) - # layer = fixed_pre + # ======================================================== + # 3. Fix geometries + # ======================================================== + print("\nStep 3: Fix geometries") + fixed = processing.run( + "native:fixgeometries", + {"INPUT": reprojected, "OUTPUT": "memory:"} + )["OUTPUT"] - # 2. Fix geometries (again) - print("\nStep 2: Fix geometries (including self-intersections)") - fixed = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"] print(" - Valid after fix:", fixed.isValid()) print(" - Features after fix:", fixed.featureCount()) summary["after_fix"] = fixed.featureCount() # ======================================================== - # 3. ENSURE MULTIPOLYGON (LTR compatible!!) + # 4. 
Ensure MULTIPOLYGON (LTR compatible) # ======================================================== - print("\nStep 3: Ensure MULTIPOLYGON (LTR-safe method)") + print("\nStep 4: Ensure MULTIPOLYGON (LTR-safe method)") - # Step 3.1: Pecah multiparts → single (agar bersih) - # singleparts = processing.run( - # "native:multiparttosingleparts", - # {"INPUT": fixed, "OUTPUT": "memory:"} - # )["OUTPUT"] + # 4.1 Split multipart → singlepart + singleparts = processing.run( + "native:multiparttosingleparts", + {"INPUT": fixed, "OUTPUT": "memory:"} + )["OUTPUT"] - # print(" - After multiparttosingleparts:", singleparts.featureCount()) + print(" - After multipart to single:", singleparts.featureCount()) - # Step 3.2: Promote semua polygon → multipolygon + # 4.2 Promote all polygons → multipolygon multipolygon = processing.run( "native:promotetomulti", {"INPUT": fixed, "OUTPUT": "memory:"} @@ -100,47 +219,59 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict: print(" - After promotetomulti:", multipolygon.featureCount()) print(" - Valid:", multipolygon.isValid()) - summary["after_multipolygon"] = multipolygon.featureCount() - - # 4. Remove duplicate rows - print("\nStep 4: Remove duplicate rows") + # ======================================================== + # 5. 
Remove duplicates rows & vertices + # ======================================================== + print("\nStep 5: Remove duplicate rows") all_fields = [f.name() for f in multipolygon.fields()] print(" - All fields:", all_fields) + if "id" in all_fields: key_fields = ["id"] else: - int_cols = [f.name() for f in multipolygon.fields() if f.typeName().lower() in ["int", "integer", "bigint"]] + int_cols = [ + f.name() + for f in multipolygon.fields() + if f.typeName().lower() in ["int", "integer", "bigint"] + ] key_fields = [int_cols[0]] if int_cols else all_fields + print(" - Using duplicate key:", key_fields) - dedup = processing.run("native:removeduplicatesbyattribute", {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"})["OUTPUT"] + + dedup = processing.run( + "native:removeduplicatesbyattribute", + {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"} + )["OUTPUT"] + duplicates_removed = multipolygon.featureCount() - dedup.featureCount() summary["duplicates_removed"] = duplicates_removed + print(" - Features before:", multipolygon.featureCount()) print(" - Features after:", dedup.featureCount()) print(" - Duplicates removed:", duplicates_removed) - # 5. 
Remove duplicate vertices - print("\nStep 5: Remove duplicate vertices") - no_dup_vertices = processing.run("native:removeduplicatevertices", {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"})["OUTPUT"] + # Remove duplicate vertices + print("\nStep 5.5: Remove duplicate vertices") + no_dup_vertices = processing.run( + "native:removeduplicatevertices", + {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"} + )["OUTPUT"] + print(" - Features:", no_dup_vertices.featureCount()) summary["after_remove_vertices"] = no_dup_vertices.featureCount() - print("\nStep 5.5: Check input CRS before reprojection") - input_crs = no_dup_vertices.crs() - if input_crs.isValid(): - print(" - Input CRS:", input_crs.authid()) - print(" - CRS description:", input_crs.description()) - else: - print(" - CRS INVALID or UNDEFINED") + # ======================================================== + # 6. FINAL STEP: final_proj is still used + # ======================================================== + print("\nStep 6: Finalize (using final_proj variable as requested)") + final_proj = no_dup_vertices + print(" - Final features:", final_proj.featureCount()) + summary["after_final"] = final_proj.featureCount() + + + - # 6. REPROJECT to metric CRS BEFORE any area-based ops (use EPSG:4326 or local UTM) - print("\nStep 6: Reproject layer to EPSG:4326 for metric area calculations") - # choose EPSG:4326 or better choose local UTM if you know it; EPSG:4326 is general metric - final_proj = processing.run("native:reprojectlayer", {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"})["OUTPUT"] - print(" - Features after reproject:", final_proj.featureCount()) - summary["after_srid"] = final_proj.featureCount() # 7. Remove sliver polygons based on metric area (< 1 m^2)