update cleansing flow

This commit is contained in:
DmsAnhr 2025-12-22 15:24:03 +07:00
parent ca317e7222
commit 1b7f6ab5ea

View File

@ -45,13 +45,133 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict:
"valid_after": 0
}
# 1. Geometry validity check
print("\nStep 1: Geometry validity check (QGIS native)")
# # 1. Geometry validity check
# print("\nStep 1: Geometry validity check (QGIS native)")
# validity = processing.run(
# "qgis:checkvalidity",
# {
# "INPUT_LAYER": layer,
# "METHOD": 2, # GEOS
# "IGNORE_RING_SELF_INTERSECTION": False,
# "VALID_OUTPUT": "memory:",
# "INVALID_OUTPUT": "memory:",
# "ERROR_OUTPUT": "memory:"
# }
# )
# invalid_layer = validity["INVALID_OUTPUT"]
# error_table = validity["ERROR_OUTPUT"]
# invalid_count = invalid_layer.featureCount()
# summary["invalid_before"] = invalid_count
# print(" - Invalid geometries found:", invalid_count)
# print(" - Total error messages:", error_table.featureCount())
# # 1.1 Fix invalid geometries
# # print("\nStep 1.1: Fix invalid geometries (FixGeometries)")
# # fixed_pre = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
# # summary["after_fixgeometries"] = fixed_pre.featureCount()
# # print(" - Features after FixGeometries:", fixed_pre.featureCount())
# # layer = fixed_pre
# # 2. Fix geometries (again)
# print("\nStep 2: Fix geometries (including self-intersections)")
# fixed = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
# print(" - Valid after fix:", fixed.isValid())
# print(" - Features after fix:", fixed.featureCount())
# summary["after_fix"] = fixed.featureCount()
# # ========================================================
# # 3. ENSURE MULTIPOLYGON (LTR compatible!!)
# # ========================================================
# print("\nStep 3: Ensure MULTIPOLYGON (LTR-safe method)")
# # Step 3.1: Pecah multiparts → single (agar bersih)
# singleparts = processing.run(
# "native:multiparttosingleparts",
# {"INPUT": fixed, "OUTPUT": "memory:"}
# )["OUTPUT"]
# print(" - After multiparttosingleparts:", singleparts.featureCount())
# # Step 3.2: Promote semua polygon → multipolygon
# multipolygon = processing.run(
# "native:promotetomulti",
# {"INPUT": fixed, "OUTPUT": "memory:"}
# )["OUTPUT"]
# print(" - After promotetomulti:", multipolygon.featureCount())
# print(" - Valid:", multipolygon.isValid())
# summary["after_multipolygon"] = multipolygon.featureCount()
# # 4. Remove duplicate rows
# print("\nStep 4: Remove duplicate rows")
# all_fields = [f.name() for f in multipolygon.fields()]
# print(" - All fields:", all_fields)
# if "id" in all_fields:
# key_fields = ["id"]
# else:
# int_cols = [f.name() for f in multipolygon.fields() if f.typeName().lower() in ["int", "integer", "bigint"]]
# key_fields = [int_cols[0]] if int_cols else all_fields
# print(" - Using duplicate key:", key_fields)
# dedup = processing.run("native:removeduplicatesbyattribute", {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"})["OUTPUT"]
# duplicates_removed = multipolygon.featureCount() - dedup.featureCount()
# summary["duplicates_removed"] = duplicates_removed
# print(" - Features before:", multipolygon.featureCount())
# print(" - Features after:", dedup.featureCount())
# print(" - Duplicates removed:", duplicates_removed)
# # 5. Remove duplicate vertices
# print("\nStep 5: Remove duplicate vertices")
# no_dup_vertices = processing.run("native:removeduplicatevertices", {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"})["OUTPUT"]
# print(" - Features:", no_dup_vertices.featureCount())
# summary["after_remove_vertices"] = no_dup_vertices.featureCount()
# print("\nStep 5.5: Check input CRS before reprojection")
# input_crs = no_dup_vertices.crs()
# if input_crs.isValid():
# print(" - Input CRS:", input_crs.authid())
# print(" - CRS description:", input_crs.description())
# else:
# print(" - CRS INVALID or UNDEFINED")
# # 6. REPROJECT to metric CRS BEFORE any area-based ops (use EPSG:4326 or local UTM)
# print("\nStep 6: Reproject layer to EPSG:4326 for metric area calculations")
# # choose EPSG:4326 or better choose local UTM if you know it; EPSG:4326 is general metric
# final_proj = processing.run("native:reprojectlayer", {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"})["OUTPUT"]
# print(" - Features after reproject:", final_proj.featureCount())
# summary["after_srid"] = final_proj.featureCount()
# ========================================================
# 1. REPROJECT FIRST (former Step 6 moved up to Step 1)
# ========================================================
print("\nStep 1: Reproject layer to EPSG:4326 (formerly Step 6)")
input_crs = layer.crs()
if input_crs.isValid():
print(" - Original CRS:", input_crs.authid())
print(" - Description:", input_crs.description())
else:
print(" - Original CRS INVALID or UNDEFINED")
reprojected = processing.run(
"native:reprojectlayer",
{"INPUT": layer, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"}
)["OUTPUT"]
print(" - Features after reprojection:", reprojected.featureCount())
summary["after_reproject"] = reprojected.featureCount()
# ========================================================
# 2. Geometry validity check
# ========================================================
print("\nStep 2: Geometry validity check (QGIS native)")
validity = processing.run(
"qgis:checkvalidity",
{
"INPUT_LAYER": layer,
"METHOD": 2, # GEOS
"INPUT_LAYER": reprojected,
"METHOD": 2,
"IGNORE_RING_SELF_INTERSECTION": False,
"VALID_OUTPUT": "memory:",
"INVALID_OUTPUT": "memory:",
@ -65,34 +185,33 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict:
print(" - Invalid geometries found:", invalid_count)
print(" - Total error messages:", error_table.featureCount())
# 1.1 Fix invalid geometries
# print("\nStep 1.1: Fix invalid geometries (FixGeometries)")
# fixed_pre = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
# summary["after_fixgeometries"] = fixed_pre.featureCount()
# print(" - Features after FixGeometries:", fixed_pre.featureCount())
# layer = fixed_pre
# ========================================================
# 3. Fix geometries
# ========================================================
print("\nStep 3: Fix geometries")
fixed = processing.run(
"native:fixgeometries",
{"INPUT": reprojected, "OUTPUT": "memory:"}
)["OUTPUT"]
# 2. Fix geometries (again)
print("\nStep 2: Fix geometries (including self-intersections)")
fixed = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
print(" - Valid after fix:", fixed.isValid())
print(" - Features after fix:", fixed.featureCount())
summary["after_fix"] = fixed.featureCount()
# ========================================================
# 3. ENSURE MULTIPOLYGON (LTR compatible!!)
# 4. Ensure MULTIPOLYGON (LTR compatible)
# ========================================================
print("\nStep 3: Ensure MULTIPOLYGON (LTR-safe method)")
print("\nStep 4: Ensure MULTIPOLYGON (LTR-safe method)")
# Step 3.1: Split multipart features into single parts (to get clean geometries)
# singleparts = processing.run(
# "native:multiparttosingleparts",
# {"INPUT": fixed, "OUTPUT": "memory:"}
# )["OUTPUT"]
# 4.1 Split multipart → singlepart
singleparts = processing.run(
"native:multiparttosingleparts",
{"INPUT": fixed, "OUTPUT": "memory:"}
)["OUTPUT"]
# print(" - After multiparttosingleparts:", singleparts.featureCount())
print(" - After multipart to single:", singleparts.featureCount())
# Step 3.2: Promote all polygons → multipolygon
# 4.2 Promote all polygons → multipolygon
multipolygon = processing.run(
"native:promotetomulti",
{"INPUT": fixed, "OUTPUT": "memory:"}
@ -100,47 +219,59 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict:
print(" - After promotetomulti:", multipolygon.featureCount())
print(" - Valid:", multipolygon.isValid())
summary["after_multipolygon"] = multipolygon.featureCount()
# 4. Remove duplicate rows
print("\nStep 4: Remove duplicate rows")
# ========================================================
# 5. Remove duplicates rows & vertices
# ========================================================
print("\nStep 5: Remove duplicate rows")
all_fields = [f.name() for f in multipolygon.fields()]
print(" - All fields:", all_fields)
if "id" in all_fields:
key_fields = ["id"]
else:
int_cols = [f.name() for f in multipolygon.fields() if f.typeName().lower() in ["int", "integer", "bigint"]]
int_cols = [
f.name()
for f in multipolygon.fields()
if f.typeName().lower() in ["int", "integer", "bigint"]
]
key_fields = [int_cols[0]] if int_cols else all_fields
print(" - Using duplicate key:", key_fields)
dedup = processing.run("native:removeduplicatesbyattribute", {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"})["OUTPUT"]
dedup = processing.run(
"native:removeduplicatesbyattribute",
{"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"}
)["OUTPUT"]
duplicates_removed = multipolygon.featureCount() - dedup.featureCount()
summary["duplicates_removed"] = duplicates_removed
print(" - Features before:", multipolygon.featureCount())
print(" - Features after:", dedup.featureCount())
print(" - Duplicates removed:", duplicates_removed)
# 5. Remove duplicate vertices
print("\nStep 5: Remove duplicate vertices")
no_dup_vertices = processing.run("native:removeduplicatevertices", {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"})["OUTPUT"]
# Remove duplicate vertices
print("\nStep 5.5: Remove duplicate vertices")
no_dup_vertices = processing.run(
"native:removeduplicatevertices",
{"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"}
)["OUTPUT"]
print(" - Features:", no_dup_vertices.featureCount())
summary["after_remove_vertices"] = no_dup_vertices.featureCount()
print("\nStep 5.5: Check input CRS before reprojection")
input_crs = no_dup_vertices.crs()
if input_crs.isValid():
print(" - Input CRS:", input_crs.authid())
print(" - CRS description:", input_crs.description())
else:
print(" - CRS INVALID or UNDEFINED")
# ========================================================
# 6. FINAL STEP: the final_proj variable is kept so later steps can still use it
# ========================================================
print("\nStep 6: Finalize (using final_proj variable as requested)")
final_proj = no_dup_vertices
print(" - Final features:", final_proj.featureCount())
summary["after_final"] = final_proj.featureCount()
# 6. REPROJECT to EPSG:4326. NOTE: EPSG:4326 is a geographic CRS (degrees), not a metric one; area-based ops need a projected CRS such as the local UTM zone
print("\nStep 6: Reproject layer to EPSG:4326 for metric area calculations")
# EPSG:4326 (WGS 84) uses degrees, so it is NOT metric; prefer the local UTM zone when areas in square metres are required
final_proj = processing.run("native:reprojectlayer", {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"})["OUTPUT"]
print(" - Features after reproject:", final_proj.featureCount())
summary["after_srid"] = final_proj.featureCount()
# 7. Remove sliver polygons based on metric area (< 1 m^2)