update cleansing flow
This commit is contained in:
parent
ca317e7222
commit
1b7f6ab5ea
|
|
@ -45,13 +45,133 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict:
|
||||||
"valid_after": 0
|
"valid_after": 0
|
||||||
}
|
}
|
||||||
|
|
||||||
# 1. Geometry validity check
|
# # 1. Geometry validity check
|
||||||
print("\nStep 1: Geometry validity check (QGIS native)")
|
# print("\nStep 1: Geometry validity check (QGIS native)")
|
||||||
|
# validity = processing.run(
|
||||||
|
# "qgis:checkvalidity",
|
||||||
|
# {
|
||||||
|
# "INPUT_LAYER": layer,
|
||||||
|
# "METHOD": 2, # GEOS
|
||||||
|
# "IGNORE_RING_SELF_INTERSECTION": False,
|
||||||
|
# "VALID_OUTPUT": "memory:",
|
||||||
|
# "INVALID_OUTPUT": "memory:",
|
||||||
|
# "ERROR_OUTPUT": "memory:"
|
||||||
|
# }
|
||||||
|
# )
|
||||||
|
# invalid_layer = validity["INVALID_OUTPUT"]
|
||||||
|
# error_table = validity["ERROR_OUTPUT"]
|
||||||
|
# invalid_count = invalid_layer.featureCount()
|
||||||
|
# summary["invalid_before"] = invalid_count
|
||||||
|
# print(" - Invalid geometries found:", invalid_count)
|
||||||
|
# print(" - Total error messages:", error_table.featureCount())
|
||||||
|
|
||||||
|
# # 1.1 Fix invalid geometries
|
||||||
|
# # print("\nStep 1.1: Fix invalid geometries (FixGeometries)")
|
||||||
|
# # fixed_pre = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
|
||||||
|
# # summary["after_fixgeometries"] = fixed_pre.featureCount()
|
||||||
|
# # print(" - Features after FixGeometries:", fixed_pre.featureCount())
|
||||||
|
# # layer = fixed_pre
|
||||||
|
|
||||||
|
# # 2. Fix geometries (again)
|
||||||
|
# print("\nStep 2: Fix geometries (including self-intersections)")
|
||||||
|
# fixed = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
|
||||||
|
# print(" - Valid after fix:", fixed.isValid())
|
||||||
|
# print(" - Features after fix:", fixed.featureCount())
|
||||||
|
# summary["after_fix"] = fixed.featureCount()
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 3. ENSURE MULTIPOLYGON (LTR compatible!!)
|
||||||
|
# # ========================================================
|
||||||
|
# print("\nStep 3: Ensure MULTIPOLYGON (LTR-safe method)")
|
||||||
|
|
||||||
|
# # Step 3.1: Split multiparts → single parts (for a clean result)
|
||||||
|
# singleparts = processing.run(
|
||||||
|
# "native:multiparttosingleparts",
|
||||||
|
# {"INPUT": fixed, "OUTPUT": "memory:"}
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# print(" - After multiparttosingleparts:", singleparts.featureCount())
|
||||||
|
|
||||||
|
# # Step 3.2: Promote all polygons → multipolygon
|
||||||
|
# multipolygon = processing.run(
|
||||||
|
# "native:promotetomulti",
|
||||||
|
# {"INPUT": fixed, "OUTPUT": "memory:"}
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# print(" - After promotetomulti:", multipolygon.featureCount())
|
||||||
|
# print(" - Valid:", multipolygon.isValid())
|
||||||
|
|
||||||
|
# summary["after_multipolygon"] = multipolygon.featureCount()
|
||||||
|
|
||||||
|
|
||||||
|
# # 4. Remove duplicate rows
|
||||||
|
# print("\nStep 4: Remove duplicate rows")
|
||||||
|
# all_fields = [f.name() for f in multipolygon.fields()]
|
||||||
|
# print(" - All fields:", all_fields)
|
||||||
|
# if "id" in all_fields:
|
||||||
|
# key_fields = ["id"]
|
||||||
|
# else:
|
||||||
|
# int_cols = [f.name() for f in multipolygon.fields() if f.typeName().lower() in ["int", "integer", "bigint"]]
|
||||||
|
# key_fields = [int_cols[0]] if int_cols else all_fields
|
||||||
|
# print(" - Using duplicate key:", key_fields)
|
||||||
|
# dedup = processing.run("native:removeduplicatesbyattribute", {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"})["OUTPUT"]
|
||||||
|
# duplicates_removed = multipolygon.featureCount() - dedup.featureCount()
|
||||||
|
# summary["duplicates_removed"] = duplicates_removed
|
||||||
|
# print(" - Features before:", multipolygon.featureCount())
|
||||||
|
# print(" - Features after:", dedup.featureCount())
|
||||||
|
# print(" - Duplicates removed:", duplicates_removed)
|
||||||
|
|
||||||
|
# # 5. Remove duplicate vertices
|
||||||
|
# print("\nStep 5: Remove duplicate vertices")
|
||||||
|
# no_dup_vertices = processing.run("native:removeduplicatevertices", {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"})["OUTPUT"]
|
||||||
|
# print(" - Features:", no_dup_vertices.featureCount())
|
||||||
|
# summary["after_remove_vertices"] = no_dup_vertices.featureCount()
|
||||||
|
|
||||||
|
# print("\nStep 5.5: Check input CRS before reprojection")
|
||||||
|
# input_crs = no_dup_vertices.crs()
|
||||||
|
# if input_crs.isValid():
|
||||||
|
# print(" - Input CRS:", input_crs.authid())
|
||||||
|
# print(" - CRS description:", input_crs.description())
|
||||||
|
# else:
|
||||||
|
# print(" - CRS INVALID or UNDEFINED")
|
||||||
|
|
||||||
|
# # 6. REPROJECT to metric CRS BEFORE any area-based ops (use EPSG:4326 or local UTM)
|
||||||
|
# print("\nStep 6: Reproject layer to EPSG:4326 for metric area calculations")
|
||||||
|
# # prefer a local UTM zone if you know it; note that EPSG:4326 is a geographic CRS (degrees), not a metric one
|
||||||
|
# final_proj = processing.run("native:reprojectlayer", {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"})["OUTPUT"]
|
||||||
|
# print(" - Features after reproject:", final_proj.featureCount())
|
||||||
|
# summary["after_srid"] = final_proj.featureCount()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ========================================================
|
||||||
|
# 1. REPROJECT FIRST (Step 6 moved to Step 1)
|
||||||
|
# ========================================================
|
||||||
|
print("\nStep 1: Reproject layer to EPSG:4326 (formerly Step 6)")
|
||||||
|
input_crs = layer.crs()
|
||||||
|
if input_crs.isValid():
|
||||||
|
print(" - Original CRS:", input_crs.authid())
|
||||||
|
print(" - Description:", input_crs.description())
|
||||||
|
else:
|
||||||
|
print(" - Original CRS INVALID or UNDEFINED")
|
||||||
|
|
||||||
|
reprojected = processing.run(
|
||||||
|
"native:reprojectlayer",
|
||||||
|
{"INPUT": layer, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"}
|
||||||
|
)["OUTPUT"]
|
||||||
|
|
||||||
|
print(" - Features after reprojection:", reprojected.featureCount())
|
||||||
|
summary["after_reproject"] = reprojected.featureCount()
|
||||||
|
|
||||||
|
# ========================================================
|
||||||
|
# 2. Geometry validity check
|
||||||
|
# ========================================================
|
||||||
|
print("\nStep 2: Geometry validity check (QGIS native)")
|
||||||
validity = processing.run(
|
validity = processing.run(
|
||||||
"qgis:checkvalidity",
|
"qgis:checkvalidity",
|
||||||
{
|
{
|
||||||
"INPUT_LAYER": layer,
|
"INPUT_LAYER": reprojected,
|
||||||
"METHOD": 2, # GEOS
|
"METHOD": 2,
|
||||||
"IGNORE_RING_SELF_INTERSECTION": False,
|
"IGNORE_RING_SELF_INTERSECTION": False,
|
||||||
"VALID_OUTPUT": "memory:",
|
"VALID_OUTPUT": "memory:",
|
||||||
"INVALID_OUTPUT": "memory:",
|
"INVALID_OUTPUT": "memory:",
|
||||||
|
|
@ -65,34 +185,33 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict:
|
||||||
print(" - Invalid geometries found:", invalid_count)
|
print(" - Invalid geometries found:", invalid_count)
|
||||||
print(" - Total error messages:", error_table.featureCount())
|
print(" - Total error messages:", error_table.featureCount())
|
||||||
|
|
||||||
# 1.1 Fix invalid geometries
|
# ========================================================
|
||||||
# print("\nStep 1.1: Fix invalid geometries (FixGeometries)")
|
# 3. Fix geometries
|
||||||
# fixed_pre = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
|
# ========================================================
|
||||||
# summary["after_fixgeometries"] = fixed_pre.featureCount()
|
print("\nStep 3: Fix geometries")
|
||||||
# print(" - Features after FixGeometries:", fixed_pre.featureCount())
|
fixed = processing.run(
|
||||||
# layer = fixed_pre
|
"native:fixgeometries",
|
||||||
|
{"INPUT": reprojected, "OUTPUT": "memory:"}
|
||||||
|
)["OUTPUT"]
|
||||||
|
|
||||||
# 2. Fix geometries (again)
|
|
||||||
print("\nStep 2: Fix geometries (including self-intersections)")
|
|
||||||
fixed = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
|
|
||||||
print(" - Valid after fix:", fixed.isValid())
|
print(" - Valid after fix:", fixed.isValid())
|
||||||
print(" - Features after fix:", fixed.featureCount())
|
print(" - Features after fix:", fixed.featureCount())
|
||||||
summary["after_fix"] = fixed.featureCount()
|
summary["after_fix"] = fixed.featureCount()
|
||||||
|
|
||||||
# ========================================================
|
# ========================================================
|
||||||
# 3. ENSURE MULTIPOLYGON (LTR compatible!!)
|
# 4. Ensure MULTIPOLYGON (LTR compatible)
|
||||||
# ========================================================
|
# ========================================================
|
||||||
print("\nStep 3: Ensure MULTIPOLYGON (LTR-safe method)")
|
print("\nStep 4: Ensure MULTIPOLYGON (LTR-safe method)")
|
||||||
|
|
||||||
# Step 3.1: Pecah multiparts → single (agar bersih)
|
# 4.1 Split multipart → singlepart
|
||||||
# singleparts = processing.run(
|
singleparts = processing.run(
|
||||||
# "native:multiparttosingleparts",
|
"native:multiparttosingleparts",
|
||||||
# {"INPUT": fixed, "OUTPUT": "memory:"}
|
{"INPUT": fixed, "OUTPUT": "memory:"}
|
||||||
# )["OUTPUT"]
|
)["OUTPUT"]
|
||||||
|
|
||||||
# print(" - After multiparttosingleparts:", singleparts.featureCount())
|
print(" - After multipart to single:", singleparts.featureCount())
|
||||||
|
|
||||||
# Step 3.2: Promote semua polygon → multipolygon
|
# 4.2 Promote all polygons → multipolygon
|
||||||
multipolygon = processing.run(
|
multipolygon = processing.run(
|
||||||
"native:promotetomulti",
|
"native:promotetomulti",
|
||||||
{"INPUT": fixed, "OUTPUT": "memory:"}
|
{"INPUT": fixed, "OUTPUT": "memory:"}
|
||||||
|
|
@ -100,47 +219,59 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict:
|
||||||
|
|
||||||
print(" - After promotetomulti:", multipolygon.featureCount())
|
print(" - After promotetomulti:", multipolygon.featureCount())
|
||||||
print(" - Valid:", multipolygon.isValid())
|
print(" - Valid:", multipolygon.isValid())
|
||||||
|
|
||||||
summary["after_multipolygon"] = multipolygon.featureCount()
|
summary["after_multipolygon"] = multipolygon.featureCount()
|
||||||
|
|
||||||
|
# ========================================================
|
||||||
# 4. Remove duplicate rows
|
# 5. Remove duplicate rows & vertices
|
||||||
print("\nStep 4: Remove duplicate rows")
|
# ========================================================
|
||||||
|
print("\nStep 5: Remove duplicate rows")
|
||||||
all_fields = [f.name() for f in multipolygon.fields()]
|
all_fields = [f.name() for f in multipolygon.fields()]
|
||||||
print(" - All fields:", all_fields)
|
print(" - All fields:", all_fields)
|
||||||
|
|
||||||
if "id" in all_fields:
|
if "id" in all_fields:
|
||||||
key_fields = ["id"]
|
key_fields = ["id"]
|
||||||
else:
|
else:
|
||||||
int_cols = [f.name() for f in multipolygon.fields() if f.typeName().lower() in ["int", "integer", "bigint"]]
|
int_cols = [
|
||||||
|
f.name()
|
||||||
|
for f in multipolygon.fields()
|
||||||
|
if f.typeName().lower() in ["int", "integer", "bigint"]
|
||||||
|
]
|
||||||
key_fields = [int_cols[0]] if int_cols else all_fields
|
key_fields = [int_cols[0]] if int_cols else all_fields
|
||||||
|
|
||||||
print(" - Using duplicate key:", key_fields)
|
print(" - Using duplicate key:", key_fields)
|
||||||
dedup = processing.run("native:removeduplicatesbyattribute", {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"})["OUTPUT"]
|
|
||||||
|
dedup = processing.run(
|
||||||
|
"native:removeduplicatesbyattribute",
|
||||||
|
{"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"}
|
||||||
|
)["OUTPUT"]
|
||||||
|
|
||||||
duplicates_removed = multipolygon.featureCount() - dedup.featureCount()
|
duplicates_removed = multipolygon.featureCount() - dedup.featureCount()
|
||||||
summary["duplicates_removed"] = duplicates_removed
|
summary["duplicates_removed"] = duplicates_removed
|
||||||
|
|
||||||
print(" - Features before:", multipolygon.featureCount())
|
print(" - Features before:", multipolygon.featureCount())
|
||||||
print(" - Features after:", dedup.featureCount())
|
print(" - Features after:", dedup.featureCount())
|
||||||
print(" - Duplicates removed:", duplicates_removed)
|
print(" - Duplicates removed:", duplicates_removed)
|
||||||
|
|
||||||
# 5. Remove duplicate vertices
|
# Remove duplicate vertices
|
||||||
print("\nStep 5: Remove duplicate vertices")
|
print("\nStep 5.5: Remove duplicate vertices")
|
||||||
no_dup_vertices = processing.run("native:removeduplicatevertices", {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"})["OUTPUT"]
|
no_dup_vertices = processing.run(
|
||||||
|
"native:removeduplicatevertices",
|
||||||
|
{"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"}
|
||||||
|
)["OUTPUT"]
|
||||||
|
|
||||||
print(" - Features:", no_dup_vertices.featureCount())
|
print(" - Features:", no_dup_vertices.featureCount())
|
||||||
summary["after_remove_vertices"] = no_dup_vertices.featureCount()
|
summary["after_remove_vertices"] = no_dup_vertices.featureCount()
|
||||||
|
|
||||||
print("\nStep 5.5: Check input CRS before reprojection")
|
# ========================================================
|
||||||
input_crs = no_dup_vertices.crs()
|
# 6. FINAL STEP: final_proj is still used
|
||||||
if input_crs.isValid():
|
# ========================================================
|
||||||
print(" - Input CRS:", input_crs.authid())
|
print("\nStep 6: Finalize (using final_proj variable as requested)")
|
||||||
print(" - CRS description:", input_crs.description())
|
final_proj = no_dup_vertices
|
||||||
else:
|
print(" - Final features:", final_proj.featureCount())
|
||||||
print(" - CRS INVALID or UNDEFINED")
|
summary["after_final"] = final_proj.featureCount()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# 6. REPROJECT to metric CRS BEFORE any area-based ops (use EPSG:4326 or local UTM)
|
|
||||||
print("\nStep 6: Reproject layer to EPSG:4326 for metric area calculations")
|
|
||||||
# choose EPSG:4326 or better choose local UTM if you know it; EPSG:4326 is general metric
|
|
||||||
final_proj = processing.run("native:reprojectlayer", {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"})["OUTPUT"]
|
|
||||||
print(" - Features after reproject:", final_proj.featureCount())
|
|
||||||
summary["after_srid"] = final_proj.featureCount()
|
|
||||||
|
|
||||||
|
|
||||||
# 7. Remove sliver polygons based on metric area (< 1 m^2)
|
# 7. Remove sliver polygons based on metric area (< 1 m^2)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user