update cleansing flow
This commit is contained in:
parent
ca317e7222
commit
1b7f6ab5ea
|
|
@ -45,13 +45,133 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict:
|
|||
"valid_after": 0
|
||||
}
|
||||
|
||||
# 1. Geometry validity check
|
||||
print("\nStep 1: Geometry validity check (QGIS native)")
|
||||
# # 1. Geometry validity check
|
||||
# print("\nStep 1: Geometry validity check (QGIS native)")
|
||||
# validity = processing.run(
|
||||
# "qgis:checkvalidity",
|
||||
# {
|
||||
# "INPUT_LAYER": layer,
|
||||
# "METHOD": 2, # GEOS
|
||||
# "IGNORE_RING_SELF_INTERSECTION": False,
|
||||
# "VALID_OUTPUT": "memory:",
|
||||
# "INVALID_OUTPUT": "memory:",
|
||||
# "ERROR_OUTPUT": "memory:"
|
||||
# }
|
||||
# )
|
||||
# invalid_layer = validity["INVALID_OUTPUT"]
|
||||
# error_table = validity["ERROR_OUTPUT"]
|
||||
# invalid_count = invalid_layer.featureCount()
|
||||
# summary["invalid_before"] = invalid_count
|
||||
# print(" - Invalid geometries found:", invalid_count)
|
||||
# print(" - Total error messages:", error_table.featureCount())
|
||||
|
||||
# # 1.1 Fix invalid geometries
|
||||
# # print("\nStep 1.1: Fix invalid geometries (FixGeometries)")
|
||||
# # fixed_pre = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
|
||||
# # summary["after_fixgeometries"] = fixed_pre.featureCount()
|
||||
# # print(" - Features after FixGeometries:", fixed_pre.featureCount())
|
||||
# # layer = fixed_pre
|
||||
|
||||
# # 2. Fix geometries (again)
|
||||
# print("\nStep 2: Fix geometries (including self-intersections)")
|
||||
# fixed = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
|
||||
# print(" - Valid after fix:", fixed.isValid())
|
||||
# print(" - Features after fix:", fixed.featureCount())
|
||||
# summary["after_fix"] = fixed.featureCount()
|
||||
|
||||
# # ========================================================
|
||||
# # 3. ENSURE MULTIPOLYGON (LTR compatible!!)
|
||||
# # ========================================================
|
||||
# print("\nStep 3: Ensure MULTIPOLYGON (LTR-safe method)")
|
||||
|
||||
# # Step 3.1: Split multipart features → single parts (for a clean result)
|
||||
# singleparts = processing.run(
|
||||
# "native:multiparttosingleparts",
|
||||
# {"INPUT": fixed, "OUTPUT": "memory:"}
|
||||
# )["OUTPUT"]
|
||||
|
||||
# print(" - After multiparttosingleparts:", singleparts.featureCount())
|
||||
|
||||
# # Step 3.2: Promote all polygons → multipolygon
|
||||
# multipolygon = processing.run(
|
||||
# "native:promotetomulti",
|
||||
# {"INPUT": fixed, "OUTPUT": "memory:"}
|
||||
# )["OUTPUT"]
|
||||
|
||||
# print(" - After promotetomulti:", multipolygon.featureCount())
|
||||
# print(" - Valid:", multipolygon.isValid())
|
||||
|
||||
# summary["after_multipolygon"] = multipolygon.featureCount()
|
||||
|
||||
|
||||
# # 4. Remove duplicate rows
|
||||
# print("\nStep 4: Remove duplicate rows")
|
||||
# all_fields = [f.name() for f in multipolygon.fields()]
|
||||
# print(" - All fields:", all_fields)
|
||||
# if "id" in all_fields:
|
||||
# key_fields = ["id"]
|
||||
# else:
|
||||
# int_cols = [f.name() for f in multipolygon.fields() if f.typeName().lower() in ["int", "integer", "bigint"]]
|
||||
# key_fields = [int_cols[0]] if int_cols else all_fields
|
||||
# print(" - Using duplicate key:", key_fields)
|
||||
# dedup = processing.run("native:removeduplicatesbyattribute", {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"})["OUTPUT"]
|
||||
# duplicates_removed = multipolygon.featureCount() - dedup.featureCount()
|
||||
# summary["duplicates_removed"] = duplicates_removed
|
||||
# print(" - Features before:", multipolygon.featureCount())
|
||||
# print(" - Features after:", dedup.featureCount())
|
||||
# print(" - Duplicates removed:", duplicates_removed)
|
||||
|
||||
# # 5. Remove duplicate vertices
|
||||
# print("\nStep 5: Remove duplicate vertices")
|
||||
# no_dup_vertices = processing.run("native:removeduplicatevertices", {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"})["OUTPUT"]
|
||||
# print(" - Features:", no_dup_vertices.featureCount())
|
||||
# summary["after_remove_vertices"] = no_dup_vertices.featureCount()
|
||||
|
||||
# print("\nStep 5.5: Check input CRS before reprojection")
|
||||
# input_crs = no_dup_vertices.crs()
|
||||
# if input_crs.isValid():
|
||||
# print(" - Input CRS:", input_crs.authid())
|
||||
# print(" - CRS description:", input_crs.description())
|
||||
# else:
|
||||
# print(" - CRS INVALID or UNDEFINED")
|
||||
|
||||
# # 6. REPROJECT to metric CRS BEFORE any area-based ops (use EPSG:4326 or local UTM)
|
||||
# print("\nStep 6: Reproject layer to EPSG:4326 for metric area calculations")
|
||||
# # NOTE: EPSG:4326 is a geographic CRS (WGS 84, units in degrees), not a metric one; for area calculations in square metres, prefer the local UTM zone or another projected CRS
|
||||
# final_proj = processing.run("native:reprojectlayer", {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"})["OUTPUT"]
|
||||
# print(" - Features after reproject:", final_proj.featureCount())
|
||||
# summary["after_srid"] = final_proj.featureCount()
|
||||
|
||||
|
||||
|
||||
# ========================================================
|
||||
# 1. REPROJECT FIRST (Step 6 moved to Step 1)
|
||||
# ========================================================
|
||||
print("\nStep 1: Reproject layer to EPSG:4326 (formerly Step 6)")
|
||||
input_crs = layer.crs()
|
||||
if input_crs.isValid():
|
||||
print(" - Original CRS:", input_crs.authid())
|
||||
print(" - Description:", input_crs.description())
|
||||
else:
|
||||
print(" - Original CRS INVALID or UNDEFINED")
|
||||
|
||||
reprojected = processing.run(
|
||||
"native:reprojectlayer",
|
||||
{"INPUT": layer, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"}
|
||||
)["OUTPUT"]
|
||||
|
||||
print(" - Features after reprojection:", reprojected.featureCount())
|
||||
summary["after_reproject"] = reprojected.featureCount()
|
||||
|
||||
# ========================================================
|
||||
# 2. Geometry validity check
|
||||
# ========================================================
|
||||
print("\nStep 2: Geometry validity check (QGIS native)")
|
||||
validity = processing.run(
|
||||
"qgis:checkvalidity",
|
||||
{
|
||||
"INPUT_LAYER": layer,
|
||||
"METHOD": 2, # GEOS
|
||||
"INPUT_LAYER": reprojected,
|
||||
"METHOD": 2,
|
||||
"IGNORE_RING_SELF_INTERSECTION": False,
|
||||
"VALID_OUTPUT": "memory:",
|
||||
"INVALID_OUTPUT": "memory:",
|
||||
|
|
@ -65,34 +185,33 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict:
|
|||
print(" - Invalid geometries found:", invalid_count)
|
||||
print(" - Total error messages:", error_table.featureCount())
|
||||
|
||||
# 1.1 Fix invalid geometries
|
||||
# print("\nStep 1.1: Fix invalid geometries (FixGeometries)")
|
||||
# fixed_pre = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
|
||||
# summary["after_fixgeometries"] = fixed_pre.featureCount()
|
||||
# print(" - Features after FixGeometries:", fixed_pre.featureCount())
|
||||
# layer = fixed_pre
|
||||
# ========================================================
|
||||
# 3. Fix geometries
|
||||
# ========================================================
|
||||
print("\nStep 3: Fix geometries")
|
||||
fixed = processing.run(
|
||||
"native:fixgeometries",
|
||||
{"INPUT": reprojected, "OUTPUT": "memory:"}
|
||||
)["OUTPUT"]
|
||||
|
||||
# 2. Fix geometries (again)
|
||||
print("\nStep 2: Fix geometries (including self-intersections)")
|
||||
fixed = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"]
|
||||
print(" - Valid after fix:", fixed.isValid())
|
||||
print(" - Features after fix:", fixed.featureCount())
|
||||
summary["after_fix"] = fixed.featureCount()
|
||||
|
||||
# ========================================================
|
||||
# 3. ENSURE MULTIPOLYGON (LTR compatible!!)
|
||||
# 4. Ensure MULTIPOLYGON (LTR compatible)
|
||||
# ========================================================
|
||||
print("\nStep 3: Ensure MULTIPOLYGON (LTR-safe method)")
|
||||
print("\nStep 4: Ensure MULTIPOLYGON (LTR-safe method)")
|
||||
|
||||
# Step 3.1: Pecah multiparts → single (agar bersih)
|
||||
# singleparts = processing.run(
|
||||
# "native:multiparttosingleparts",
|
||||
# {"INPUT": fixed, "OUTPUT": "memory:"}
|
||||
# )["OUTPUT"]
|
||||
# 4.1 Split multipart → singlepart
|
||||
singleparts = processing.run(
|
||||
"native:multiparttosingleparts",
|
||||
{"INPUT": fixed, "OUTPUT": "memory:"}
|
||||
)["OUTPUT"]
|
||||
|
||||
# print(" - After multiparttosingleparts:", singleparts.featureCount())
|
||||
print(" - After multipart to single:", singleparts.featureCount())
|
||||
|
||||
# Step 3.2: Promote semua polygon → multipolygon
|
||||
# 4.2 Promote all polygons → multipolygon
|
||||
multipolygon = processing.run(
|
||||
"native:promotetomulti",
|
||||
{"INPUT": fixed, "OUTPUT": "memory:"}
|
||||
|
|
@ -100,47 +219,59 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict:
|
|||
|
||||
print(" - After promotetomulti:", multipolygon.featureCount())
|
||||
print(" - Valid:", multipolygon.isValid())
|
||||
|
||||
summary["after_multipolygon"] = multipolygon.featureCount()
|
||||
|
||||
|
||||
# 4. Remove duplicate rows
|
||||
print("\nStep 4: Remove duplicate rows")
|
||||
# ========================================================
|
||||
# 5. Remove duplicates rows & vertices
|
||||
# ========================================================
|
||||
print("\nStep 5: Remove duplicate rows")
|
||||
all_fields = [f.name() for f in multipolygon.fields()]
|
||||
print(" - All fields:", all_fields)
|
||||
|
||||
if "id" in all_fields:
|
||||
key_fields = ["id"]
|
||||
else:
|
||||
int_cols = [f.name() for f in multipolygon.fields() if f.typeName().lower() in ["int", "integer", "bigint"]]
|
||||
int_cols = [
|
||||
f.name()
|
||||
for f in multipolygon.fields()
|
||||
if f.typeName().lower() in ["int", "integer", "bigint"]
|
||||
]
|
||||
key_fields = [int_cols[0]] if int_cols else all_fields
|
||||
|
||||
print(" - Using duplicate key:", key_fields)
|
||||
dedup = processing.run("native:removeduplicatesbyattribute", {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"})["OUTPUT"]
|
||||
|
||||
dedup = processing.run(
|
||||
"native:removeduplicatesbyattribute",
|
||||
{"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"}
|
||||
)["OUTPUT"]
|
||||
|
||||
duplicates_removed = multipolygon.featureCount() - dedup.featureCount()
|
||||
summary["duplicates_removed"] = duplicates_removed
|
||||
|
||||
print(" - Features before:", multipolygon.featureCount())
|
||||
print(" - Features after:", dedup.featureCount())
|
||||
print(" - Duplicates removed:", duplicates_removed)
|
||||
|
||||
# 5. Remove duplicate vertices
|
||||
print("\nStep 5: Remove duplicate vertices")
|
||||
no_dup_vertices = processing.run("native:removeduplicatevertices", {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"})["OUTPUT"]
|
||||
# Remove duplicate vertices
|
||||
print("\nStep 5.5: Remove duplicate vertices")
|
||||
no_dup_vertices = processing.run(
|
||||
"native:removeduplicatevertices",
|
||||
{"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"}
|
||||
)["OUTPUT"]
|
||||
|
||||
print(" - Features:", no_dup_vertices.featureCount())
|
||||
summary["after_remove_vertices"] = no_dup_vertices.featureCount()
|
||||
|
||||
print("\nStep 5.5: Check input CRS before reprojection")
|
||||
input_crs = no_dup_vertices.crs()
|
||||
if input_crs.isValid():
|
||||
print(" - Input CRS:", input_crs.authid())
|
||||
print(" - CRS description:", input_crs.description())
|
||||
else:
|
||||
print(" - CRS INVALID or UNDEFINED")
|
||||
# ========================================================
|
||||
# 6. FINAL STEP: keep using the final_proj variable
|
||||
# ========================================================
|
||||
print("\nStep 6: Finalize (using final_proj variable as requested)")
|
||||
final_proj = no_dup_vertices
|
||||
print(" - Final features:", final_proj.featureCount())
|
||||
summary["after_final"] = final_proj.featureCount()
|
||||
|
||||
|
||||
|
||||
# 6. REPROJECT to metric CRS BEFORE any area-based ops (use EPSG:4326 or local UTM)
|
||||
print("\nStep 6: Reproject layer to EPSG:4326 for metric area calculations")
|
||||
# NOTE: EPSG:4326 is a geographic CRS (WGS 84, units in degrees), not a metric one; for area calculations in square metres, prefer the local UTM zone or another projected CRS
|
||||
final_proj = processing.run("native:reprojectlayer", {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"})["OUTPUT"]
|
||||
print(" - Features after reproject:", final_proj.featureCount())
|
||||
summary["after_srid"] = final_proj.featureCount()
|
||||
|
||||
|
||||
# 7. Remove sliver polygons based on metric area (< 1 m^2)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user