diff --git a/.gitignore b/.gitignore index ebf0677..1a3f161 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ +.env test_pg.py cleansing_service.py +postgis_metadata.py +database.py __pycache__/ data/ \ No newline at end of file diff --git a/core/config.py b/core/config.py new file mode 100644 index 0000000..3d16f9e --- /dev/null +++ b/core/config.py @@ -0,0 +1,13 @@ +from dotenv import load_dotenv +import os + +load_dotenv() + +HOST = os.getenv("host") +PORT = os.getenv("port") +DB = os.getenv("db") +USER = os.getenv("user") +PWD = os.getenv("pwd") +SCHEMA = os.getenv("schema") +GEOM_COL = os.getenv("geom_col") + diff --git a/database.py b/database.py deleted file mode 100644 index bff73f6..0000000 --- a/database.py +++ /dev/null @@ -1,21 +0,0 @@ -POSTGIS = { - "host": "192.168.60.24", - "port": "5432", - "db": "test_postgis", - "user": "postgres", - "password": "12345" -} - -def build_uri(table_name: str) -> str: - return ( - f"dbname='{POSTGIS['db']}' " - f"host='{POSTGIS['host']}' " - f"port='{POSTGIS['port']}' " - f"user='{POSTGIS['user']}' " - f"password='{POSTGIS['password']}' " - f"sslmode=disable " - f"table=\"public\".\"{table_name}\" " - f"key='_id'" - ) - - diff --git a/full_cleansing_service.py b/full_cleansing_service.py index c92362c..2c64633 100644 --- a/full_cleansing_service.py +++ b/full_cleansing_service.py @@ -1,598 +1,181 @@ from qgis.core import ( + QgsDataSourceUri, QgsFeature, QgsVectorLayer, QgsVectorLayerExporter, - QgsVectorFileWriter + QgsVectorFileWriter, + QgsWkbTypes ) import processing from typing import Dict -from database import build_uri +from core.config import HOST,PORT,DB,USER,PWD,SCHEMA,GEOM_COL def load_layer(table_name: str): - uri = build_uri(table_name) - print('uri', uri) - layer = QgsVectorLayer(uri, table_name, "postgres") + uri = QgsDataSourceUri() + uri.setConnection(HOST, PORT, DB, USER, PWD) + uri.setDataSource(SCHEMA, table_name, GEOM_COL, "", "_id") + + layer = QgsVectorLayer(uri.uri(), table_name, 
"postgres") + print("Layer valid:", layer.isValid()) return layer -# def cleansing_layer(layer: QgsVectorLayer) -> Dict: - -# summary = { -# "total_features_before": layer.featureCount(), -# "invalid_geometries_before": 0, -# "invalid_geometries_fixed": 0, -# "duplicates_removed": 0, -# "sliver_removed": 0, -# "holes_removed": 0 -# } - -# # ======================================================== -# # 1. IDENTIFY INVALID GEOMETRY -# # ======================================================== -# invalid_ids = [] -# for f in layer.getFeatures(): -# if not f.geometry().isGeosValid(): -# invalid_ids.append(f.id()) - -# summary["invalid_geometries_before"] = len(invalid_ids) - -# # ======================================================== -# # 2. FIX GEOMETRIES -# # ======================================================== -# fixed = processing.run( -# "native:fixgeometries", -# { -# "INPUT": layer, -# "OUTPUT": "memory:" -# } -# )["OUTPUT"] - -# summary["invalid_geometries_fixed"] = len(invalid_ids) - -# # ======================================================== -# # 3. ENSURE MULTIPOLYGON -# # ======================================================== -# multipolygon = processing.run( -# "native:collect", -# { -# "INPUT": fixed, -# "OUTPUT": "memory:" -# } -# )["OUTPUT"] - -# # ======================================================== -# # 4. 
REMOVE DUPLICATE ROWS -# # ======================================================== -# all_fields = [f.name() for f in multipolygon.fields()] -# print("Detecting key fields:", all_fields) - -# key_fields = None - -# # (1) Prefer 'id' -# if "id" in all_fields: -# key_fields = ["id"] - -# # (2) Else pick first integer field -# if key_fields is None: -# int_cols = [ -# f.name() for f in multipolygon.fields() -# if f.typeName().lower() in ["int", "integer", "bigint"] -# ] -# if int_cols: -# key_fields = [int_cols[0]] - -# # (3) Else use all fields -# if key_fields is None: -# key_fields = all_fields - -# print("Using key field:", key_fields) - -# dedup = processing.run( -# "native:removeduplicatesbyattribute", -# { -# "INPUT": multipolygon, -# "FIELDS": key_fields, -# "METHOD": 0, -# "OUTPUT": "memory:" -# } -# )["OUTPUT"] - -# summary["duplicates_removed"] = ( -# multipolygon.featureCount() - dedup.featureCount() -# ) - -# # ======================================================== -# # 5. REMOVE DUPLICATE VERTICES -# # ======================================================== -# no_dup_vertices = processing.run( -# "native:removeduplicatevertices", -# { -# "INPUT": dedup, -# "VERTICES": 0, # remove exact duplicates -# "OUTPUT": "memory:" -# } -# )["OUTPUT"] - -# # ======================================================== -# # 6. FIX SRID (REPROJECT IF NEEDED) -# # ======================================================== -# # Force SRID to 4326 -# reprojected = processing.run( -# "native:reprojectlayer", -# { -# "INPUT": no_dup_vertices, -# "TARGET_CRS": "EPSG:4326", -# "OUTPUT": "memory:" -# } -# )["OUTPUT"] - -# # ======================================================== -# # 7. 
REMOVE SLIVER POLYGONS (< 1 m²) -# # ======================================================== -# # Filter polygons with area < 1 (threshold bisa kamu ubah) -# slivers = processing.run( -# "native:extractbyexpression", -# { -# "INPUT": reprojected, -# "EXPRESSION": "$area < 1", -# "OUTPUT": "memory:" -# } -# )["OUTPUT"] - -# summary["sliver_removed"] = slivers.featureCount() - -# # Keep only polygons with area >= 1 -# no_sliver = processing.run( -# "native:extractbyexpression", -# { -# "INPUT": reprojected, -# "EXPRESSION": "$area >= 1", -# "OUTPUT": "memory:" -# } -# )["OUTPUT"] - -# # ======================================================== -# # 8. REMOVE TINY HOLES (< 1 m²) -# # ======================================================== -# no_holes = processing.run( -# "native:deleteholes", -# { -# "INPUT": no_sliver, -# "MIN_AREA": 1, # minimum area of hole to keep -# "OUTPUT": "memory:" -# } -# )["OUTPUT"] - -# summary["holes_removed"] = 0 # can't count holes easily in PyQGIS - - -# # ======================================================== -# # 9. 
TRIM STRING FIELDS (ATTRIBUTE CLEANSING) -# # ======================================================== -# trimmed = processing.run( -# "qgis:refactorfields", -# { -# "INPUT": no_holes, -# "FIELDS_MAPPING": [ -# { -# "expression": f"trim(\"{field.name()}\")" -# if field.typeName().lower() in ["text", "varchar"] -# else f"\"{field.name()}\"", -# "name": field.name(), -# "type": field.type(), -# "length": field.length(), -# "precision": field.precision() -# } -# for field in no_holes.fields() -# ], -# "OUTPUT": "memory:" -# } -# )["OUTPUT"] - -# # ======================================================== -# # RETURN CLEANED LAYER -# # ======================================================== -# return { -# "summary": summary, -# "clean_layer": trimmed -# } - - -# def cleansing_layer(layer: QgsVectorLayer) -> Dict: - -# # ======================================================== -# # INITIAL STATE -# # ======================================================== -# print("\n========== START CLEANSING ==========") -# print("Step 0: Load Layer") -# print(" - Valid:", layer.isValid()) -# print(" - Feature Count:", layer.featureCount()) - -# summary = { -# "step0_features": layer.featureCount(), -# "step1_invalid_before": 0, -# "step2_after_fix": 0, -# "step3_after_multipolygon": 0, -# "step4_duplicates_removed": 0, -# "step5_after_remove_vertices": 0, -# "step6_after_srid": 0, -# "step7_sliver_removed": 0, -# "step8_after_deleteholes": 0 -# } - -# # ======================================================== -# # 1. VALIDATE GEOMETRY -# # ======================================================== -# print("\nStep 1: Identify invalid geometries") - -# invalid_ids = [] -# for f in layer.getFeatures(): -# if not f.geometry().isGeosValid(): -# invalid_ids.append(f.id()) - -# summary["step1_invalid_before"] = len(invalid_ids) - -# print(" - Invalid geometries found:", len(invalid_ids)) - -# # ======================================================== -# # 2. 
FIX GEOMETRIES -# # ======================================================== -# print("\nStep 2: Fix geometries") -# fixed = processing.run( -# "native:fixgeometries", -# {"INPUT": layer, "OUTPUT": "memory:"} -# )["OUTPUT"] - -# print(" - Valid:", fixed.isValid()) -# print(" - Features after fix:", fixed.featureCount()) -# summary["step2_after_fix"] = fixed.featureCount() - -# # ======================================================== -# # 3. ENSURE MULTIPOLYGON -# # ======================================================== -# print("\nStep 3: Ensure MULTIPOLYGON") -# multipolygon = processing.run( -# "native:collect", -# {"INPUT": fixed, "OUTPUT": "memory:"} -# )["OUTPUT"] - -# print(" - Valid:", multipolygon.isValid()) -# print(" - Features:", multipolygon.featureCount()) -# summary["step3_after_multipolygon"] = multipolygon.featureCount() - -# # ======================================================== -# # 4. REMOVE DUPLICATE ROWS -# # ======================================================== -# print("\nStep 4: Remove duplicate rows") - -# all_fields = [f.name() for f in multipolygon.fields()] -# print(" - All fields:", all_fields) - -# key_fields = None - -# if "id" in all_fields: -# key_fields = ["id"] -# else: -# int_cols = [ -# f.name() for f in multipolygon.fields() -# if f.typeName().lower() in ["int", "integer", "bigint"] -# ] -# if int_cols: -# key_fields = [int_cols[0]] -# else: -# key_fields = all_fields - -# print(" - Using duplicate key:", key_fields) - -# dedup = processing.run( -# "native:removeduplicatesbyattribute", -# {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"} -# )["OUTPUT"] - -# duplicates_removed = multipolygon.featureCount() - dedup.featureCount() -# summary["step4_duplicates_removed"] = duplicates_removed - -# print(" - Features before:", multipolygon.featureCount()) -# print(" - Features after:", dedup.featureCount()) -# print(" - Duplicates removed:", duplicates_removed) - -# # 
======================================================== -# # 5. REMOVE DUPLICATE VERTICES -# # ======================================================== -# print("\nStep 5: Remove duplicate vertices") - -# no_dup_vertices = processing.run( -# "native:removeduplicatevertices", -# {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"} -# )["OUTPUT"] - -# print(" - Features:", no_dup_vertices.featureCount()) -# summary["step5_after_remove_vertices"] = no_dup_vertices.featureCount() - -# # ======================================================== -# # 6. FIX SRID / REPROJECT -# # ======================================================== -# print("\nStep 6: Reproject (Fix SRID to EPSG:4326)") - -# reprojected = processing.run( -# "native:reprojectlayer", -# {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"} -# )["OUTPUT"] - -# print(" - Features:", reprojected.featureCount()) -# summary["step6_after_srid"] = reprojected.featureCount() - -# # ======================================================== -# # 7. REMOVE SLIVER POLYGONS (< 1 m2) -# # ======================================================== -# print("\nStep 7: Remove sliver polygons (<1 m²)") - -# slivers = processing.run( -# "native:extractbyexpression", -# {"INPUT": reprojected, "EXPRESSION": "$area < 1", "OUTPUT": "memory:"} -# )["OUTPUT"] - -# summary["step7_sliver_removed"] = slivers.featureCount() -# print(" - Slivers found:", slivers.featureCount()) - -# no_sliver = processing.run( -# "native:extractbyexpression", -# {"INPUT": reprojected, "EXPRESSION": "$area >= 1", "OUTPUT": "memory:"} -# )["OUTPUT"] - -# print(" - Features left after removing slivers:", no_sliver.featureCount()) - -# # ======================================================== -# # 8. 
REMOVE TINY HOLES (< 1 m2) -# # ======================================================== -# print("\nStep 8: Remove tiny holes") - -# no_holes = processing.run( -# "native:deleteholes", -# {"INPUT": no_sliver, "MIN_AREA": 1, "OUTPUT": "memory:"} -# )["OUTPUT"] - -# print(" - Features:", no_holes.featureCount()) -# summary["step8_after_deleteholes"] = no_holes.featureCount() - -# # ======================================================== -# # FINISH (TRIM ATTRIBUTES) -# # ======================================================== -# print("\nFinal Step: Trim string fields") - -# trimmed = processing.run( -# "qgis:refactorfields", -# { -# "INPUT": no_holes, -# "FIELDS_MAPPING": [ -# { -# "expression": f"trim(\"{field.name()}\")" -# if field.typeName().lower() in ["text", "varchar"] -# else f"\"{field.name()}\"", -# "name": field.name(), -# "type": field.type(), -# "length": field.length(), -# "precision": field.precision() -# } -# for field in no_holes.fields() -# ], -# "OUTPUT": "memory:" -# } -# )["OUTPUT"] - -# print(" - Final feature count:", trimmed.featureCount()) -# print("========== CLEANSING DONE ==========\n") - -# return { -# "summary": summary, -# "clean_layer": trimmed -# } - # self-intersection def cleansing_layer(layer: QgsVectorLayer) -> Dict: - - # ======================================================== - # INITIAL STATE - # ======================================================== print("\n========== START CLEANSING ==========") print("Step 0: Load Layer") print(" - Valid:", layer.isValid()) print(" - Feature Count:", layer.featureCount()) + print(" - type:", layer.geometryType()) summary = { - "step0_features": layer.featureCount(), - "step1_invalid_before": 0, - "step1_5_self_intersections": 0, - "step2_after_fix": 0, - "step3_after_multipolygon": 0, - "step4_duplicates_removed": 0, - "step5_after_remove_vertices": 0, - "step6_after_srid": 0, - "step7_sliver_removed": 0, - "step8_after_deleteholes": 0 + "features": layer.featureCount(), + 
"invalid_before": 0, + "after_fixgeometries": 0, + "after_fix": 0, + "after_multipolygon": 0, + "duplicates_removed": 0, + "after_remove_vertices": 0, + "after_srid": 0, + "sliver_removed": 0, + "after_deleteholes": 0, + "valid_after": 0 } - # ======================================================== - # 1. VALIDATE GEOMETRY - # ======================================================== - print("\nStep 1: Identify invalid geometries") + # 1. Geometry validity check + print("\nStep 1: Geometry validity check (QGIS native)") + validity = processing.run( + "qgis:checkvalidity", + { + "INPUT_LAYER": layer, + "METHOD": 2, # GEOS + "IGNORE_RING_SELF_INTERSECTION": False, + "VALID_OUTPUT": "memory:", + "INVALID_OUTPUT": "memory:", + "ERROR_OUTPUT": "memory:" + } + ) + invalid_layer = validity["INVALID_OUTPUT"] + error_table = validity["ERROR_OUTPUT"] + invalid_count = invalid_layer.featureCount() + summary["invalid_before"] = invalid_count + print(" - Invalid geometries found:", invalid_count) + print(" - Total error messages:", error_table.featureCount()) - invalid_ids = [] - for f in layer.getFeatures(): - if not f.geometry().isGeosValid(): - invalid_ids.append(f.id()) + # 1.1 Fix invalid geometries + # print("\nStep 1.1: Fix invalid geometries (FixGeometries)") + # fixed_pre = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"] + # summary["after_fixgeometries"] = fixed_pre.featureCount() + # print(" - Features after FixGeometries:", fixed_pre.featureCount()) + # layer = fixed_pre - summary["step1_invalid_before"] = len(invalid_ids) - print(" - Invalid geometries found:", len(invalid_ids)) - - # ======================================================== - # 1.5 DETECT GEOMETRY ERRORS (MANUAL GEOS VALIDATION) - # ======================================================== - print("\nStep 1.5: Detect geometry errors (universal GEOS-safe method)") - - errors = [] - - for f in layer.getFeatures(): - geom = f.geometry() - if not 
geom.isGeosValid(): - # Kita hanya tandai invalid (tanpa reason) - errors.append(f.id()) - - summary["step1_5_geometry_errors"] = len(errors) - - print(" - Geometry errors detected:", len(errors)) - print(" - Invalid feature IDs (first 10):", errors[:10]) - - - - # ======================================================== - # 1.6 FIX INVALID GEOMETRIES (Native FixGeometries) - # ======================================================== - print("\nStep 1.6: Fix invalid geometries (FixGeometries)") - - fixed_pre = processing.run( - "native:fixgeometries", - {"INPUT": layer, "OUTPUT": "memory:"} - )["OUTPUT"] - - summary["step1_6_after_fixgeometries"] = fixed_pre.featureCount() - - print(" - Features after FixGeometries:", fixed_pre.featureCount()) - - layer = fixed_pre - - - - # ======================================================== - # 2. FIX GEOMETRIES (INCLUDES SELF-INTERSECTION FIX) - # ======================================================== + # 2. Fix geometries (again) print("\nStep 2: Fix geometries (including self-intersections)") - - fixed = processing.run( - "native:fixgeometries", - {"INPUT": layer, "OUTPUT": "memory:"} - )["OUTPUT"] - + fixed = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"] print(" - Valid after fix:", fixed.isValid()) print(" - Features after fix:", fixed.featureCount()) - summary["step2_after_fix"] = fixed.featureCount() + summary["after_fix"] = fixed.featureCount() # ======================================================== - # 3. ENSURE MULTIPOLYGON + # 3. ENSURE MULTIPOLYGON (LTR compatible!!) 
# ======================================================== - print("\nStep 3: Ensure MULTIPOLYGON") + print("\nStep 3: Ensure MULTIPOLYGON (LTR-safe method)") - multipolygon = processing.run( - "native:collect", + # Step 3.1: Pecah multiparts → single (agar bersih) + singleparts = processing.run( + "native:multiparttosingleparts", {"INPUT": fixed, "OUTPUT": "memory:"} )["OUTPUT"] + print(" - After multiparttosingleparts:", singleparts.featureCount()) + + # Step 3.2: Promote semua polygon → multipolygon + multipolygon = processing.run( + "native:promotetomulti", + {"INPUT": singleparts, "OUTPUT": "memory:"} + )["OUTPUT"] + + print(" - After promotetomulti:", multipolygon.featureCount()) print(" - Valid:", multipolygon.isValid()) - print(" - Features:", multipolygon.featureCount()) - summary["step3_after_multipolygon"] = multipolygon.featureCount() - # ======================================================== - # 4. REMOVE DUPLICATE ROWS - # ======================================================== + summary["after_multipolygon"] = multipolygon.featureCount() + + + # 4. 
Remove duplicate rows print("\nStep 4: Remove duplicate rows") - all_fields = [f.name() for f in multipolygon.fields()] print(" - All fields:", all_fields) - if "id" in all_fields: key_fields = ["id"] else: - int_cols = [ - f.name() for f in multipolygon.fields() - if f.typeName().lower() in ["int", "integer", "bigint"] - ] + int_cols = [f.name() for f in multipolygon.fields() if f.typeName().lower() in ["int", "integer", "bigint"]] key_fields = [int_cols[0]] if int_cols else all_fields - print(" - Using duplicate key:", key_fields) - - dedup = processing.run( - "native:removeduplicatesbyattribute", - {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"} - )["OUTPUT"] - + dedup = processing.run("native:removeduplicatesbyattribute", {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"})["OUTPUT"] duplicates_removed = multipolygon.featureCount() - dedup.featureCount() - summary["step4_duplicates_removed"] = duplicates_removed - + summary["duplicates_removed"] = duplicates_removed print(" - Features before:", multipolygon.featureCount()) print(" - Features after:", dedup.featureCount()) print(" - Duplicates removed:", duplicates_removed) - # ======================================================== - # 5. REMOVE DUPLICATE VERTICES - # ======================================================== + # 5. Remove duplicate vertices print("\nStep 5: Remove duplicate vertices") - - no_dup_vertices = processing.run( - "native:removeduplicatevertices", - {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"} - )["OUTPUT"] - + no_dup_vertices = processing.run("native:removeduplicatevertices", {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"})["OUTPUT"] print(" - Features:", no_dup_vertices.featureCount()) - summary["step5_after_remove_vertices"] = no_dup_vertices.featureCount() + summary["after_remove_vertices"] = no_dup_vertices.featureCount() - # ======================================================== - # 6. 
FIX SRID / REPROJECT - # ======================================================== - print("\nStep 6: Reproject (Fix SRID to EPSG:4326)") + print("\nStep 5.5: Check input CRS before reprojection") + input_crs = no_dup_vertices.crs() + if input_crs.isValid(): + print(" - Input CRS:", input_crs.authid()) + print(" - CRS description:", input_crs.description()) + else: + print(" - CRS INVALID or UNDEFINED") - reprojected = processing.run( - "native:reprojectlayer", - {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"} - )["OUTPUT"] + # 6. REPROJECT BEFORE any area-based ops (EPSG:4326 here; a local UTM CRS is needed for metric areas) + print("\nStep 6: Reproject layer to EPSG:4326 for metric area calculations") + # NOTE: EPSG:4326 is geographic (degrees), NOT metric — $area on it is in square degrees; pick a local UTM CRS if thresholds in m² are required + final_proj = processing.run("native:reprojectlayer", {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"})["OUTPUT"] + print(" - Features after reproject:", final_proj.featureCount()) + summary["after_srid"] = final_proj.featureCount() + - print(" - Features:", reprojected.featureCount()) - summary["step6_after_srid"] = reprojected.featureCount() + # 7. Remove sliver polygons based on metric area (< 1 m^2) + # print("\nStep 7: Remove sliver polygons (<1 m²)") + # # use $area now because layer is in meters (EPSG:3857) + # slivers = processing.run("native:extractbyexpression", {"INPUT": reprojected, "EXPRESSION": "$area < 1", "OUTPUT": "memory:"})["OUTPUT"] + # summary["sliver_removed"] = slivers.featureCount() + # print(" - Slivers found:", slivers.featureCount()) + # no_sliver = processing.run( + # "native:extractbyexpression", + # { + # "INPUT": reprojected, + # "EXPRESSION": "geometry IS NOT NULL AND $area >= 1", + # "OUTPUT": "memory:" + # } + # )["OUTPUT"] + # print(" - Features left after removing slivers:", no_sliver.featureCount()) - # ======================================================== - # 7. 
REMOVE SLIVER POLYGONS (< 1 m2) - # ======================================================== - print("\nStep 7: Remove sliver polygons (<1 m²)") + # # 8. Remove tiny holes (<1 m^2) — still in metric CRS + # print("\nStep 8: Remove tiny holes (<1 m²)") + # no_holes = processing.run("native:deleteholes", {"INPUT": no_sliver, "MIN_AREA": 1, "OUTPUT": "memory:"})["OUTPUT"] + # print(" - Features after delete holes:", no_holes.featureCount()) + # summary["after_deleteholes"] = no_holes.featureCount() - slivers = processing.run( - "native:extractbyexpression", - {"INPUT": reprojected, "EXPRESSION": "$area < 1", "OUTPUT": "memory:"} - )["OUTPUT"] + # # Reproject BACK to EPSG:4326 for downstream (GeoServer/PostGIS target) + # print("\nStep 9: Reproject back to EPSG:4326") + # final_proj = processing.run("native:reprojectlayer", {"INPUT": no_holes, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"})["OUTPUT"] + # print(" - Features:", final_proj.featureCount()) - summary["step7_sliver_removed"] = slivers.featureCount() - print(" - Slivers found:", slivers.featureCount()) - - no_sliver = processing.run( - "native:extractbyexpression", - {"INPUT": reprojected, "EXPRESSION": "$area >= 1", "OUTPUT": "memory:"} - )["OUTPUT"] - - print(" - Features left after removing slivers:", no_sliver.featureCount()) - - # ======================================================== - # 8. 
REMOVE TINY HOLES (< 1 m2) - # ======================================================== - print("\nStep 8: Remove tiny holes") - - no_holes = processing.run( - "native:deleteholes", - {"INPUT": no_sliver, "MIN_AREA": 1, "OUTPUT": "memory:"} - )["OUTPUT"] - - print(" - Features:", no_holes.featureCount()) - summary["step8_after_deleteholes"] = no_holes.featureCount() - - # ======================================================== - # FINAL: TRIM STRING FIELDS - # ======================================================== + # Final: Trim string fields print("\nFinal Step: Trim string fields") - trimmed = processing.run( "qgis:refactorfields", { - "INPUT": no_holes, + "INPUT": final_proj, "FIELDS_MAPPING": [ { "expression": f"trim(\"{field.name()}\")" @@ -603,19 +186,98 @@ def cleansing_layer(layer: QgsVectorLayer) -> Dict: "length": field.length(), "precision": field.precision() } - for field in no_holes.fields() + for field in final_proj.fields() ], + "KEEP_GEOMETRY": True, # <--- WAJIB "OUTPUT": "memory:" } )["OUTPUT"] + + valid_after = 0 + for f in trimmed.getFeatures(): + if f.geometry() is not None and f.geometry().isGeosValid(): + valid_after += 1 + summary["valid_after"] = valid_after + print(" - Final feature count:", trimmed.featureCount()) print("========== CLEANSING DONE ==========\n") - return { - "summary": summary, - "clean_layer": trimmed + return {"summary": summary, "clean_layer": trimmed} + + + + + + + + +def cleansing_points(layer: QgsVectorLayer): + print("\n=== POINT CLEANING PIPELINE ===") + + summary = { + "features_before": layer.featureCount(), + "invalid_before": 0, + "after_fix": 0, + "after_dedup": 0, + "after_reproject": 0, + "valid_after": 0 } + # 1. 
Check validity (will always return 0 errors for points) + validity = processing.run( + "qgis:checkvalidity", + {"INPUT_LAYER": layer, "METHOD": 2, "VALID_OUTPUT": "memory:", "INVALID_OUTPUT": "memory:", "ERROR_OUTPUT": "memory:"} + ) + invalid = validity["INVALID_OUTPUT"].featureCount() + summary["invalid_before"] = invalid + print("- Invalid points:", invalid) + # 2. Fix geometries (safe) + fixed = processing.run("native:fixgeometries", {"INPUT": layer, "OUTPUT": "memory:"})["OUTPUT"] + summary["after_fix"] = fixed.featureCount() + # 3. Remove duplicate coordinates (points only) + dedup = processing.run( + "native:deleteduplicategeometries", + {"INPUT": fixed, "OUTPUT": "memory:"} + )["OUTPUT"] + summary["after_dedup"] = dedup.featureCount() + + # 4. Reproject + reproject = processing.run( + "native:reprojectlayer", + {"INPUT": dedup, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"} + )["OUTPUT"] + summary["after_reproject"] = reproject.featureCount() + + # 5. Trim string fields + trimmed = processing.run( + "qgis:refactorfields", + { + "INPUT": reproject, + "FIELDS_MAPPING": [ + { + "expression": f"trim(\"{field.name()}\")" if field.typeName().lower() in ["text","varchar"] + else f"\"{field.name()}\"", + "name": field.name(), + "type": field.type(), + "length": field.length(), + "precision": field.precision(), + } + for field in reproject.fields() + ], + "KEEP_GEOMETRY": True, + "OUTPUT": "memory:" + } + )["OUTPUT"] + + # 6. 
Validity check for points (simple) + valid_after = 0 + for f in trimmed.getFeatures(): + if f.geometry() is not None: + valid_after += 1 + + summary["valid_after"] = valid_after + + return {"summary": summary, "clean_layer": trimmed} diff --git a/main.py b/main.py index aa24752..30493f2 100644 --- a/main.py +++ b/main.py @@ -1,8 +1,20 @@ from fastapi import FastAPI, BackgroundTasks -from qgis_bootstrap import start_qgis +import psycopg2 +import requests from uuid import uuid4 +from qgis_bootstrap import start_qgis # from cleansing_service import load_layer, cleansing_layer from full_cleansing_service import load_layer, cleansing_layer +from qgis.core import ( + QgsVectorLayer, + QgsVectorLayerExporter, + QgsDataSourceUri, + QgsProviderRegistry, + QgsCoordinateReferenceSystem +) +from qgis.PyQt.QtCore import QByteArray +from core.config import HOST,PORT,DB,USER,PWD,SCHEMA,GEOM_COL + app = FastAPI() @@ -67,70 +79,116 @@ def run_clean_table(table_name: str, job_id: str): "status": "FINISHED" } - import requests requests.post( - "http://backend-utama:8000/jobs/callback", + "http://localhost:8000/jobs/callback", json=callback_payload ) print(f"=== Cleansing selesai untuk tabel: {table_name} ===\n") +def to_python(v): + # Null + if v is None: + return None + # QVariant kosong + if hasattr(v, "isNull") and v.isNull(): + return None + # Convert QVariant to Python native + if hasattr(v, "toPyObject"): + return v.toPyObject() + # Fallback + return v +def save_to_postgis(layer, table_name): + host = HOST + port = PORT + db = DB + user = USER + pwd = PWD + schema = SCHEMA + geom_col = GEOM_COL + srid = layer.crs().postgisSrid() + fields = layer.fields() + # CONNECT + conn = psycopg2.connect( + dbname=db, + host=host, + port=port, + user=user, + password=pwd + ) + cur = conn.cursor() + # DROP TABLE + cur.execute(f'DROP TABLE IF EXISTS "{schema}"."{table_name}" CASCADE') -from qgis.core import ( - QgsVectorLayer, - QgsVectorLayerExporter, - QgsDataSourceUri -) -from database 
import POSTGIS + # CREATE TABLE + field_defs = [] + for f in fields: + if f.name() == geom_col: + continue + # type mapping + t = f.typeName().lower() + if "int" in t: + pg_type = "INTEGER" + elif "double" in t or "float" in t or "real" in t: + pg_type = "DOUBLE PRECISION" + else: + pg_type = "TEXT" -def save_to_postgis(clean_layer: QgsVectorLayer, table_name: str): - """ - Menghapus isi tabel dan menulis ulang hasil cleansing ke PostGIS. - Geometry harus MULTIPOLYGON dan SRID sudah benar. + col = f.name().replace(" ", "_") + field_defs.append(f'"{col}" {pg_type}') + + # geometry column + field_defs.append(f'"{geom_col}" geometry(MultiPolygon,{srid})') + + create_sql = f'CREATE TABLE "{schema}"."{table_name}" ({",".join(field_defs)});' + cur.execute(create_sql) + + # Prepare INSERT + attribute_columns = [ + f'"{f.name().replace(" ", "_")}"' + for f in fields if f.name() != geom_col + ] + insert_columns = attribute_columns + [f'"{geom_col}"'] + placeholders = ["%s"] * len(insert_columns) + + insert_sql = f""" + INSERT INTO "{schema}"."{table_name}" + ({",".join(insert_columns)}) + VALUES ({",".join(placeholders)}) """ - print(f"[DB] Menyimpan hasil cleansing ke tabel {table_name}") + # INSERT ROWS + count = 0 + for feat in layer.getFeatures(): + attrs = feat.attributes() - # ------------------------------------------- - # 1. Build URI PostGIS target - # ------------------------------------------- - uri = QgsDataSourceUri() - uri.setConnection( - POSTGIS['host'], - str(POSTGIS['port']), - POSTGIS['db'], - POSTGIS['user'], - POSTGIS['password'] - ) + row = [] + for f, v in zip(fields, attrs): + if f.name() != geom_col: + row.append(to_python(v)) - # Nama schema & tabel - schema = "public" - uri.setDataSource(schema, table_name, "geom") # geometry column = geom + geom = feat.geometry() + wkb_bytes = geom.asWkb() + if isinstance(wkb_bytes, QByteArray): + wkb_bytes = bytes(wkb_bytes) - # ------------------------------------------- - # 2. 
Export layer ke PostGIS (replace mode) - # ------------------------------------------- + row.append(psycopg2.Binary(wkb_bytes)) + cur.execute(insert_sql, row) + count += 1 + + conn.commit() + cur.close() + conn.close() + + print(f"[DB] Inserted features: {count}") - options = QgsVectorLayerExporter.ExportOptions() - options.actionOnExistingFile = QgsVectorLayerExporter.ActionOnExistingFile.OverwriteLayer - err_code, err_msg = QgsVectorLayerExporter.exportLayer( - clean_layer, # layer input - uri.uri(), # postgis connection uri - "postgres", # provider - clean_layer.crs(), # CRS layer - options - ) - if err_code != QgsVectorLayerExporter.NoError: - print("[DB][ERROR] Gagal menyimpan:", err_msg) - else: - print("[DB] Berhasil update tabel", table_name) diff --git a/qgis_bootstrap.py b/qgis_bootstrap.py index 5250e23..a9962e0 100644 --- a/qgis_bootstrap.py +++ b/qgis_bootstrap.py @@ -17,7 +17,7 @@ os.environ["QT_QPA_PLATFORM"] = "offscreen" sys.path.append(f"{QGIS_PREFIX}/python") sys.path.append(f"{QGIS_PREFIX}/python/plugins") -from qgis.core import QgsApplication +from qgis.core import QgsApplication, QgsProviderRegistry from qgis.analysis import QgsNativeAlgorithms import processing @@ -29,52 +29,7 @@ def start_qgis(): # === WAJIB: initialize processing === Processing.initialize() + QgsProviderRegistry.instance() qgs.processingRegistry().addProvider(QgsNativeAlgorithms()) return qgs - - - - - - - - - - - - -# DEPLOYMENT -# import os -# import sys - -# # QGIS environment -# os.environ["QGIS_PREFIX_PATH"] = "/usr" -# os.environ["QGIS_HOME"] = "/usr" - -# os.environ["PROJ_LIB"] = "/usr/share/proj" -# os.environ["GDAL_DATA"] = "/usr/share/gdal" -# os.environ["QT_PLUGIN_PATH"] = "/usr/lib/x86_64-linux-gnu/qt5/plugins" - -# os.environ["QT_QPA_PLATFORM"] = "offscreen" - -# # QGIS Python plugins (THIS IS THE MISSING PART) -# sys.path.append("/usr/share/qgis/python") -# sys.path.append("/usr/share/qgis/python/plugins") - -# # Python modules (from system) -# 
sys.path.append("/usr/lib/python3/dist-packages") -# sys.path.append("/usr/lib/python3/dist-packages/qgis") - - -# from qgis.core import QgsApplication -# from qgis.analysis import QgsNativeAlgorithms -# import processing -# from processing.core.Processing import Processing - -# def start_qgis(): -# qgs = QgsApplication([], False) -# qgs.initQgis() -# Processing.initialize() -# qgs.processingRegistry().addProvider(QgsNativeAlgorithms()) -# return qgs