diff --git a/.gitignore b/.gitignore index ba0430d..ebf0677 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ -__pycache__/ \ No newline at end of file +test_pg.py +cleansing_service.py + +__pycache__/ +data/ \ No newline at end of file diff --git a/cleansing_service.py b/cleansing_service.py deleted file mode 100644 index a3e3668..0000000 --- a/cleansing_service.py +++ /dev/null @@ -1,123 +0,0 @@ -from qgis.core import ( - QgsVectorLayer, - QgsVectorLayerExporter, - QgsVectorFileWriter -) -import processing -from typing import Dict -from database import build_uri - -def load_layer(table_name: str): - uri = build_uri(table_name) - print('uri',uri) - layer = QgsVectorLayer(uri, table_name, "postgres") - print("Layer valid:", layer.isValid()) - # print("Error:", layer.error().summary()) - # print("FIELDS:", [f.name() for f in layer.fields()]) - return layer - -def cleansing_layer(layer: QgsVectorLayer) -> Dict: - - summary = { - "total_features_before": layer.featureCount(), - "invalid_geometries_before": 0, - "invalid_geometries_fixed": 0, - "duplicates_removed": 0, - } - - # ------------------------- - # 1. IDENTIFY INVALID GEOMETRY - # ------------------------- - invalid_ids = [] - for f in layer.getFeatures(): - if not f.geometry().isGeosValid(): - invalid_ids.append(f.id()) - - summary["invalid_geometries_before"] = len(invalid_ids) - - # ------------------------- - # 2. FIX INVALID GEOMETRY - # ------------------------- - fixed = processing.run( - "native:fixgeometries", - { - "INPUT": layer, - "OUTPUT": "memory:" - } - )["OUTPUT"] - - summary["invalid_geometries_fixed"] = len(invalid_ids) - - # ------------------------- - # 3. AUTO DETECT DUPLICATE KEY FIELD - # ------------------------- - all_fields = [f.name() for f in fixed.fields()] - - print("Detecting key fields:", all_fields) - - key_fields = None - - # (1) Prefer kolom "id" - if "id" in all_fields: - key_fields = ["id"] - - # (2) Jika tidak ada, pilih kolom integer pertama - if key_fields is None: - int_cols = [f.name() for f in fixed.fields() - if f.typeName().lower() in ["int", "integer", "bigint"]] - if int_cols: - key_fields = [int_cols[0]] - - # (3) Jika tetap tidak ada, pakai semua kolom - if key_fields is None: - key_fields = all_fields - - print("Using key field:", key_fields) - - # ------------------------- - # 4. REMOVE DUPLICATES BY ATTRIBUTE - # ------------------------- - dedup = processing.run( - "native:removeduplicatesbyattribute", - { - "INPUT": fixed, - "FIELDS": key_fields, - "METHOD": 0, - "OUTPUT": "memory:" - } - )["OUTPUT"] - - summary["duplicates_removed"] = ( - fixed.featureCount() - dedup.featureCount() - ) - - # ------------------------- - # 5. TRIM STRING FIELDS - # ------------------------- - trimmed = processing.run( - "qgis:refactorfields", - { - "INPUT": dedup, - "FIELDS_MAPPING": [ - { - "expression": f"trim(\"{field.name()}\")" - if field.typeName().lower() in ["text", "varchar"] - else f"\"{field.name()}\"", - "name": field.name(), - "type": field.type(), - "length": field.length(), - "precision": field.precision() - } - for field in dedup.fields() - ], - "OUTPUT": "memory:" - } - )["OUTPUT"] - - # ------------------------- - # RESULT - # ------------------------- - return { - "summary": summary, - "clean_layer": trimmed - } diff --git a/data/exmpl.geojson b/data/exmpl.geojson deleted file mode 100644 index 99c1af9..0000000 --- a/data/exmpl.geojson +++ /dev/null @@ -1,34 +0,0 @@ -{ "type": "FeatureCollection", - "features": [ - { "type": "Feature", - "geometry": {"type": "Point", "coordinates": [102.0, 0.5]}, - "properties": {"prop0": "value0"} - }, - { "type": "Feature", - "geometry": { - "type": "LineString", - "coordinates": [ - [102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0] - ] - }, - "properties": { - "prop0": "value0", - "prop1": 0.0 - } - }, - { "type": "Feature", - "geometry": { - "type": "Polygon", - "coordinates": [ - [ [100.0, 0.0], [101.0, 0.0], [101.0, 1.0], - [100.0, 1.0], [100.0, 0.0] ] - ] - - }, - "properties": { - "prop0": "value0", - "prop1": {"this": "that"} - } - } - ] - } \ No newline at end of file diff --git a/test_pg.py b/test_pg.py deleted file mode 100644 index b3b5f76..0000000 --- a/test_pg.py +++ /dev/null @@ -1,42 +0,0 @@ -import os, sys - -print("---- ENV CHECK ----") -print("QGIS_PREFIX_PATH:", os.environ.get("QGIS_PREFIX_PATH")) -print("PROJ_LIB:", os.environ.get("PROJ_LIB")) -print("GDAL_DATA:", os.environ.get("GDAL_DATA")) -print("QT_PLUGIN_PATH:", os.environ.get("QT_PLUGIN_PATH")) - -import glob - -print("\n--- SEARCH proj.db in QGIS PREFIX ---") -print(glob.glob("/Applications/QGIS-LTR.app/**/proj.db", recursive=True)) - - -print("\n--- CHECK FILE EXISTS ---") -print("proj.db exists? ", os.path.isfile(os.path.join(os.environ["PROJ_LIB"], "proj.db"))) - - -QGIS_APP = "/Applications/QGIS-LTR.app/Contents" -QGIS_PREFIX = f"{QGIS_APP}/Resources" - -os.environ["QGIS_PREFIX_PATH"] = QGIS_PREFIX -sys.path.append(f"{QGIS_PREFIX}/python") -sys.path.append(f"{QGIS_PREFIX}/python/plugins") - -from qgis.core import QgsApplication, QgsVectorLayer - -qgs = QgsApplication([], False) -qgs.initQgis() - -uri = ( - "dbname='gisdb' host=localhost port=5432 user='postgres' password='postgres' " - "table=\"public\".\"dataset_metadata\" key='id' srid=4326" -) - -layer = QgsVectorLayer(uri, "test", "postgres") - -print("Valid:", layer.isValid()) -print("Provider:", layer.providerType()) -print("Error summary:", layer.error().summary()) - -qgs.exitQgis()