cleaning folder

This commit is contained in:
DmsAnhr 2025-11-26 09:48:48 +07:00
parent b7d88303e0
commit a4dd6b910f
4 changed files with 5 additions and 200 deletions

4
.gitignore vendored
View File

@@ -1 +1,5 @@
test_pg.py
cleansing_service.py
__pycache__/
data/

View File

@@ -1,123 +0,0 @@
from qgis.core import (
QgsVectorLayer,
QgsVectorLayerExporter,
QgsVectorFileWriter
)
import processing
from typing import Dict
from database import build_uri
def load_layer(table_name: str):
    """Open *table_name* as a vector layer via the "postgres" provider.

    The data-source URI is obtained from ``build_uri(table_name)``. The URI
    and the layer's validity flag are printed for debugging. The layer is
    returned as-is, so callers must check ``isValid()`` themselves.
    """
    connection_uri = build_uri(table_name)
    print('uri', connection_uri)

    pg_layer = QgsVectorLayer(connection_uri, table_name, "postgres")
    is_valid = pg_layer.isValid()
    print("Layer valid:", is_valid)

    # Extra diagnostics, left disabled as in the original:
    #   pg_layer.error().summary()
    #   [f.name() for f in pg_layer.fields()]
    return pg_layer
def _count_invalid_geometries(layer: QgsVectorLayer) -> int:
    """Return how many features of *layer* fail ``isGeosValid()``."""
    return sum(
        1
        for feature in layer.getFeatures()
        if not feature.geometry().isGeosValid()
    )


def _detect_key_fields(layer: QgsVectorLayer) -> list:
    """Choose the attribute names used to decide that two features are duplicates.

    Preference order: a field literally named "id"; otherwise the first field
    whose type name is "int", "integer" or "bigint"; otherwise every field.
    """
    all_fields = [f.name() for f in layer.fields()]
    print("Detecting key fields:", all_fields)

    if "id" in all_fields:
        # (1) Prefer an "id" column.
        key_fields = ["id"]
    else:
        # (2) Otherwise take the first integer column.
        # NOTE(review): the match is on the provider's type *name*; providers
        # that report other spellings (e.g. "int4"/"int8") would fall through
        # to (3) -- confirm against the data sources actually used.
        int_cols = [
            f.name()
            for f in layer.fields()
            if f.typeName().lower() in ["int", "integer", "bigint"]
        ]
        # (3) If there is still nothing, compare on all columns.
        key_fields = [int_cols[0]] if int_cols else all_fields

    print("Using key field:", key_fields)
    return key_fields


def _trim_fields_mapping(layer: QgsVectorLayer) -> list:
    """Build a refactor-fields mapping that trims "text"/"varchar" fields.

    Every field keeps its name, type, length and precision; only the
    expression differs: ``trim("<name>")`` for fields whose type name is
    "text" or "varchar", and a plain column reference for all others.
    """
    mapping = []
    for field in layer.fields():
        # NOTE(review): the field name is interpolated unescaped, so a name
        # containing a double quote would produce a broken expression.
        if field.typeName().lower() in ["text", "varchar"]:
            expression = f"trim(\"{field.name()}\")"
        else:
            expression = f"\"{field.name()}\""
        mapping.append(
            {
                "expression": expression,
                "name": field.name(),
                "type": field.type(),
                "length": field.length(),
                "precision": field.precision(),
            }
        )
    return mapping


def cleansing_layer(layer: QgsVectorLayer) -> Dict:
    """Run the cleansing pipeline on *layer* and report what changed.

    Steps, each writing to a new in-memory layer:

    1. count features whose geometry fails ``isGeosValid()``;
    2. run ``native:fixgeometries``;
    3. pick key field(s) for duplicate detection;
    4. run ``native:removeduplicatesbyattribute`` on those fields;
    5. run ``qgis:refactorfields`` to trim "text"/"varchar" fields.

    Returns:
        A dict with two keys:

        * ``"summary"`` -- a dict with ``total_features_before``,
          ``invalid_geometries_before``, ``invalid_geometries_fixed`` and
          ``duplicates_removed``;
        * ``"clean_layer"`` -- the output layer of the final step.
    """
    # -------------------------
    # 1. IDENTIFY INVALID GEOMETRY
    # -------------------------
    invalid_before = _count_invalid_geometries(layer)
    summary = {
        "total_features_before": layer.featureCount(),
        "invalid_geometries_before": invalid_before,
        "invalid_geometries_fixed": 0,
        "duplicates_removed": 0,
    }

    # -------------------------
    # 2. FIX INVALID GEOMETRY
    # -------------------------
    fixed = processing.run(
        "native:fixgeometries",
        {
            "INPUT": layer,
            "OUTPUT": "memory:",
        },
    )["OUTPUT"]

    # Re-run the same validity check on the output instead of assuming every
    # invalid geometry was repaired: anything still failing the check is not
    # reported as fixed. Clamped at zero so the counter never goes negative.
    still_invalid = _count_invalid_geometries(fixed)
    summary["invalid_geometries_fixed"] = max(invalid_before - still_invalid, 0)

    # -------------------------
    # 3. AUTO DETECT DUPLICATE KEY FIELD
    # -------------------------
    key_fields = _detect_key_fields(fixed)

    # -------------------------
    # 4. REMOVE DUPLICATES BY ATTRIBUTE
    # -------------------------
    dedup = processing.run(
        "native:removeduplicatesbyattribute",
        {
            "INPUT": fixed,
            "FIELDS": key_fields,
            "METHOD": 0,
            "OUTPUT": "memory:",
        },
    )["OUTPUT"]
    summary["duplicates_removed"] = fixed.featureCount() - dedup.featureCount()

    # -------------------------
    # 5. TRIM STRING FIELDS
    # -------------------------
    trimmed = processing.run(
        "qgis:refactorfields",
        {
            "INPUT": dedup,
            "FIELDS_MAPPING": _trim_fields_mapping(dedup),
            "OUTPUT": "memory:",
        },
    )["OUTPUT"]

    # -------------------------
    # RESULT
    # -------------------------
    return {
        "summary": summary,
        "clean_layer": trimmed,
    }

View File

@@ -1,34 +0,0 @@
{ "type": "FeatureCollection",
"features": [
{ "type": "Feature",
"geometry": {"type": "Point", "coordinates": [102.0, 0.5]},
"properties": {"prop0": "value0"}
},
{ "type": "Feature",
"geometry": {
"type": "LineString",
"coordinates": [
[102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0]
]
},
"properties": {
"prop0": "value0",
"prop1": 0.0
}
},
{ "type": "Feature",
"geometry": {
"type": "Polygon",
"coordinates": [
[ [100.0, 0.0], [101.0, 0.0], [101.0, 1.0],
[100.0, 1.0], [100.0, 0.0] ]
]
},
"properties": {
"prop0": "value0",
"prop1": {"this": "that"}
}
}
]
}

View File

@@ -1,42 +0,0 @@
# Stand-alone diagnostic: prints the QGIS/PROJ/GDAL environment, points Python
# at the macOS QGIS-LTR bundle, then tries to open one PostGIS table and
# reports whether the resulting layer is valid.
import glob
import os
import sys

print("---- ENV CHECK ----")
print("QGIS_PREFIX_PATH:", os.environ.get("QGIS_PREFIX_PATH"))
print("PROJ_LIB:", os.environ.get("PROJ_LIB"))
print("GDAL_DATA:", os.environ.get("GDAL_DATA"))
print("QT_PLUGIN_PATH:", os.environ.get("QT_PLUGIN_PATH"))

print("\n--- SEARCH proj.db in QGIS PREFIX ---")
print(glob.glob("/Applications/QGIS-LTR.app/**/proj.db", recursive=True))

print("\n--- CHECK FILE EXISTS ---")
# PROJ_LIB may be unset (it is read with .get() above); report False in that
# case instead of aborting the whole check with a KeyError.
proj_lib = os.environ.get("PROJ_LIB")
print(
    "proj.db exists? ",
    proj_lib is not None and os.path.isfile(os.path.join(proj_lib, "proj.db")),
)

QGIS_APP = "/Applications/QGIS-LTR.app/Contents"
QGIS_PREFIX = f"{QGIS_APP}/Resources"
os.environ["QGIS_PREFIX_PATH"] = QGIS_PREFIX

# The qgis package lives inside the app bundle, so it must be added to
# sys.path before the import below can succeed.
sys.path.append(f"{QGIS_PREFIX}/python")
sys.path.append(f"{QGIS_PREFIX}/python/plugins")

from qgis.core import QgsApplication, QgsVectorLayer

qgs = QgsApplication([], False)
qgs.initQgis()
try:
    # NOTE(review): connection credentials are hard-coded here; fine for a
    # local throwaway check, but they should not be committed for a real DB.
    uri = (
        "dbname='gisdb' host=localhost port=5432 user='postgres' password='postgres' "
        "table=\"public\".\"dataset_metadata\" key='id' srid=4326"
    )
    layer = QgsVectorLayer(uri, "test", "postgres")
    print("Valid:", layer.isValid())
    print("Provider:", layer.providerType())
    print("Error summary:", layer.error().summary())
finally:
    # Always shut QGIS down, even if the layer check above raises.
    qgs.exitQgis()