cleaning folder
This commit is contained in:
parent
b7d88303e0
commit
a4dd6b910f
6
.gitignore
vendored
6
.gitignore
vendored
|
|
@ -1 +1,5 @@
|
|||
__pycache__/
|
||||
test_pg.py
|
||||
cleansing_service.py
|
||||
|
||||
__pycache__/
|
||||
data/
|
||||
|
|
@ -1,123 +0,0 @@
|
|||
from qgis.core import (
|
||||
QgsVectorLayer,
|
||||
QgsVectorLayerExporter,
|
||||
QgsVectorFileWriter
|
||||
)
|
||||
import processing
|
||||
from typing import Dict
|
||||
from database import build_uri
|
||||
|
||||
def load_layer(table_name: str):
    """Load a PostGIS table as a QGIS vector layer.

    Builds the provider URI for *table_name* via ``build_uri`` and opens it
    with the "postgres" data provider. The layer's validity is printed for
    debugging; an invalid layer is still returned for the caller to inspect.

    Args:
        table_name: Name of the database table to load.

    Returns:
        The QgsVectorLayer for the table (check ``isValid()`` before use).
    """
    connection_uri = build_uri(table_name)
    print('uri', connection_uri)

    vector_layer = QgsVectorLayer(connection_uri, table_name, "postgres")
    print("Layer valid:", vector_layer.isValid())

    return vector_layer
|
||||
|
||||
def cleansing_layer(layer: QgsVectorLayer) -> Dict:
    """Clean a vector layer: fix invalid geometries, remove duplicates,
    and trim whitespace from string fields.

    Pipeline (all outputs are in-memory layers):
        1. Count features with GEOS-invalid geometry.
        2. Repair them with the "native:fixgeometries" algorithm.
        3. Auto-detect a duplicate key field ("id", else the first
           integer column, else all columns).
        4. Drop duplicate features by that key ("native:removeduplicatesbyattribute").
        5. Trim text fields via "qgis:refactorfields".

    Args:
        layer: Input QgsVectorLayer (e.g. loaded from PostGIS).

    Returns:
        Dict with:
            "summary": counts — features before, invalid geometries
                before/fixed, duplicates removed.
            "clean_layer": the cleaned in-memory QgsVectorLayer.
    """
    summary = {
        "total_features_before": layer.featureCount(),
        "invalid_geometries_before": 0,
        "invalid_geometries_fixed": 0,
        "duplicates_removed": 0,
    }

    # -------------------------
    # 1. IDENTIFY INVALID GEOMETRY
    # -------------------------
    invalid_ids = [
        f.id() for f in layer.getFeatures()
        if not f.geometry().isGeosValid()
    ]
    summary["invalid_geometries_before"] = len(invalid_ids)

    # -------------------------
    # 2. FIX INVALID GEOMETRY
    # -------------------------
    fixed = processing.run(
        "native:fixgeometries",
        {
            "INPUT": layer,
            "OUTPUT": "memory:"
        }
    )["OUTPUT"]

    # BUG FIX: the original reported len(invalid_ids) as "fixed" without
    # re-checking the output. Re-validate the repaired layer and report how
    # many of the previously-invalid geometries are actually valid now.
    still_invalid = sum(
        1 for f in fixed.getFeatures()
        if not f.geometry().isGeosValid()
    )
    summary["invalid_geometries_fixed"] = len(invalid_ids) - still_invalid

    # -------------------------
    # 3. AUTO DETECT DUPLICATE KEY FIELD
    # -------------------------
    all_fields = [f.name() for f in fixed.fields()]

    print("Detecting key fields:", all_fields)

    key_fields = None

    # (1) Prefer an "id" column.
    if "id" in all_fields:
        key_fields = ["id"]

    # (2) Otherwise, fall back to the first integer column.
    if key_fields is None:
        int_cols = [f.name() for f in fixed.fields()
                    if f.typeName().lower() in ["int", "integer", "bigint"]]
        if int_cols:
            key_fields = [int_cols[0]]

    # (3) As a last resort, treat the full attribute row as the key.
    if key_fields is None:
        key_fields = all_fields

    print("Using key field:", key_fields)

    # -------------------------
    # 4. REMOVE DUPLICATES BY ATTRIBUTE
    # -------------------------
    dedup = processing.run(
        "native:removeduplicatesbyattribute",
        {
            "INPUT": fixed,
            "FIELDS": key_fields,
            "METHOD": 0,  # keep the first matching feature
            "OUTPUT": "memory:"
        }
    )["OUTPUT"]

    summary["duplicates_removed"] = (
        fixed.featureCount() - dedup.featureCount()
    )

    # -------------------------
    # 5. TRIM STRING FIELDS
    # -------------------------
    # Text/varchar fields get trim(); every other field is passed through
    # unchanged (same name, type, length, precision).
    trimmed = processing.run(
        "qgis:refactorfields",
        {
            "INPUT": dedup,
            "FIELDS_MAPPING": [
                {
                    "expression": f"trim(\"{field.name()}\")"
                    if field.typeName().lower() in ["text", "varchar"]
                    else f"\"{field.name()}\"",
                    "name": field.name(),
                    "type": field.type(),
                    "length": field.length(),
                    "precision": field.precision()
                }
                for field in dedup.fields()
            ],
            "OUTPUT": "memory:"
        }
    )["OUTPUT"]

    # -------------------------
    # RESULT
    # -------------------------
    return {
        "summary": summary,
        "clean_layer": trimmed
    }
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
{ "type": "FeatureCollection",
|
||||
"features": [
|
||||
{ "type": "Feature",
|
||||
"geometry": {"type": "Point", "coordinates": [102.0, 0.5]},
|
||||
"properties": {"prop0": "value0"}
|
||||
},
|
||||
{ "type": "Feature",
|
||||
"geometry": {
|
||||
"type": "LineString",
|
||||
"coordinates": [
|
||||
[102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0]
|
||||
]
|
||||
},
|
||||
"properties": {
|
||||
"prop0": "value0",
|
||||
"prop1": 0.0
|
||||
}
|
||||
},
|
||||
{ "type": "Feature",
|
||||
"geometry": {
|
||||
"type": "Polygon",
|
||||
"coordinates": [
|
||||
[ [100.0, 0.0], [101.0, 0.0], [101.0, 1.0],
|
||||
[100.0, 1.0], [100.0, 0.0] ]
|
||||
]
|
||||
|
||||
},
|
||||
"properties": {
|
||||
"prop0": "value0",
|
||||
"prop1": {"this": "that"}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
42
test_pg.py
42
test_pg.py
|
|
@ -1,42 +0,0 @@
|
|||
import os, sys

# Diagnostic script: verify the macOS QGIS environment is wired up correctly
# (proj.db location, env vars), then try opening a PostGIS layer headlessly.

print("---- ENV CHECK ----")
print("QGIS_PREFIX_PATH:", os.environ.get("QGIS_PREFIX_PATH"))
print("PROJ_LIB:", os.environ.get("PROJ_LIB"))
print("GDAL_DATA:", os.environ.get("GDAL_DATA"))
print("QT_PLUGIN_PATH:", os.environ.get("QT_PLUGIN_PATH"))

import glob

print("\n--- SEARCH proj.db in QGIS PREFIX ---")
print(glob.glob("/Applications/QGIS-LTR.app/**/proj.db", recursive=True))


print("\n--- CHECK FILE EXISTS ---")
# BUG FIX: os.environ["PROJ_LIB"] raised KeyError when PROJ_LIB was unset,
# even though the env check above already treats it as optional.
proj_lib = os.environ.get("PROJ_LIB")
if proj_lib:
    print("proj.db exists? ", os.path.isfile(os.path.join(proj_lib, "proj.db")))
else:
    print("proj.db exists?  (PROJ_LIB not set)")


QGIS_APP = "/Applications/QGIS-LTR.app/Contents"
QGIS_PREFIX = f"{QGIS_APP}/Resources"

# Point QGIS at its bundled resources and make its Python bindings importable.
os.environ["QGIS_PREFIX_PATH"] = QGIS_PREFIX
sys.path.append(f"{QGIS_PREFIX}/python")
sys.path.append(f"{QGIS_PREFIX}/python/plugins")

from qgis.core import QgsApplication, QgsVectorLayer

# Headless QGIS session (False = no GUI).
qgs = QgsApplication([], False)
qgs.initQgis()

uri = (
    "dbname='gisdb' host=localhost port=5432 user='postgres' password='postgres' "
    "table=\"public\".\"dataset_metadata\" key='id' srid=4326"
)

layer = QgsVectorLayer(uri, "test", "postgres")

print("Valid:", layer.isValid())
print("Provider:", layer.providerType())
print("Error summary:", layer.error().summary())

qgs.exitQgis()
|
||||
Loading…
Reference in New Issue
Block a user