init qgis headless
This commit is contained in:
commit
b46b1bb4ba
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
__pycache__/
|
||||||
123
cleansing_service.py
Normal file
123
cleansing_service.py
Normal file
|
|
@ -0,0 +1,123 @@
|
||||||
|
from qgis.core import (
|
||||||
|
QgsVectorLayer,
|
||||||
|
QgsVectorLayerExporter,
|
||||||
|
QgsVectorFileWriter
|
||||||
|
)
|
||||||
|
import processing
|
||||||
|
from typing import Dict
|
||||||
|
from database import build_uri
|
||||||
|
|
||||||
|
def load_layer(table_name: str):
    """Open PostGIS table *table_name* as a QGIS vector layer.

    Builds the provider URI from the shared connection settings and
    returns the resulting layer; validity is printed but not enforced —
    callers must check ``layer.isValid()`` themselves.
    """
    uri = build_uri(table_name)
    print('uri', uri)

    vector_layer = QgsVectorLayer(uri, table_name, "postgres")
    print("Layer valid:", vector_layer.isValid())

    return vector_layer
|
||||||
|
|
||||||
|
def cleansing_layer(layer: QgsVectorLayer) -> Dict:
    """Clean a vector layer in three passes.

    1. Repair invalid geometries (``native:fixgeometries``).
    2. Remove duplicate rows keyed on an auto-detected key column.
    3. Trim leading/trailing whitespace from string attributes.

    Returns a dict with a ``summary`` of before/after counts and the
    cleaned in-memory layer under ``clean_layer``.
    """
    summary = {
        "total_features_before": layer.featureCount(),
        "invalid_geometries_before": 0,
        "invalid_geometries_fixed": 0,
        "duplicates_removed": 0,
    }

    # -------------------------
    # 1. IDENTIFY INVALID GEOMETRY
    # -------------------------
    invalid_ids = [
        f.id() for f in layer.getFeatures()
        if not f.geometry().isGeosValid()
    ]
    summary["invalid_geometries_before"] = len(invalid_ids)

    # -------------------------
    # 2. FIX INVALID GEOMETRY
    # -------------------------
    fixed = processing.run(
        "native:fixgeometries",
        {
            "INPUT": layer,
            "OUTPUT": "memory:"
        }
    )["OUTPUT"]

    # BUG FIX: the original set this to len(invalid_ids), silently
    # assuming every invalid geometry was repaired.  Recount what is
    # still invalid after the repair pass so the summary is honest.
    still_invalid = sum(
        1 for f in fixed.getFeatures() if not f.geometry().isGeosValid()
    )
    summary["invalid_geometries_fixed"] = len(invalid_ids) - still_invalid

    # -------------------------
    # 3. AUTO DETECT DUPLICATE KEY FIELD
    # -------------------------
    all_fields = [f.name() for f in fixed.fields()]
    print("Detecting key fields:", all_fields)

    key_fields = None

    # (1) Prefer an "id" column when present.
    if "id" in all_fields:
        key_fields = ["id"]

    # (2) Otherwise pick the first integer column.
    if key_fields is None:
        int_cols = [f.name() for f in fixed.fields()
                    if f.typeName().lower() in ["int", "integer", "bigint"]]
        if int_cols:
            key_fields = [int_cols[0]]

    # (3) Fall back to every column (removes exact-row duplicates only).
    if key_fields is None:
        key_fields = all_fields

    print("Using key field:", key_fields)

    # -------------------------
    # 4. REMOVE DUPLICATES BY ATTRIBUTE
    # -------------------------
    dedup = processing.run(
        "native:removeduplicatesbyattribute",
        {
            "INPUT": fixed,
            "FIELDS": key_fields,
            "METHOD": 0,
            "OUTPUT": "memory:"
        }
    )["OUTPUT"]

    summary["duplicates_removed"] = (
        fixed.featureCount() - dedup.featureCount()
    )

    # -------------------------
    # 5. TRIM STRING FIELDS
    # -------------------------
    trimmed = processing.run(
        "qgis:refactorfields",
        {
            "INPUT": dedup,
            "FIELDS_MAPPING": [
                {
                    # Only text-like columns get trim(); everything
                    # else is passed through unchanged.
                    "expression": f"trim(\"{field.name()}\")"
                    if field.typeName().lower() in ["text", "varchar"]
                    else f"\"{field.name()}\"",
                    "name": field.name(),
                    "type": field.type(),
                    "length": field.length(),
                    "precision": field.precision()
                }
                for field in dedup.fields()
            ],
            "OUTPUT": "memory:"
        }
    )["OUTPUT"]

    # -------------------------
    # RESULT
    # -------------------------
    return {
        "summary": summary,
        "clean_layer": trimmed
    }
|
||||||
34
data/exmpl.geojson
Normal file
34
data/exmpl.geojson
Normal file
|
|
@ -0,0 +1,34 @@
|
||||||
|
{ "type": "FeatureCollection",
|
||||||
|
"features": [
|
||||||
|
{ "type": "Feature",
|
||||||
|
"geometry": {"type": "Point", "coordinates": [102.0, 0.5]},
|
||||||
|
"properties": {"prop0": "value0"}
|
||||||
|
},
|
||||||
|
{ "type": "Feature",
|
||||||
|
"geometry": {
|
||||||
|
"type": "LineString",
|
||||||
|
"coordinates": [
|
||||||
|
[102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"properties": {
|
||||||
|
"prop0": "value0",
|
||||||
|
"prop1": 0.0
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{ "type": "Feature",
|
||||||
|
"geometry": {
|
||||||
|
"type": "Polygon",
|
||||||
|
"coordinates": [
|
||||||
|
[ [100.0, 0.0], [101.0, 0.0], [101.0, 1.0],
|
||||||
|
[100.0, 1.0], [100.0, 0.0] ]
|
||||||
|
]
|
||||||
|
|
||||||
|
},
|
||||||
|
"properties": {
|
||||||
|
"prop0": "value0",
|
||||||
|
"prop1": {"this": "that"}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
21
database.py
Normal file
21
database.py
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
import os

# Connection settings for the target PostGIS database.  Each value can
# be overridden via an environment variable so credentials do not have
# to live in source control; the in-source defaults are preserved for
# backward compatibility.
POSTGIS = {
    "host": os.environ.get("POSTGIS_HOST", "192.168.60.24"),
    "port": os.environ.get("POSTGIS_PORT", "5432"),
    "db": os.environ.get("POSTGIS_DB", "test_postgis"),
    "user": os.environ.get("POSTGIS_USER", "postgres"),
    "password": os.environ.get("POSTGIS_PASSWORD", "12345"),
}


def build_uri(table_name: str) -> str:
    """Build a QGIS postgres-provider data source URI for *table_name*.

    The table is assumed to live in the ``public`` schema and to use
    ``_id`` as its key column (required by the QGIS postgres provider).
    """
    return (
        f"dbname='{POSTGIS['db']}' "
        f"host='{POSTGIS['host']}' "
        f"port='{POSTGIS['port']}' "
        f"user='{POSTGIS['user']}' "
        f"password='{POSTGIS['password']}' "
        f"sslmode=disable "
        f"table=\"public\".\"{table_name}\" "
        f"key='_id'"
    )
|
||||||
|
|
||||||
|
|
||||||
597
full_cleansing_service.py
Normal file
597
full_cleansing_service.py
Normal file
|
|
@ -0,0 +1,597 @@
|
||||||
|
from qgis.core import (
|
||||||
|
QgsVectorLayer,
|
||||||
|
QgsVectorLayerExporter,
|
||||||
|
QgsVectorFileWriter
|
||||||
|
)
|
||||||
|
import processing
|
||||||
|
from typing import Dict
|
||||||
|
from database import build_uri
|
||||||
|
|
||||||
|
def load_layer(table_name: str):
    """Open PostGIS table *table_name* as a QGIS vector layer.

    Validity is only printed here; callers are expected to check
    ``layer.isValid()`` before using the result.
    """
    uri = build_uri(table_name)
    print('uri', uri)

    vector_layer = QgsVectorLayer(uri, table_name, "postgres")
    print("Layer valid:", vector_layer.isValid())

    return vector_layer
|
||||||
|
|
||||||
|
|
||||||
|
# def cleansing_layer(layer: QgsVectorLayer) -> Dict:
|
||||||
|
|
||||||
|
# summary = {
|
||||||
|
# "total_features_before": layer.featureCount(),
|
||||||
|
# "invalid_geometries_before": 0,
|
||||||
|
# "invalid_geometries_fixed": 0,
|
||||||
|
# "duplicates_removed": 0,
|
||||||
|
# "sliver_removed": 0,
|
||||||
|
# "holes_removed": 0
|
||||||
|
# }
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 1. IDENTIFY INVALID GEOMETRY
|
||||||
|
# # ========================================================
|
||||||
|
# invalid_ids = []
|
||||||
|
# for f in layer.getFeatures():
|
||||||
|
# if not f.geometry().isGeosValid():
|
||||||
|
# invalid_ids.append(f.id())
|
||||||
|
|
||||||
|
# summary["invalid_geometries_before"] = len(invalid_ids)
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 2. FIX GEOMETRIES
|
||||||
|
# # ========================================================
|
||||||
|
# fixed = processing.run(
|
||||||
|
# "native:fixgeometries",
|
||||||
|
# {
|
||||||
|
# "INPUT": layer,
|
||||||
|
# "OUTPUT": "memory:"
|
||||||
|
# }
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# summary["invalid_geometries_fixed"] = len(invalid_ids)
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 3. ENSURE MULTIPOLYGON
|
||||||
|
# # ========================================================
|
||||||
|
# multipolygon = processing.run(
|
||||||
|
# "native:collect",
|
||||||
|
# {
|
||||||
|
# "INPUT": fixed,
|
||||||
|
# "OUTPUT": "memory:"
|
||||||
|
# }
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 4. REMOVE DUPLICATE ROWS
|
||||||
|
# # ========================================================
|
||||||
|
# all_fields = [f.name() for f in multipolygon.fields()]
|
||||||
|
# print("Detecting key fields:", all_fields)
|
||||||
|
|
||||||
|
# key_fields = None
|
||||||
|
|
||||||
|
# # (1) Prefer 'id'
|
||||||
|
# if "id" in all_fields:
|
||||||
|
# key_fields = ["id"]
|
||||||
|
|
||||||
|
# # (2) Else pick first integer field
|
||||||
|
# if key_fields is None:
|
||||||
|
# int_cols = [
|
||||||
|
# f.name() for f in multipolygon.fields()
|
||||||
|
# if f.typeName().lower() in ["int", "integer", "bigint"]
|
||||||
|
# ]
|
||||||
|
# if int_cols:
|
||||||
|
# key_fields = [int_cols[0]]
|
||||||
|
|
||||||
|
# # (3) Else use all fields
|
||||||
|
# if key_fields is None:
|
||||||
|
# key_fields = all_fields
|
||||||
|
|
||||||
|
# print("Using key field:", key_fields)
|
||||||
|
|
||||||
|
# dedup = processing.run(
|
||||||
|
# "native:removeduplicatesbyattribute",
|
||||||
|
# {
|
||||||
|
# "INPUT": multipolygon,
|
||||||
|
# "FIELDS": key_fields,
|
||||||
|
# "METHOD": 0,
|
||||||
|
# "OUTPUT": "memory:"
|
||||||
|
# }
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# summary["duplicates_removed"] = (
|
||||||
|
# multipolygon.featureCount() - dedup.featureCount()
|
||||||
|
# )
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 5. REMOVE DUPLICATE VERTICES
|
||||||
|
# # ========================================================
|
||||||
|
# no_dup_vertices = processing.run(
|
||||||
|
# "native:removeduplicatevertices",
|
||||||
|
# {
|
||||||
|
# "INPUT": dedup,
|
||||||
|
# "VERTICES": 0, # remove exact duplicates
|
||||||
|
# "OUTPUT": "memory:"
|
||||||
|
# }
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 6. FIX SRID (REPROJECT IF NEEDED)
|
||||||
|
# # ========================================================
|
||||||
|
# # Force SRID to 4326
|
||||||
|
# reprojected = processing.run(
|
||||||
|
# "native:reprojectlayer",
|
||||||
|
# {
|
||||||
|
# "INPUT": no_dup_vertices,
|
||||||
|
# "TARGET_CRS": "EPSG:4326",
|
||||||
|
# "OUTPUT": "memory:"
|
||||||
|
# }
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 7. REMOVE SLIVER POLYGONS (< 1 m²)
|
||||||
|
# # ========================================================
|
||||||
|
# # Filter polygons with area < 1 (threshold bisa kamu ubah)
|
||||||
|
# slivers = processing.run(
|
||||||
|
# "native:extractbyexpression",
|
||||||
|
# {
|
||||||
|
# "INPUT": reprojected,
|
||||||
|
# "EXPRESSION": "$area < 1",
|
||||||
|
# "OUTPUT": "memory:"
|
||||||
|
# }
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# summary["sliver_removed"] = slivers.featureCount()
|
||||||
|
|
||||||
|
# # Keep only polygons with area >= 1
|
||||||
|
# no_sliver = processing.run(
|
||||||
|
# "native:extractbyexpression",
|
||||||
|
# {
|
||||||
|
# "INPUT": reprojected,
|
||||||
|
# "EXPRESSION": "$area >= 1",
|
||||||
|
# "OUTPUT": "memory:"
|
||||||
|
# }
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 8. REMOVE TINY HOLES (< 1 m²)
|
||||||
|
# # ========================================================
|
||||||
|
# no_holes = processing.run(
|
||||||
|
# "native:deleteholes",
|
||||||
|
# {
|
||||||
|
# "INPUT": no_sliver,
|
||||||
|
# "MIN_AREA": 1, # minimum area of hole to keep
|
||||||
|
# "OUTPUT": "memory:"
|
||||||
|
# }
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# summary["holes_removed"] = 0 # can't count holes easily in PyQGIS
|
||||||
|
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 9. TRIM STRING FIELDS (ATTRIBUTE CLEANSING)
|
||||||
|
# # ========================================================
|
||||||
|
# trimmed = processing.run(
|
||||||
|
# "qgis:refactorfields",
|
||||||
|
# {
|
||||||
|
# "INPUT": no_holes,
|
||||||
|
# "FIELDS_MAPPING": [
|
||||||
|
# {
|
||||||
|
# "expression": f"trim(\"{field.name()}\")"
|
||||||
|
# if field.typeName().lower() in ["text", "varchar"]
|
||||||
|
# else f"\"{field.name()}\"",
|
||||||
|
# "name": field.name(),
|
||||||
|
# "type": field.type(),
|
||||||
|
# "length": field.length(),
|
||||||
|
# "precision": field.precision()
|
||||||
|
# }
|
||||||
|
# for field in no_holes.fields()
|
||||||
|
# ],
|
||||||
|
# "OUTPUT": "memory:"
|
||||||
|
# }
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # RETURN CLEANED LAYER
|
||||||
|
# # ========================================================
|
||||||
|
# return {
|
||||||
|
# "summary": summary,
|
||||||
|
# "clean_layer": trimmed
|
||||||
|
# }
|
||||||
|
|
||||||
|
|
||||||
|
# def cleansing_layer(layer: QgsVectorLayer) -> Dict:
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # INITIAL STATE
|
||||||
|
# # ========================================================
|
||||||
|
# print("\n========== START CLEANSING ==========")
|
||||||
|
# print("Step 0: Load Layer")
|
||||||
|
# print(" - Valid:", layer.isValid())
|
||||||
|
# print(" - Feature Count:", layer.featureCount())
|
||||||
|
|
||||||
|
# summary = {
|
||||||
|
# "step0_features": layer.featureCount(),
|
||||||
|
# "step1_invalid_before": 0,
|
||||||
|
# "step2_after_fix": 0,
|
||||||
|
# "step3_after_multipolygon": 0,
|
||||||
|
# "step4_duplicates_removed": 0,
|
||||||
|
# "step5_after_remove_vertices": 0,
|
||||||
|
# "step6_after_srid": 0,
|
||||||
|
# "step7_sliver_removed": 0,
|
||||||
|
# "step8_after_deleteholes": 0
|
||||||
|
# }
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 1. VALIDATE GEOMETRY
|
||||||
|
# # ========================================================
|
||||||
|
# print("\nStep 1: Identify invalid geometries")
|
||||||
|
|
||||||
|
# invalid_ids = []
|
||||||
|
# for f in layer.getFeatures():
|
||||||
|
# if not f.geometry().isGeosValid():
|
||||||
|
# invalid_ids.append(f.id())
|
||||||
|
|
||||||
|
# summary["step1_invalid_before"] = len(invalid_ids)
|
||||||
|
|
||||||
|
# print(" - Invalid geometries found:", len(invalid_ids))
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 2. FIX GEOMETRIES
|
||||||
|
# # ========================================================
|
||||||
|
# print("\nStep 2: Fix geometries")
|
||||||
|
# fixed = processing.run(
|
||||||
|
# "native:fixgeometries",
|
||||||
|
# {"INPUT": layer, "OUTPUT": "memory:"}
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# print(" - Valid:", fixed.isValid())
|
||||||
|
# print(" - Features after fix:", fixed.featureCount())
|
||||||
|
# summary["step2_after_fix"] = fixed.featureCount()
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 3. ENSURE MULTIPOLYGON
|
||||||
|
# # ========================================================
|
||||||
|
# print("\nStep 3: Ensure MULTIPOLYGON")
|
||||||
|
# multipolygon = processing.run(
|
||||||
|
# "native:collect",
|
||||||
|
# {"INPUT": fixed, "OUTPUT": "memory:"}
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# print(" - Valid:", multipolygon.isValid())
|
||||||
|
# print(" - Features:", multipolygon.featureCount())
|
||||||
|
# summary["step3_after_multipolygon"] = multipolygon.featureCount()
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 4. REMOVE DUPLICATE ROWS
|
||||||
|
# # ========================================================
|
||||||
|
# print("\nStep 4: Remove duplicate rows")
|
||||||
|
|
||||||
|
# all_fields = [f.name() for f in multipolygon.fields()]
|
||||||
|
# print(" - All fields:", all_fields)
|
||||||
|
|
||||||
|
# key_fields = None
|
||||||
|
|
||||||
|
# if "id" in all_fields:
|
||||||
|
# key_fields = ["id"]
|
||||||
|
# else:
|
||||||
|
# int_cols = [
|
||||||
|
# f.name() for f in multipolygon.fields()
|
||||||
|
# if f.typeName().lower() in ["int", "integer", "bigint"]
|
||||||
|
# ]
|
||||||
|
# if int_cols:
|
||||||
|
# key_fields = [int_cols[0]]
|
||||||
|
# else:
|
||||||
|
# key_fields = all_fields
|
||||||
|
|
||||||
|
# print(" - Using duplicate key:", key_fields)
|
||||||
|
|
||||||
|
# dedup = processing.run(
|
||||||
|
# "native:removeduplicatesbyattribute",
|
||||||
|
# {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"}
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# duplicates_removed = multipolygon.featureCount() - dedup.featureCount()
|
||||||
|
# summary["step4_duplicates_removed"] = duplicates_removed
|
||||||
|
|
||||||
|
# print(" - Features before:", multipolygon.featureCount())
|
||||||
|
# print(" - Features after:", dedup.featureCount())
|
||||||
|
# print(" - Duplicates removed:", duplicates_removed)
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 5. REMOVE DUPLICATE VERTICES
|
||||||
|
# # ========================================================
|
||||||
|
# print("\nStep 5: Remove duplicate vertices")
|
||||||
|
|
||||||
|
# no_dup_vertices = processing.run(
|
||||||
|
# "native:removeduplicatevertices",
|
||||||
|
# {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"}
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# print(" - Features:", no_dup_vertices.featureCount())
|
||||||
|
# summary["step5_after_remove_vertices"] = no_dup_vertices.featureCount()
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 6. FIX SRID / REPROJECT
|
||||||
|
# # ========================================================
|
||||||
|
# print("\nStep 6: Reproject (Fix SRID to EPSG:4326)")
|
||||||
|
|
||||||
|
# reprojected = processing.run(
|
||||||
|
# "native:reprojectlayer",
|
||||||
|
# {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"}
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# print(" - Features:", reprojected.featureCount())
|
||||||
|
# summary["step6_after_srid"] = reprojected.featureCount()
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 7. REMOVE SLIVER POLYGONS (< 1 m2)
|
||||||
|
# # ========================================================
|
||||||
|
# print("\nStep 7: Remove sliver polygons (<1 m²)")
|
||||||
|
|
||||||
|
# slivers = processing.run(
|
||||||
|
# "native:extractbyexpression",
|
||||||
|
# {"INPUT": reprojected, "EXPRESSION": "$area < 1", "OUTPUT": "memory:"}
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# summary["step7_sliver_removed"] = slivers.featureCount()
|
||||||
|
# print(" - Slivers found:", slivers.featureCount())
|
||||||
|
|
||||||
|
# no_sliver = processing.run(
|
||||||
|
# "native:extractbyexpression",
|
||||||
|
# {"INPUT": reprojected, "EXPRESSION": "$area >= 1", "OUTPUT": "memory:"}
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# print(" - Features left after removing slivers:", no_sliver.featureCount())
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # 8. REMOVE TINY HOLES (< 1 m2)
|
||||||
|
# # ========================================================
|
||||||
|
# print("\nStep 8: Remove tiny holes")
|
||||||
|
|
||||||
|
# no_holes = processing.run(
|
||||||
|
# "native:deleteholes",
|
||||||
|
# {"INPUT": no_sliver, "MIN_AREA": 1, "OUTPUT": "memory:"}
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# print(" - Features:", no_holes.featureCount())
|
||||||
|
# summary["step8_after_deleteholes"] = no_holes.featureCount()
|
||||||
|
|
||||||
|
# # ========================================================
|
||||||
|
# # FINISH (TRIM ATTRIBUTES)
|
||||||
|
# # ========================================================
|
||||||
|
# print("\nFinal Step: Trim string fields")
|
||||||
|
|
||||||
|
# trimmed = processing.run(
|
||||||
|
# "qgis:refactorfields",
|
||||||
|
# {
|
||||||
|
# "INPUT": no_holes,
|
||||||
|
# "FIELDS_MAPPING": [
|
||||||
|
# {
|
||||||
|
# "expression": f"trim(\"{field.name()}\")"
|
||||||
|
# if field.typeName().lower() in ["text", "varchar"]
|
||||||
|
# else f"\"{field.name()}\"",
|
||||||
|
# "name": field.name(),
|
||||||
|
# "type": field.type(),
|
||||||
|
# "length": field.length(),
|
||||||
|
# "precision": field.precision()
|
||||||
|
# }
|
||||||
|
# for field in no_holes.fields()
|
||||||
|
# ],
|
||||||
|
# "OUTPUT": "memory:"
|
||||||
|
# }
|
||||||
|
# )["OUTPUT"]
|
||||||
|
|
||||||
|
# print(" - Final feature count:", trimmed.featureCount())
|
||||||
|
# print("========== CLEANSING DONE ==========\n")
|
||||||
|
|
||||||
|
# return {
|
||||||
|
# "summary": summary,
|
||||||
|
# "clean_layer": trimmed
|
||||||
|
# }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# self-intersection
|
||||||
|
# self-intersection
def cleansing_layer(layer: QgsVectorLayer) -> Dict:
    """Run the full geometry/attribute cleansing pipeline on *layer*.

    Steps: identify invalid geometries, report self-intersections, fix
    geometries, collect to multi-part, de-duplicate rows, remove
    duplicate vertices, reproject to EPSG:4326, drop sliver polygons,
    delete tiny holes, and finally trim string attributes.

    Returns ``{"summary": <per-step counts>, "clean_layer": <layer>}``.
    """

    # ========================================================
    # INITIAL STATE
    # ========================================================
    print("\n========== START CLEANSING ==========")
    print("Step 0: Load Layer")
    print(" - Valid:", layer.isValid())
    print(" - Feature Count:", layer.featureCount())

    summary = {
        "step0_features": layer.featureCount(),
        "step1_invalid_before": 0,
        "step1_5_self_intersections": 0,
        "step2_after_fix": 0,
        "step3_after_multipolygon": 0,
        "step4_duplicates_removed": 0,
        "step5_after_remove_vertices": 0,
        "step6_after_srid": 0,
        "step7_sliver_removed": 0,
        "step8_after_deleteholes": 0
    }

    # ========================================================
    # 1. VALIDATE GEOMETRY
    # ========================================================
    print("\nStep 1: Identify invalid geometries")

    invalid_ids = [
        f.id() for f in layer.getFeatures()
        if not f.geometry().isGeosValid()
    ]

    summary["step1_invalid_before"] = len(invalid_ids)
    print(" - Invalid geometries found:", len(invalid_ids))

    # ========================================================
    # 1.5 CHECK SELF INTERSECTION
    # ========================================================
    print("\nStep 1.5: Check self-intersection")

    # NOTE(review): verify this algorithm id exists in the target QGIS
    # release (check with processing.algorithmHelp) — geometry-checker
    # algorithms are not available in every version.
    self_inter = processing.run(
        "native:checkgeometryselfintersection",
        {"INPUT": layer, "OUTPUT": "memory:"}
    )["OUTPUT"]

    self_inter_count = self_inter.featureCount()
    summary["step1_5_self_intersections"] = self_inter_count
    print(" - Features with self-intersection:", self_inter_count)

    # ========================================================
    # 2. FIX GEOMETRIES (INCLUDES SELF-INTERSECTION FIX)
    # ========================================================
    print("\nStep 2: Fix geometries (including self-intersections)")

    fixed = processing.run(
        "native:fixgeometries",
        {"INPUT": layer, "OUTPUT": "memory:"}
    )["OUTPUT"]

    print(" - Valid after fix:", fixed.isValid())
    print(" - Features after fix:", fixed.featureCount())
    summary["step2_after_fix"] = fixed.featureCount()

    # ========================================================
    # 3. ENSURE MULTIPOLYGON
    # ========================================================
    print("\nStep 3: Ensure MULTIPOLYGON")

    multipolygon = processing.run(
        "native:collect",
        {"INPUT": fixed, "OUTPUT": "memory:"}
    )["OUTPUT"]

    print(" - Valid:", multipolygon.isValid())
    print(" - Features:", multipolygon.featureCount())
    summary["step3_after_multipolygon"] = multipolygon.featureCount()

    # ========================================================
    # 4. REMOVE DUPLICATE ROWS
    # ========================================================
    print("\nStep 4: Remove duplicate rows")

    all_fields = [f.name() for f in multipolygon.fields()]
    print(" - All fields:", all_fields)

    # Key preference: an "id" column, else the first integer column,
    # else every column (exact-row duplicates only).
    if "id" in all_fields:
        key_fields = ["id"]
    else:
        int_cols = [
            f.name() for f in multipolygon.fields()
            if f.typeName().lower() in ["int", "integer", "bigint"]
        ]
        key_fields = [int_cols[0]] if int_cols else all_fields

    print(" - Using duplicate key:", key_fields)

    dedup = processing.run(
        "native:removeduplicatesbyattribute",
        {"INPUT": multipolygon, "FIELDS": key_fields, "METHOD": 0, "OUTPUT": "memory:"}
    )["OUTPUT"]

    duplicates_removed = multipolygon.featureCount() - dedup.featureCount()
    summary["step4_duplicates_removed"] = duplicates_removed

    print(" - Features before:", multipolygon.featureCount())
    print(" - Features after:", dedup.featureCount())
    print(" - Duplicates removed:", duplicates_removed)

    # ========================================================
    # 5. REMOVE DUPLICATE VERTICES
    # ========================================================
    print("\nStep 5: Remove duplicate vertices")

    no_dup_vertices = processing.run(
        "native:removeduplicatevertices",
        {"INPUT": dedup, "VERTICES": 0, "OUTPUT": "memory:"}
    )["OUTPUT"]

    print(" - Features:", no_dup_vertices.featureCount())
    summary["step5_after_remove_vertices"] = no_dup_vertices.featureCount()

    # ========================================================
    # 6. FIX SRID / REPROJECT
    # ========================================================
    print("\nStep 6: Reproject (Fix SRID to EPSG:4326)")

    reprojected = processing.run(
        "native:reprojectlayer",
        {"INPUT": no_dup_vertices, "TARGET_CRS": "EPSG:4326", "OUTPUT": "memory:"}
    )["OUTPUT"]

    print(" - Features:", reprojected.featureCount())
    summary["step6_after_srid"] = reprojected.featureCount()

    # ========================================================
    # 7. REMOVE SLIVER POLYGONS (< 1 m2)
    # ========================================================
    print("\nStep 7: Remove sliver polygons (<1 m²)")

    # NOTE(review): after reprojecting to EPSG:4326 the layer units are
    # degrees, so "$area < 1" is only square metres if ellipsoidal area
    # measurement applies — confirm the intended threshold.
    no_sliver = processing.run(
        "native:extractbyexpression",
        {"INPUT": reprojected, "EXPRESSION": "$area >= 1", "OUTPUT": "memory:"}
    )["OUTPUT"]

    # PERF FIX: the original ran a second extractbyexpression pass over
    # the whole layer only to count the slivers; since the two
    # expressions partition the features, the count is before - after.
    sliver_count = reprojected.featureCount() - no_sliver.featureCount()
    summary["step7_sliver_removed"] = sliver_count
    print(" - Slivers found:", sliver_count)

    print(" - Features left after removing slivers:", no_sliver.featureCount())

    # ========================================================
    # 8. REMOVE TINY HOLES (< 1 m2)
    # ========================================================
    print("\nStep 8: Remove tiny holes")

    no_holes = processing.run(
        "native:deleteholes",
        {"INPUT": no_sliver, "MIN_AREA": 1, "OUTPUT": "memory:"}
    )["OUTPUT"]

    print(" - Features:", no_holes.featureCount())
    summary["step8_after_deleteholes"] = no_holes.featureCount()

    # ========================================================
    # FINAL: TRIM STRING FIELDS
    # ========================================================
    print("\nFinal Step: Trim string fields")

    trimmed = processing.run(
        "qgis:refactorfields",
        {
            "INPUT": no_holes,
            "FIELDS_MAPPING": [
                {
                    # Only text-like columns get trim(); everything
                    # else is passed through unchanged.
                    "expression": f"trim(\"{field.name()}\")"
                    if field.typeName().lower() in ["text", "varchar"]
                    else f"\"{field.name()}\"",
                    "name": field.name(),
                    "type": field.type(),
                    "length": field.length(),
                    "precision": field.precision()
                }
                for field in no_holes.fields()
            ],
            "OUTPUT": "memory:"
        }
    )["OUTPUT"]

    print(" - Final feature count:", trimmed.featureCount())
    print("========== CLEANSING DONE ==========\n")

    return {
        "summary": summary,
        "clean_layer": trimmed
    }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
94
main.py
Normal file
94
main.py
Normal file
|
|
@ -0,0 +1,94 @@
|
||||||
|
# from fastapi import FastAPI
|
||||||
|
# from qgis.core import QgsVectorLayer
|
||||||
|
# from qgis_bootstrap import start_qgis
|
||||||
|
|
||||||
|
# app = FastAPI()
|
||||||
|
|
||||||
|
# # Start QGIS headless
|
||||||
|
# qgs = start_qgis()
|
||||||
|
|
||||||
|
# @app.get("/")
|
||||||
|
# def root():
|
||||||
|
# return {"status": "QGIS API Ready"}
|
||||||
|
|
||||||
|
# @app.get("/extent")
|
||||||
|
# def extent():
|
||||||
|
# layer = QgsVectorLayer("data/exmpl.geojson", "jalan", "ogr")
|
||||||
|
|
||||||
|
# if not layer.isValid():
|
||||||
|
# return {"error": "Layer tidak valid"}
|
||||||
|
|
||||||
|
# ext = layer.extent()
|
||||||
|
# return {
|
||||||
|
# "xmin": ext.xMinimum(),
|
||||||
|
# "ymin": ext.yMinimum(),
|
||||||
|
# "xmax": ext.xMaximum(),
|
||||||
|
# "ymax": ext.yMaximum(),
|
||||||
|
# }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
from fastapi import FastAPI, BackgroundTasks
|
||||||
|
from qgis_bootstrap import start_qgis
|
||||||
|
# from cleansing_service import load_layer, cleansing_layer
|
||||||
|
from full_cleansing_service import load_layer, cleansing_layer
|
||||||
|
|
||||||
|
app = FastAPI()

# Initialise headless QGIS once at import time; the returned handle
# must stay referenced for the lifetime of the process, or QGIS shuts
# down and processing algorithms stop working.
qgs = start_qgis()
|
||||||
|
|
||||||
|
@app.get("/")
|
||||||
|
def root():
|
||||||
|
return {"status": "QGIS Cleansing API Running"}
|
||||||
|
|
||||||
|
@app.get("/clean/{table_name}")
|
||||||
|
def clean_table(table_name: str):
|
||||||
|
|
||||||
|
layer = load_layer(table_name)
|
||||||
|
if not layer.isValid():
|
||||||
|
return {"error": f"Table '{table_name}' tidak valid atau tidak ditemukan."}
|
||||||
|
|
||||||
|
print(layer)
|
||||||
|
result = cleansing_layer(layer)
|
||||||
|
|
||||||
|
summary = result["summary"]
|
||||||
|
|
||||||
|
return {
|
||||||
|
"table": table_name,
|
||||||
|
"summary": summary,
|
||||||
|
"message": "Cleansing selesai"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/process/{table_name}")
|
||||||
|
def process_table(table_name: str, background: BackgroundTasks):
|
||||||
|
background.add_task(run_clean_table, table_name)
|
||||||
|
return {"status": "ACCEPTED", "table": table_name}
|
||||||
|
|
||||||
|
|
||||||
|
def run_clean_table(table_name: str):
    """Background worker: load *table_name*, run the cleansing pipeline,
    and print a summary.

    All output goes to stdout because this runs as a FastAPI background
    task with no response channel — the server log is the only place a
    failure can surface, so exceptions are caught and logged here instead
    of being allowed to disappear.
    """
    print(f"\n=== Mulai cleansing untuk tabel: {table_name} ===")

    layer = load_layer(table_name)
    if not layer.isValid():
        print(f"[ERROR] Table '{table_name}' tidak valid atau tidak ditemukan.")
        return

    print("[OK] Layer valid, mulai cleansing...")

    try:
        result = cleansing_layer(layer)
    except Exception as exc:
        # No caller to propagate to; make the failure visible in the log.
        print(f"[ERROR] Cleansing gagal untuk tabel '{table_name}': {exc}")
        return

    summary = result["summary"]
    clean_layer = result["clean_layer"]  # kept for the PostGIS write-back below

    print("\n=== RINGKASAN CLEANSING ===")
    for key, value in summary.items():
        print(f"{key}: {value}")

    # TODO: save back ke PostGIS
    # save_to_postgis(clean_layer, table_name)

    print(f"=== Cleansing selesai untuk tabel: {table_name} ===\n")
|
||||||
73
qgis_bootstrap.py
Normal file
73
qgis_bootstrap.py
Normal file
|
|
@ -0,0 +1,73 @@
|
||||||
|
# import os
|
||||||
|
# import sys
|
||||||
|
|
||||||
|
# QGIS_APP = "/Applications/QGIS-LTR.app/Contents"
|
||||||
|
# QGIS_PREFIX = f"{QGIS_APP}/Resources"
|
||||||
|
|
||||||
|
# # ==== FIX VERY IMPORTANT ====
|
||||||
|
# os.environ["QGIS_PREFIX_PATH"] = QGIS_PREFIX
|
||||||
|
# os.environ["PROJ_LIB"] = f"{QGIS_PREFIX}/proj"
|
||||||
|
# os.environ["GDAL_DATA"] = f"{QGIS_PREFIX}/gdal"
|
||||||
|
# os.environ["QT_PLUGIN_PATH"] = f"{QGIS_PREFIX}/plugins"
|
||||||
|
# # =============================
|
||||||
|
|
||||||
|
# os.environ["QT_QPA_PLATFORM"] = "offscreen"
|
||||||
|
|
||||||
|
# # Python path
|
||||||
|
# sys.path.append(f"{QGIS_PREFIX}/python")
|
||||||
|
# sys.path.append(f"{QGIS_PREFIX}/python/plugins")
|
||||||
|
|
||||||
|
# from qgis.core import QgsApplication
|
||||||
|
# from qgis.analysis import QgsNativeAlgorithms
|
||||||
|
|
||||||
|
# import processing
|
||||||
|
# from processing.core.Processing import Processing
|
||||||
|
|
||||||
|
# def start_qgis():
|
||||||
|
# qgs = QgsApplication([], False)
|
||||||
|
# qgs.initQgis()
|
||||||
|
|
||||||
|
# # === WAJIB: initialize processing ===
|
||||||
|
# Processing.initialize()
|
||||||
|
# qgs.processingRegistry().addProvider(QgsNativeAlgorithms())
|
||||||
|
|
||||||
|
# return qgs
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
import os
import sys

# ==== Linux QGIS installation prefix ====
QGIS_PREFIX = "/usr"

# These environment variables MUST be set before `qgis.core` is imported
# below — QGIS reads them during library initialisation.
os.environ["QGIS_PREFIX_PATH"] = QGIS_PREFIX
os.environ["PROJ_LIB"] = "/usr/share/proj"
os.environ["GDAL_DATA"] = "/usr/share/gdal"
os.environ["QT_PLUGIN_PATH"] = "/usr/lib/qt/plugins"

# Python path
# NOTE(review): appending the qgis package directory itself looks off —
# normally the parent (/usr/lib/python3/dist-packages) is what belongs on
# sys.path so `import qgis` resolves the package; verify on the target box.
sys.path.append("/usr/lib/python3/dist-packages/qgis")
sys.path.append("/usr/lib/python3/dist-packages/qgis/plugins")

from qgis.core import QgsApplication
from qgis.analysis import QgsNativeAlgorithms

import processing
from processing.core.Processing import Processing
|
def start_qgis():
    """Boot a headless QGIS application and register the native algorithms.

    The caller must keep the returned ``QgsApplication`` alive for as long
    as QGIS is used (and eventually call ``exitQgis()`` on shutdown).
    """
    application = QgsApplication([], False)
    application.initQgis()

    # Outside the desktop app, the processing framework needs an explicit
    # initialisation before any `processing.run(...)` call will work.
    Processing.initialize()
    application.processingRegistry().addProvider(QgsNativeAlgorithms())

    return application
|
||||||
42
test_pg.py
Normal file
42
test_pg.py
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
"""Manual smoke test: check the QGIS/PROJ environment, then try to open a
PostGIS layer. Run directly; all diagnostics go to stdout."""
import glob
import os
import sys

print("---- ENV CHECK ----")
print("QGIS_PREFIX_PATH:", os.environ.get("QGIS_PREFIX_PATH"))
print("PROJ_LIB:", os.environ.get("PROJ_LIB"))
print("GDAL_DATA:", os.environ.get("GDAL_DATA"))
print("QT_PLUGIN_PATH:", os.environ.get("QT_PLUGIN_PATH"))

print("\n--- SEARCH proj.db in QGIS PREFIX ---")
print(glob.glob("/Applications/QGIS-LTR.app/**/proj.db", recursive=True))

print("\n--- CHECK FILE EXISTS ---")
# Fix: os.environ["PROJ_LIB"] raised KeyError whenever PROJ_LIB was unset
# (the ENV CHECK above already treats it as optional via .get).
proj_lib = os.environ.get("PROJ_LIB")
if proj_lib:
    print("proj.db exists? ", os.path.isfile(os.path.join(proj_lib, "proj.db")))
else:
    print("proj.db exists?  PROJ_LIB is not set")

QGIS_APP = "/Applications/QGIS-LTR.app/Contents"
QGIS_PREFIX = f"{QGIS_APP}/Resources"

# Must happen before `import qgis` below.
os.environ["QGIS_PREFIX_PATH"] = QGIS_PREFIX
sys.path.append(f"{QGIS_PREFIX}/python")
sys.path.append(f"{QGIS_PREFIX}/python/plugins")

from qgis.core import QgsApplication, QgsVectorLayer

qgs = QgsApplication([], False)
qgs.initQgis()

# NOTE(review): hardcoded local credentials are for this throwaway smoke
# test only — never commit real credentials; read them from the environment.
uri = (
    "dbname='gisdb' host=localhost port=5432 user='postgres' password='postgres' "
    "table=\"public\".\"dataset_metadata\" key='id' srid=4326"
)

layer = QgsVectorLayer(uri, "test", "postgres")

print("Valid:", layer.isValid())
print("Provider:", layer.providerType())
print("Error summary:", layer.error().summary())

qgs.exitQgis()
|
||||||
Loading…
Reference in New Issue
Block a user