file_table_reader/services/upload_file/readers/reader_mpk.py

72 lines
2.1 KiB
Python
Raw Normal View History

2025-11-08 09:07:58 +00:00
import os
import tempfile
import json
from io import BytesIO
import geopandas as gpd
from py7zr import SevenZipFile
import pyogrio
def find_data_source(extract_dir: str):
    """Locate the dataset to load inside an extracted archive directory.

    Directories whose name ends in ``.gdb`` take priority: the whole tree is
    searched for one first. Only when none exists is the tree searched again
    for a file ending in ``.shp``. Suffix matching is case-insensitive and
    the first match in ``os.walk`` order wins.

    Args:
        extract_dir: Root directory to search recursively.

    Returns:
        Path of the first matching ``.gdb`` directory, otherwise the path of
        the first matching ``.shp`` file.

    Raises:
        ValueError: If neither a ``.gdb`` directory nor a ``.shp`` file is
            found under ``extract_dir``.
    """
    # Both searches are lazy generators, so each walk stops at its first hit
    # and the shapefile walk only starts if no geodatabase was found.
    gdb_matches = (
        os.path.join(parent, dirname)
        for parent, subdirs, _ in os.walk(extract_dir)
        for dirname in subdirs
        if dirname.lower().endswith(".gdb")
    )
    shp_matches = (
        os.path.join(parent, filename)
        for parent, _, filenames in os.walk(extract_dir)
        for filename in filenames
        if filename.lower().endswith(".shp")
    )

    found = next(gdb_matches, None)
    if found is None:
        found = next(shp_matches, None)
    if found is None:
        raise ValueError("Tidak ditemukan data source yang didukung (.gdb atau .shp).")
    return found
def get_main_layer(gdb_path: str) -> str:
    """Return the name of the main layer of a file geodatabase (.gdb).

    The first layer whose name does not end in ``__attach``
    (case-insensitive) is chosen. If every layer has that suffix, the first
    layer is returned as a fallback.

    Args:
        gdb_path: Path to the ``.gdb`` directory.

    Returns:
        The selected layer name as a plain ``str``.

    Raises:
        ValueError: If the layer list cannot be read, or if the geodatabase
            contains no layers at all.
    """
    # Only the listing call is guarded, so the "no layer" error raised below
    # is not re-wrapped under a misleading "failed to read" message.
    try:
        layers = pyogrio.list_layers(gdb_path)
    except Exception as e:
        raise ValueError(f"Gagal membaca daftar layer GDB: {e}") from e

    # pyogrio documents the result as a NumPy array of [name, geometry type]
    # rows. Truth-testing such an array (``if layers:``) raises for more than
    # one element, so collect the names into a plain list before testing.
    names = [str(layer[0]) for layer in layers]

    for name in names:
        if not name.lower().endswith("__attach"):
            return name

    # Every layer carries the "__attach" suffix: fall back to the first one.
    if names:
        return names[0]

    raise ValueError(f"Tidak ada layer utama yang valid di {gdb_path}")
def read_mpk(path: str) -> "gpd.GeoDataFrame":
    """Read an MPK archive into a GeoDataFrame in EPSG:4326.

    The file is opened as a 7z archive and extracted into a temporary
    directory. The data source inside it is located with
    ``find_data_source`` (a ``.gdb`` is preferred over a ``.shp``), loaded
    with geopandas, and reprojected to EPSG:4326.

    Args:
        path: Filesystem path of the ``.mpk`` file.

    Returns:
        The loaded features, reprojected to EPSG:4326.

    Raises:
        OSError: If ``path`` cannot be opened (e.g. ``FileNotFoundError``).
        ValueError: If the file is empty, the archive cannot be extracted,
            no supported data source is found, or the data has no CRS.
    """
    with open(path, "rb") as archive_file:
        # Probe a single byte rather than loading the whole upload into
        # memory just to detect an empty file.
        if not archive_file.read(1):
            raise ValueError("File MPK kosong atau tidak valid.")
        archive_file.seek(0)

        with tempfile.TemporaryDirectory() as tempdir:
            # Any extraction failure is reported as one ValueError at this
            # boundary; the original exception stays attached as the cause.
            try:
                with SevenZipFile(archive_file, mode="r") as archive:
                    archive.extractall(path=tempdir)
            except Exception as e:
                raise ValueError(f"File MPK rusak atau tidak valid: {e}") from e

            src_path = find_data_source(tempdir)

            # The data must be read before the temporary directory is
            # removed when this ``with`` block exits.
            if src_path.lower().endswith(".gdb"):
                layer_name = get_main_layer(src_path)
                gdf = gpd.read_file(src_path, layer=layer_name)
            else:
                gdf = gpd.read_file(src_path)

    # Without a source CRS the data cannot be reprojected.
    if gdf.crs is None:
        raise ValueError("CRS tidak terdeteksi. Pastikan file memiliki informasi proyeksi (.prj).")

    gdf = gdf.to_crs(epsg=4326)
    print(f"[INFO] Berhasil membaca {len(gdf)} fitur")
    return gdf