add reader mpk

This commit is contained in:
DmsAnhr 2025-11-08 16:07:58 +07:00
parent 2575a34742
commit fed3317284
3 changed files with 81 additions and 2 deletions

View File

@ -0,0 +1,72 @@
import os
import tempfile
import json
from io import BytesIO
import geopandas as gpd
from py7zr import SevenZipFile
import pyogrio
def find_data_source(extract_dir: str):
    """
    Locate a supported data source (.gdb or .shp) in an extracted folder.

    A directory whose name ends with ``.gdb`` anywhere under ``extract_dir``
    takes priority over any file whose name ends with ``.shp``. Suffix
    matching is case-insensitive. Directory and file names are visited in
    sorted order so that, when several candidates exist, the same one is
    selected on every run regardless of filesystem enumeration order.

    Args:
        extract_dir: Root directory to search recursively.

    Returns:
        Path of the first ``.gdb`` directory found, otherwise the path of the
        first ``.shp`` file found.

    Raises:
        ValueError: If neither a ``.gdb`` directory nor a ``.shp`` file exists.
    """
    first_shp = None
    for root, dirs, files in os.walk(extract_dir):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        for dirname in dirs:
            if dirname.lower().endswith(".gdb"):
                return os.path.join(root, dirname)
        # Remember only the first shapefile; a .gdb found later still wins.
        if first_shp is None:
            for filename in sorted(files):
                if filename.lower().endswith(".shp"):
                    first_shp = os.path.join(root, filename)
                    break
    if first_shp is not None:
        return first_shp
    raise ValueError("Tidak ditemukan data source yang didukung (.gdb atau .shp).")
def get_main_layer(gdb_path: str):
    """
    Return the name of the main layer of a geodatabase (.gdb).

    The first listed layer whose name does not end with ``__attach``
    (case-insensitive) is chosen. If every layer carries that suffix, the
    first listed layer is returned instead.

    Args:
        gdb_path: Path to the ``.gdb`` data source.

    Returns:
        The selected layer name, as provided by ``pyogrio.list_layers``.

    Raises:
        ValueError: If the layer list cannot be read, or if the data source
            contains no layers at all.
    """
    # Only the listing call is guarded, so the "no layer" error below is not
    # re-wrapped with a misleading "failed to read" prefix.
    try:
        layers = pyogrio.list_layers(gdb_path)
    except Exception as e:
        raise ValueError(f"Gagal membaca daftar layer GDB: {e}") from e

    # Use len() rather than truth-testing: the listing is a NumPy array, and
    # bool() on an array with more than one element raises.
    if len(layers) == 0:
        raise ValueError(f"Tidak ada layer utama yang valid di {gdb_path}")

    for layer in layers:
        # Each row is indexed positionally; element 0 is the layer name.
        name = layer[0]
        # presumably "__attach" marks attachment tables — TODO confirm
        if not name.lower().endswith("__attach"):
            return name
    return layers[0][0]
def read_mpk(path: str):
    """
    Read the main vector dataset from an MPK package into a GeoDataFrame.

    The package is opened as a 7z archive and extracted into a temporary
    directory. A data source is located with ``find_data_source``; for a
    ``.gdb`` source the layer is chosen with ``get_main_layer``. The result
    is reprojected to EPSG:4326 before being returned.

    Args:
        path: Filesystem path of the ``.mpk`` file.

    Returns:
        The features read by ``geopandas.read_file``, reprojected to
        EPSG:4326.

    Raises:
        ValueError: If the file is empty, cannot be extracted as a 7z
            archive, contains no supported data source, or has no CRS.
        OSError: If ``path`` cannot be accessed.
    """
    # Check the size on disk instead of loading the whole archive into memory.
    if os.path.getsize(path) == 0:
        raise ValueError("File MPK kosong atau tidak valid.")

    with tempfile.TemporaryDirectory() as tempdir:
        try:
            # SevenZipFile accepts a path, so the archive is streamed from
            # disk rather than duplicated in a BytesIO buffer.
            # NOTE(review): this extracts an archive that may come from an
            # upload — confirm the installed py7zr version rejects entries
            # that escape the target directory.
            with SevenZipFile(path, mode="r") as z:
                z.extractall(path=tempdir)
        except Exception as e:
            raise ValueError(f"File MPK rusak atau tidak valid: {e}") from e

        src_path = find_data_source(tempdir)
        if src_path.lower().endswith(".gdb"):
            layer_name = get_main_layer(src_path)
            gdf = gpd.read_file(src_path, layer=layer_name)
        else:
            gdf = gpd.read_file(src_path)

        # Reprojection needs a known source CRS; fail with a clear message.
        if gdf.crs is None:
            raise ValueError("CRS tidak terdeteksi. Pastikan file memiliki informasi proyeksi (.prj).")
        gdf = gdf.to_crs(epsg=4326)
        print(f"[INFO] Berhasil membaca {len(gdf)} fitur")
        return gdf

View File

@ -173,8 +173,12 @@ def parse_page_selection(selectedPage: str, total_pages: int):
def read_pdf(path: str, page: str):
pdf_path = path
selectedPage = None
if page == '' or None:
# if page == '' or None:
# selectedPage = "1"
if not page:
selectedPage = "1"
else:
selectedPage = page
tables_data = []
with pdfplumber.open(pdf_path) as pdf:
total_pages = len(pdf.pages)

View File

@ -11,6 +11,7 @@ from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES
from services.upload_file.read_csv.reader_csv import read_csv
from services.upload_file.read_shp.reader_shp import read_shp
from services.upload_file.read_gdb.reader_gdb import read_gdb
from services.upload_file.read_mpk.reader_mpk import read_mpk
from services.upload_file.read_pdf.reader_pdf import convert_df, read_pdf
from services.upload_file.geom_detector.geometry_detector import detect_and_build_geometry
from services.upload_file.geom_detector.geometry_detector import attach_polygon_geometry_auto
@ -156,7 +157,7 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
contents = await file.read()
size_mb = len(contents) / (1024*1024)
if size_mb > MAX_FILE_MB:
raise HTTPException(status_code=413, detail="File too large")
raise HTTPException(status_code=413, detail="Ukuran File Terlalu Besar")
tmp_path = UPLOAD_FOLDER / fname
with open(tmp_path, "wb") as f:
f.write(contents)
@ -168,6 +169,8 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
df = read_csv(str(tmp_path))
elif ext == ".xlsx":
df = read_csv(str(tmp_path), sheet)
elif ext == ".mpk":
df = read_mpk(str(tmp_path))
elif ext == ".pdf":
tbl = read_pdf(tmp_path, page)
if len(tbl) == 0: