add reader mpk
This commit is contained in:
parent
2575a34742
commit
fed3317284
72
services/upload_file/read_mpk/reader_mpk.py
Normal file
72
services/upload_file/read_mpk/reader_mpk.py
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
import os
|
||||
import tempfile
|
||||
import json
|
||||
from io import BytesIO
|
||||
import geopandas as gpd
|
||||
from py7zr import SevenZipFile
|
||||
import pyogrio
|
||||
|
||||
|
||||
def find_data_source(extract_dir: str) -> str:
    """Locate a supported data source (.gdb or .shp) inside an extracted folder.

    A geodatabase directory (name ending in ``.gdb``) found anywhere in the
    tree takes priority over any shapefile. Matching is case-insensitive.

    Args:
        extract_dir: Root directory of the extracted archive.

    Returns:
        Path of the first ``.gdb`` directory, or, when there is none, of the
        first ``.shp`` file encountered while walking the tree.

    Raises:
        ValueError: If the tree contains neither a ``.gdb`` directory nor a
            ``.shp`` file.
    """
    first_shp = None

    for root, dirs, files in os.walk(extract_dir):
        # Sort in place so the traversal (and therefore the selected
        # candidate) does not depend on the filesystem's listing order.
        dirs.sort()

        for d in dirs:
            if d.lower().endswith(".gdb"):
                return os.path.join(root, d)

        # Remember the first shapefile, but keep walking: a geodatabase
        # deeper in the tree still wins over it.
        if first_shp is None:
            for f in sorted(files):
                if f.lower().endswith(".shp"):
                    first_shp = os.path.join(root, f)
                    break

    if first_shp is not None:
        return first_shp

    raise ValueError("Tidak ditemukan data source yang didukung (.gdb atau .shp).")
|
||||
|
||||
|
||||
def get_main_layer(gdb_path: str):
    """Return the name of the main layer of a geodatabase (.gdb).

    The first layer whose name does not end in ``__attach``
    (case-insensitive) is chosen; if every layer ends in ``__attach``, the
    first listed layer is returned instead.

    Args:
        gdb_path: Path to the ``.gdb`` directory.

    Returns:
        The selected layer name, taken from the first column of
        ``pyogrio.list_layers``.

    Raises:
        ValueError: If the layer list cannot be read, or if the geodatabase
            has no layers at all.
    """
    # Only the listing call is guarded, so the "no layers" error raised
    # below is not caught and re-wrapped as a read failure.
    try:
        layers = pyogrio.list_layers(gdb_path)
    except Exception as e:
        raise ValueError(f"Gagal membaca daftar layer GDB: {e}") from e

    # list_layers returns an array of (name, geometry type) rows; use len()
    # rather than truth-testing the array, which is ambiguous for arrays
    # holding more than one element.
    if len(layers) == 0:
        raise ValueError(f"Tidak ada layer utama yang valid di {gdb_path}")

    names = [row[0] for row in layers]
    return next(
        (name for name in names if not name.lower().endswith("__attach")),
        names[0],
    )
|
||||
|
||||
|
||||
def read_mpk(path: str):
    """Read the main dataset of an .mpk package into a GeoDataFrame.

    The file is opened as a 7z archive and extracted into a temporary
    directory. The data source found by ``find_data_source`` is then read
    with geopandas; for a ``.gdb`` the layer is picked by ``get_main_layer``.
    The result is reprojected to EPSG:4326.

    Args:
        path: Filesystem path of the ``.mpk`` file.

    Returns:
        The features as a GeoDataFrame in EPSG:4326.

    Raises:
        ValueError: If the file is empty, cannot be extracted, contains no
            supported data source, or the data has no CRS.
        OSError: If ``path`` cannot be accessed.
    """
    # Check the size on disk instead of loading the whole upload into
    # memory just to test for emptiness.
    if os.path.getsize(path) == 0:
        raise ValueError("File MPK kosong atau tidak valid.")

    with tempfile.TemporaryDirectory() as tempdir:
        # NOTE(review): the archive presumably comes from a user upload;
        # confirm the installed py7zr version rejects entries that would be
        # written outside ``tempdir``.
        try:
            with SevenZipFile(path, mode="r") as archive:
                archive.extractall(path=tempdir)
        except Exception as e:
            raise ValueError(f"File MPK rusak atau tidak valid: {e}") from e

        src_path = find_data_source(tempdir)

        # The data must be read before the temporary directory is removed.
        if src_path.lower().endswith(".gdb"):
            layer_name = get_main_layer(src_path)
            gdf = gpd.read_file(src_path, layer=layer_name)
        else:
            gdf = gpd.read_file(src_path)

    if gdf.crs is None:
        raise ValueError("CRS tidak terdeteksi. Pastikan file memiliki informasi proyeksi (.prj).")

    gdf = gdf.to_crs(epsg=4326)

    print(f"[INFO] Berhasil membaca {len(gdf)} fitur")
    return gdf
|
||||
|
|
@ -173,8 +173,12 @@ def parse_page_selection(selectedPage: str, total_pages: int):
|
|||
def read_pdf(path: str, page: str):
|
||||
pdf_path = path
|
||||
selectedPage = None
|
||||
if page == '' or None:
|
||||
# if page == '' or None:
|
||||
# selectedPage = "1"
|
||||
if not page:
|
||||
selectedPage = "1"
|
||||
else:
|
||||
selectedPage = page
|
||||
tables_data = []
|
||||
with pdfplumber.open(pdf_path) as pdf:
|
||||
total_pages = len(pdf.pages)
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES
|
|||
from services.upload_file.read_csv.reader_csv import read_csv
|
||||
from services.upload_file.read_shp.reader_shp import read_shp
|
||||
from services.upload_file.read_gdb.reader_gdb import read_gdb
|
||||
from services.upload_file.read_mpk.reader_mpk import read_mpk
|
||||
from services.upload_file.read_pdf.reader_pdf import convert_df, read_pdf
|
||||
from services.upload_file.geom_detector.geometry_detector import detect_and_build_geometry
|
||||
from services.upload_file.geom_detector.geometry_detector import attach_polygon_geometry_auto
|
||||
|
|
@ -156,7 +157,7 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
|
|||
contents = await file.read()
|
||||
size_mb = len(contents) / (1024*1024)
|
||||
if size_mb > MAX_FILE_MB:
|
||||
raise HTTPException(status_code=413, detail="File too large")
|
||||
raise HTTPException(status_code=413, detail="Ukuran File Terlalu Besar")
|
||||
tmp_path = UPLOAD_FOLDER / fname
|
||||
with open(tmp_path, "wb") as f:
|
||||
f.write(contents)
|
||||
|
|
@ -168,6 +169,8 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
|
|||
df = read_csv(str(tmp_path))
|
||||
elif ext == ".xlsx":
|
||||
df = read_csv(str(tmp_path), sheet)
|
||||
elif ext == ".mpk":
|
||||
df = read_mpk(str(tmp_path))
|
||||
elif ext == ".pdf":
|
||||
tbl = read_pdf(tmp_path, page)
|
||||
if len(tbl) == 0:
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user