add reader mpk
This commit is contained in:
parent
2575a34742
commit
fed3317284
72
services/upload_file/read_mpk/reader_mpk.py
Normal file
72
services/upload_file/read_mpk/reader_mpk.py
Normal file
|
|
@ -0,0 +1,72 @@
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
import json
|
||||||
|
from io import BytesIO
|
||||||
|
import geopandas as gpd
|
||||||
|
from py7zr import SevenZipFile
|
||||||
|
import pyogrio
|
||||||
|
|
||||||
|
|
||||||
|
def find_data_source(extract_dir: str) -> str:
    """Locate the spatial data source inside an extracted package directory.

    The tree under ``extract_dir`` is searched for a geodatabase (a directory
    whose name ends in ``.gdb``) or, failing that, a shapefile (a file whose
    name ends in ``.shp``). Suffix matching is case-insensitive. A ``.gdb``
    directory found anywhere in the tree takes precedence over any ``.shp``
    file, even one located closer to the root.

    Args:
        extract_dir: Root directory to search.

    Returns:
        Path of the selected ``.gdb`` directory or ``.shp`` file.

    Raises:
        ValueError: If neither a ``.gdb`` directory nor a ``.shp`` file exists
            under ``extract_dir``.
    """
    first_shapefile = None

    for root, dirs, files in os.walk(extract_dir):
        # Sorting in place fixes the traversal order (os.walk honours in-place
        # edits of ``dirs`` when walking top-down), so the same directory
        # content always yields the same pick when several candidates exist.
        dirs.sort()

        for dirname in dirs:
            if dirname.lower().endswith(".gdb"):
                return os.path.join(root, dirname)

        # Remember only the first shapefile; keep walking because a
        # geodatabase deeper in the tree still wins over it.
        if first_shapefile is None:
            for filename in sorted(files):
                if filename.lower().endswith(".shp"):
                    first_shapefile = os.path.join(root, filename)
                    break

    if first_shapefile is not None:
        return first_shapefile

    raise ValueError("Tidak ditemukan data source yang didukung (.gdb atau .shp).")
|
||||||
|
|
||||||
|
|
||||||
|
def get_main_layer(gdb_path: str) -> str:
    """Return the name of the main layer of a geodatabase (.gdb).

    The first layer whose name does not end in ``__attach``
    (case-insensitive) is returned. If every layer name ends in
    ``__attach``, the first listed layer is returned instead.

    Args:
        gdb_path: Path to the ``.gdb`` directory.

    Returns:
        The selected layer name.

    Raises:
        ValueError: If the layer list cannot be read, or if the geodatabase
            contains no layers.
    """
    # Only the listing call is guarded, so the "no layers" error below is not
    # re-wrapped with a misleading "failed to read layer list" message.
    try:
        layers = pyogrio.list_layers(gdb_path)
    except Exception as e:
        raise ValueError(f"Gagal membaca daftar layer GDB: {e}") from e

    # Use len() rather than truthiness: pyogrio documents the result as an
    # ndarray of [name, geometry type] pairs, and the truth value of a
    # multi-element array is ambiguous (it raises instead of returning True).
    if len(layers) == 0:
        raise ValueError(f"Tidak ada layer utama yang valid di {gdb_path}")

    for layer in layers:
        name = layer[0]
        if not name.lower().endswith("__attach"):
            return name

    # Every layer name ends in "__attach": fall back to the first one.
    return layers[0][0]
|
||||||
|
|
||||||
|
|
||||||
|
def read_mpk(path: str) -> "gpd.GeoDataFrame":
    """Read the spatial data contained in an MPK package.

    The file at ``path`` is opened as a 7z archive and extracted into a
    temporary directory. The data source chosen by ``find_data_source`` is
    loaded with geopandas (for a ``.gdb``, the layer chosen by
    ``get_main_layer`` is read), and the result is reprojected to EPSG:4326.

    Args:
        path: Path to the ``.mpk`` file on disk.

    Returns:
        The loaded features, reprojected to EPSG:4326.

    Raises:
        ValueError: If the file is empty, the archive cannot be extracted,
            no supported data source is found, or the data has no CRS.
        OSError: If ``path`` does not exist or cannot be accessed.
    """
    # Check the size on disk instead of reading the whole archive into memory
    # just to test for emptiness.
    if os.path.getsize(path) == 0:
        raise ValueError("File MPK kosong atau tidak valid.")

    with tempfile.TemporaryDirectory() as tempdir:
        try:
            # SevenZipFile accepts a path, so the archive is read from disk
            # rather than from an in-memory copy.
            with SevenZipFile(path, mode="r") as archive:
                archive.extractall(path=tempdir)
        except Exception as e:
            raise ValueError(f"File MPK rusak atau tidak valid: {e}") from e

        src_path = find_data_source(tempdir)

        # The data must be loaded before the temporary directory is removed.
        if src_path.lower().endswith(".gdb"):
            layer_name = get_main_layer(src_path)
            gdf = gpd.read_file(src_path, layer=layer_name)
        else:
            gdf = gpd.read_file(src_path)

    # Without a source CRS the data cannot be reprojected.
    if gdf.crs is None:
        raise ValueError("CRS tidak terdeteksi. Pastikan file memiliki informasi proyeksi (.prj).")

    gdf = gdf.to_crs(epsg=4326)

    print(f"[INFO] Berhasil membaca {len(gdf)} fitur")
    return gdf
|
||||||
|
|
@ -173,8 +173,12 @@ def parse_page_selection(selectedPage: str, total_pages: int):
|
||||||
def read_pdf(path: str, page: str):
|
def read_pdf(path: str, page: str):
|
||||||
pdf_path = path
|
pdf_path = path
|
||||||
selectedPage = None
|
selectedPage = None
|
||||||
if page == '' or None:
|
# if page == '' or None:
|
||||||
|
# selectedPage = "1"
|
||||||
|
if not page:
|
||||||
selectedPage = "1"
|
selectedPage = "1"
|
||||||
|
else:
|
||||||
|
selectedPage = page
|
||||||
tables_data = []
|
tables_data = []
|
||||||
with pdfplumber.open(pdf_path) as pdf:
|
with pdfplumber.open(pdf_path) as pdf:
|
||||||
total_pages = len(pdf.pages)
|
total_pages = len(pdf.pages)
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES
|
||||||
from services.upload_file.read_csv.reader_csv import read_csv
|
from services.upload_file.read_csv.reader_csv import read_csv
|
||||||
from services.upload_file.read_shp.reader_shp import read_shp
|
from services.upload_file.read_shp.reader_shp import read_shp
|
||||||
from services.upload_file.read_gdb.reader_gdb import read_gdb
|
from services.upload_file.read_gdb.reader_gdb import read_gdb
|
||||||
|
from services.upload_file.read_mpk.reader_mpk import read_mpk
|
||||||
from services.upload_file.read_pdf.reader_pdf import convert_df, read_pdf
|
from services.upload_file.read_pdf.reader_pdf import convert_df, read_pdf
|
||||||
from services.upload_file.geom_detector.geometry_detector import detect_and_build_geometry
|
from services.upload_file.geom_detector.geometry_detector import detect_and_build_geometry
|
||||||
from services.upload_file.geom_detector.geometry_detector import attach_polygon_geometry_auto
|
from services.upload_file.geom_detector.geometry_detector import attach_polygon_geometry_auto
|
||||||
|
|
@ -156,7 +157,7 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
|
||||||
contents = await file.read()
|
contents = await file.read()
|
||||||
size_mb = len(contents) / (1024*1024)
|
size_mb = len(contents) / (1024*1024)
|
||||||
if size_mb > MAX_FILE_MB:
|
if size_mb > MAX_FILE_MB:
|
||||||
raise HTTPException(status_code=413, detail="File too large")
|
raise HTTPException(status_code=413, detail="Ukuran File Terlalu Besar")
|
||||||
tmp_path = UPLOAD_FOLDER / fname
|
tmp_path = UPLOAD_FOLDER / fname
|
||||||
with open(tmp_path, "wb") as f:
|
with open(tmp_path, "wb") as f:
|
||||||
f.write(contents)
|
f.write(contents)
|
||||||
|
|
@ -168,6 +169,8 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
|
||||||
df = read_csv(str(tmp_path))
|
df = read_csv(str(tmp_path))
|
||||||
elif ext == ".xlsx":
|
elif ext == ".xlsx":
|
||||||
df = read_csv(str(tmp_path), sheet)
|
df = read_csv(str(tmp_path), sheet)
|
||||||
|
elif ext == ".mpk":
|
||||||
|
df = read_mpk(str(tmp_path))
|
||||||
elif ext == ".pdf":
|
elif ext == ".pdf":
|
||||||
tbl = read_pdf(tmp_path, page)
|
tbl = read_pdf(tmp_path, page)
|
||||||
if len(tbl) == 0:
|
if len(tbl) == 0:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user