diff --git a/services/upload_file/read_mpk/reader_mpk.py b/services/upload_file/read_mpk/reader_mpk.py
new file mode 100644
index 0000000..a466e58
--- /dev/null
+++ b/services/upload_file/read_mpk/reader_mpk.py
@@ -0,0 +1,126 @@
+import os
+import tempfile
+import json
+from io import BytesIO
+import geopandas as gpd
+from py7zr import SevenZipFile
+import pyogrio
+
+
+def find_data_source(extract_dir: str) -> str:
+    """
+    Locate the data source (.gdb or .shp) inside an extracted folder.
+
+    A ``.gdb`` directory found anywhere under ``extract_dir`` takes priority
+    over every ``.shp`` file; a shapefile is only used as a fallback.
+
+    Args:
+        extract_dir: Directory to search recursively.
+
+    Returns:
+        Path of the first ``.gdb`` directory found, otherwise the path of the
+        first ``.shp`` file found.
+
+    Raises:
+        ValueError: If neither a ``.gdb`` directory nor a ``.shp`` file exists.
+    """
+    first_shp = None
+    # Single traversal: return as soon as a .gdb shows up and remember the
+    # first .shp along the way in case there is no geodatabase at all.
+    for root, dirs, files in os.walk(extract_dir):
+        for d in dirs:
+            if d.lower().endswith(".gdb"):
+                return os.path.join(root, d)
+
+        if first_shp is None:
+            for f in files:
+                if f.lower().endswith(".shp"):
+                    first_shp = os.path.join(root, f)
+                    break
+
+    if first_shp is not None:
+        return first_shp
+
+    raise ValueError("Tidak ditemukan data source yang didukung (.gdb atau .shp).")
+
+
+def get_main_layer(gdb_path: str) -> str:
+    """
+    Return the name of the main layer of a geodatabase (.gdb).
+
+    The first layer whose name does not end with ``__attach`` is preferred;
+    when every layer ends with ``__attach`` the first layer is returned.
+
+    Args:
+        gdb_path: Path to the geodatabase.
+
+    Returns:
+        Name of the selected layer.
+
+    Raises:
+        ValueError: If the layer list cannot be read or contains no layers.
+    """
+    try:
+        layers = pyogrio.list_layers(gdb_path)
+    except Exception as e:
+        raise ValueError(f"Gagal membaca daftar layer GDB: {e}") from e
+
+    # list_layers returns a NumPy array of [name, geometry type] rows; test
+    # emptiness with len() because truthiness of an array is ambiguous.
+    if len(layers) == 0:
+        raise ValueError(f"Tidak ada layer utama yang valid di {gdb_path}")
+
+    for layer in layers:
+        if not layer[0].lower().endswith("__attach"):
+            return layer[0]
+    return layers[0][0]
+
+
+def read_mpk(path: str) -> gpd.GeoDataFrame:
+    """
+    Read an .mpk package into a GeoDataFrame reprojected to EPSG:4326.
+
+    The file is opened as a 7z archive and extracted into a temporary
+    directory, the first supported data source (.gdb or .shp) is loaded, and
+    the result is reprojected to EPSG:4326.
+
+    Args:
+        path: Path to the .mpk file on disk.
+
+    Returns:
+        The features of the package as a GeoDataFrame in EPSG:4326.
+
+    Raises:
+        ValueError: If the file is empty, cannot be extracted, contains no
+            supported data source, or has no CRS.
+    """
+    with open(path, "rb") as f:
+        mpk_bytes = f.read()
+
+    if not mpk_bytes:
+        raise ValueError("File MPK kosong atau tidak valid.")
+
+    with tempfile.TemporaryDirectory() as tempdir:
+        try:
+            # NOTE(review): the archive presumably comes from a user upload;
+            # confirm the installed py7zr rejects entries escaping tempdir.
+            with SevenZipFile(BytesIO(mpk_bytes), mode="r") as z:
+                z.extractall(path=tempdir)
+        except Exception as e:
+            raise ValueError(f"File MPK rusak atau tidak valid: {e}") from e
+
+        src_path = find_data_source(tempdir)
+
+        if src_path.lower().endswith(".gdb"):
+            layer_name = get_main_layer(src_path)
+            gdf = gpd.read_file(src_path, layer=layer_name)
+        else:
+            gdf = gpd.read_file(src_path)
+
+        if gdf.crs is None:
+            raise ValueError("CRS tidak terdeteksi. Pastikan file memiliki informasi proyeksi (.prj).")
+
+        gdf = gdf.to_crs(epsg=4326)
+
+        print(f"[INFO] Berhasil membaca {len(gdf)} fitur")
+        return gdf
diff --git a/services/upload_file/read_pdf/reader_pdf.py b/services/upload_file/read_pdf/reader_pdf.py
index 71c66d3..0f31264 100644
--- a/services/upload_file/read_pdf/reader_pdf.py
+++ b/services/upload_file/read_pdf/reader_pdf.py
@@ -173,8 +173,12 @@ def parse_page_selection(selectedPage: str, total_pages: int):
 def read_pdf(path: str, page: str):
     pdf_path = path
     selectedPage = None
-    if page == '' or None:
+    # if page == '' or None:
+    #     selectedPage = "1"
+    if not page:
         selectedPage = "1"
+    else:
+        selectedPage = page
     tables_data = []
     with pdfplumber.open(pdf_path) as pdf:
         total_pages = len(pdf.pages)
diff --git a/services/upload_file/upload.py b/services/upload_file/upload.py
index 1e60690..dbf9fdd 100644
--- a/services/upload_file/upload.py
+++ b/services/upload_file/upload.py
@@ -11,6 +11,7 @@ from core.config import UPLOAD_FOLDER, MAX_FILE_MB, VALID_WKT_PREFIXES
 from services.upload_file.read_csv.reader_csv import read_csv
 from services.upload_file.read_shp.reader_shp import read_shp
 from services.upload_file.read_gdb.reader_gdb import read_gdb
+from services.upload_file.read_mpk.reader_mpk import read_mpk
 from services.upload_file.read_pdf.reader_pdf import convert_df, read_pdf
 from services.upload_file.geom_detector.geometry_detector import detect_and_build_geometry
 from services.upload_file.geom_detector.geometry_detector import attach_polygon_geometry_auto
@@ -156,7 +157,7 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
     contents = await file.read()
     size_mb = len(contents) / (1024*1024)
     if size_mb > MAX_FILE_MB:
-        raise HTTPException(status_code=413, detail="File too large")
+        raise HTTPException(status_code=413, detail="Ukuran File Terlalu Besar")
     tmp_path = UPLOAD_FOLDER / fname
     with open(tmp_path, "wb") as f:
         f.write(contents)
@@ -168,6 +169,8 @@ async def handle_upload_file(file: UploadFile = File(...), page: Optional[str] =
             df = read_csv(str(tmp_path))
         elif ext == ".xlsx":
             df = read_csv(str(tmp_path), sheet)
+        elif ext == ".mpk":
+            df = read_mpk(str(tmp_path))
         elif ext == ".pdf":
             tbl = read_pdf(tmp_path, page)
             if len(tbl) == 0: