adding filter kolom administratif

This commit is contained in:
dmsanhrProject 2025-11-04 14:23:15 +07:00
parent 90b7351d9b
commit c953ae7675

47
services/filter_column.py Normal file
View File

@ -0,0 +1,47 @@
import re
import itertools
# Keywords (English and Indonesian) that mark a column name as geographic or
# administrative. Element order is kept stable because the pairwise patterns
# built from this list are generated in list order.
geo_admin_keywords = [
    # coordinates / geometry
    "lat",
    "lon",
    "long",
    "latitude",
    "longitude",
    "koordinat",
    "geometry",
    "geometri",
    # administrative units and their abbreviations
    "desa",
    "kelurahan",
    "kel",
    "kecamatan",
    "kabupaten",
    "kab",
    "kota",
    "provinsi",
    # generic location / boundary terms
    "lokasi",
    "region",
    "area",
    "zone",
    "boundary",
    "batas",
]
def normalize_text(text):
    """Return a lowercase, whitespace-collapsed version of *text*.

    Every run of characters other than ``a-z``, ``0-9``, ``/`` and the space
    character is replaced by a single space (so ``_`` and ``-`` become
    spaces), then consecutive whitespace is squeezed to one space and the
    result is stripped at both ends.
    """
    lowered = text.lower()
    # Keep only letters, digits, "/" and spaces; everything else is a separator.
    separated = re.sub(r'[^a-z0-9/ ]+', ' ', lowered)
    collapsed = re.sub(r'\s+', ' ', separated)
    return collapsed.strip()
def generate_combined_patterns(keywords):
    """Build regex patterns matching two keywords joined by a slash.

    For every unordered pair ``(a, b)`` taken from *keywords*, two patterns
    are produced -- ``a / b`` and ``b / a`` -- each allowing optional
    whitespace around the ``/``.

    Args:
        keywords: Iterable of literal keyword strings.

    Returns:
        A list of regex pattern strings, two per keyword pair, in the order
        the pairs are produced by ``itertools.combinations``.
    """
    patterns = []
    for first, second in itertools.combinations(keywords, 2):
        # Keywords are literals, not regexes: escape them so metacharacters
        # such as "." or "+" cannot change (or break) the generated pattern.
        # Purely alphanumeric keywords are unaffected by the escaping.
        a = re.escape(first)
        b = re.escape(second)
        patterns.append(rf'{a}\s*/\s*{b}')
        patterns.append(rf'{b}\s*/\s*{a}')
    return patterns
# Built once at import time so contains_geo_admin_keywords() can reuse the
# pairwise "a/b" patterns instead of regenerating them on every call.
combined_patterns = generate_combined_patterns(geo_admin_keywords)
def contains_geo_admin_keywords(text):
    """Tell whether *text* mentions a geographic/administrative keyword.

    The text is first passed through ``normalize_text``. It matches when
    either of the following holds:

    * any of the precomputed ``combined_patterns`` ("a/b" keyword pairs) is
      found anywhere in the normalized text, or
    * any entry of ``geo_admin_keywords`` appears as a whole token, i.e.
      bounded on each side by the start/end of the string or by one of
      whitespace, ``/``, ``_`` or ``-``.

    Returns:
        ``True`` on a match, ``False`` otherwise. Text that normalizes to
        fewer than three characters is always ``False``.
    """
    cleaned = normalize_text(text)

    # Too short to hold even the shortest keyword.
    if len(cleaned) < 3:
        return False

    # Slash-joined keyword pairs are checked first.
    if any(re.search(combo, cleaned) for combo in combined_patterns):
        return True

    # Otherwise look for any single keyword standing as its own token.
    return any(
        re.search(rf'(^|[\s/_-]){keyword}([\s/_-]|$)', cleaned)
        for keyword in geo_admin_keywords
    )
def filter_geo_admin_column(tables):
    """Select the tables that have a geographic/administrative column.

    Args:
        tables: Iterable of mappings; each is indexed with ``'columns'`` to
            obtain an iterable of column names.

    Returns:
        A new list with the entries of *tables*, in their original order,
        for which ``contains_geo_admin_keywords`` is true for at least one
        column name.
    """
    return [
        entry
        for entry in tables
        if any(contains_geo_admin_keywords(name) for name in entry['columns'])
    ]