adding filter kolom administratif
This commit is contained in:
parent
90b7351d9b
commit
c953ae7675
47
services/filter_column.py
Normal file
47
services/filter_column.py
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
import re
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
# Keywords that mark a column name as geographic or administrative.
# The list mixes English and Indonesian terms:
#   - coordinate / geometry words (lat, lon, koordinat, geometri, ...)
#   - Indonesian administrative levels and some abbreviations
#     (desa, kelurahan/kel, kecamatan, kabupaten/kab, kota, provinsi)
#   - generic location / boundary words (lokasi, region, area, zone, batas, ...)
geo_admin_keywords = [
    'lat', 'lon', 'long', 'latitude', 'longitude', 'koordinat', 'geometry', 'geometri',
    'desa', 'kelurahan', 'kel', 'kecamatan', 'kabupaten', 'kab', 'kota', 'provinsi',
    'lokasi', 'region', 'area', 'zone', 'boundary', 'batas'
]
|
||||||
|
|
||||||
|
def normalize_text(text):
    """Return *text* lower-cased and reduced to a compact, matchable form.

    Every run of characters other than ``a-z``, ``0-9``, ``/`` and the space
    character is replaced by a single space, whitespace runs are collapsed to
    one space, and leading/trailing whitespace is removed.
    """
    lowered = text.lower()
    # Punctuation, underscores, hyphens, tabs, etc. all become separators.
    separated = re.sub(r'[^a-z0-9/ ]+', ' ', lowered)
    collapsed = re.sub(r'\s+', ' ', separated)
    return collapsed.strip()
|
||||||
|
|
||||||
|
def generate_combined_patterns(keywords):
    """Build regex sources matching two keywords joined by a slash.

    For every unordered pair ``(a, b)`` taken from *keywords* (in input
    order), two patterns are produced: ``a / b`` and ``b / a``, each allowing
    optional whitespace around the slash.

    Args:
        keywords: Iterable of literal keyword strings.

    Returns:
        A list of regex source strings, two per keyword pair. Empty when
        fewer than two keywords are given.
    """
    patterns = []
    for first, second in itertools.combinations(keywords, 2):
        # Keywords are literals, not regex fragments: escape them so that a
        # keyword containing a metacharacter cannot corrupt the pattern.
        left, right = re.escape(first), re.escape(second)
        patterns.append(rf'{left}\s*/\s*{right}')
        patterns.append(rf'{right}\s*/\s*{left}')
    return patterns
|
||||||
|
|
||||||
|
# Built once at import time: slash-joined pair patterns ("a / b" and "b / a")
# for every pair of entries in geo_admin_keywords.
combined_patterns = generate_combined_patterns(geo_admin_keywords)
|
||||||
|
|
||||||
|
def contains_geo_admin_keywords(text):
    """Return True if *text* looks like a geographic/administrative column name.

    The text is first passed through ``normalize_text``. It matches when
    either:

    * any pattern in ``combined_patterns`` (two keywords joined by ``/``) is
      found anywhere in the normalized text, or
    * any entry of ``geo_admin_keywords`` appears as a whole token, i.e.
      bounded by start/end of string, whitespace, ``/``, ``_`` or ``-``.

    Args:
        text: Raw column name.

    Returns:
        ``True`` on a match, ``False`` otherwise (including when the
        normalized text is shorter than three characters).
    """
    text_clean = normalize_text(text)
    # The shortest current keywords are three characters long, so anything
    # shorter cannot match.
    if len(text_clean) < 3:
        return False

    # NOTE(review): the combined patterns are unanchored, so they also match
    # when the keywords sit inside longer words (e.g. "xlat/lonx").
    if any(re.search(pattern, text_clean) for pattern in combined_patterns):
        return True

    # Keywords are literals; escape them so a keyword containing a regex
    # metacharacter is matched verbatim instead of altering the pattern.
    return any(
        re.search(rf'(^|[\s/_-]){re.escape(kw)}([\s/_-]|$)', text_clean)
        for kw in geo_admin_keywords
    )
|
||||||
|
|
||||||
|
def filter_geo_admin_column(tables):
    """Keep only the tables that have a geographic/administrative column.

    Each item of *tables* is indexed with ``'columns'`` and every column name
    found there is checked with ``contains_geo_admin_keywords``. A table is
    kept as soon as one of its column names matches.

    Returns:
        A new list holding the matching table objects, in their input order.
    """
    return [
        entry
        for entry in tables
        if any(contains_geo_admin_keywords(name) for name in entry['columns'])
    ]
|
||||||
Loading…
Reference in New Issue
Block a user