adding filter kolom administratif
This commit is contained in:
parent
90b7351d9b
commit
c953ae7675
47
services/filter_column.py
Normal file
47
services/filter_column.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
import re
|
||||
import itertools
|
||||
|
||||
# Keywords (English and Indonesian) that mark a column name as geographic or
# administrative. Order matters to callers that derive patterns from this list.
geo_admin_keywords = [
    # Coordinates and geometry
    'lat', 'lon', 'long', 'latitude', 'longitude',
    'koordinat', 'geometry', 'geometri',
    # Administrative units
    'desa', 'kelurahan', 'kel', 'kecamatan',
    'kabupaten', 'kab', 'kota', 'provinsi',
    # Generic location / extent terms
    'lokasi', 'region', 'area', 'zone', 'boundary', 'batas',
]
|
||||
|
||||
def normalize_text(text):
    """Return *text* in a canonical form suitable for keyword matching.

    The input is lower-cased, every run of characters other than ``a-z``,
    ``0-9``, ``/`` and space is replaced by a single space, and the
    remaining whitespace is collapsed and trimmed.

    Args:
        text: The string to normalize (must support ``.lower()``).

    Returns:
        The normalized string; possibly empty.
    """
    lowered = text.lower()
    # Punctuation, underscores, dashes, tabs, non-ASCII letters, etc. all
    # become word separators; only '/' survives as a meaningful symbol.
    sanitized = re.sub(r'[^a-z0-9/ ]+', ' ', lowered)
    # After the substitution the only whitespace left is the plain space, so
    # split/join collapses runs and trims both ends in one step.
    return ' '.join(sanitized.split())
|
||||
|
||||
def generate_combined_patterns(keywords):
    """Build regex patterns matching two keywords joined by a slash.

    For every unordered pair ``(a, b)`` drawn from *keywords*, two patterns
    are emitted: one for ``a/b`` and one for ``b/a``. Each allows optional
    whitespace on either side of the slash. The patterns carry no word
    boundaries, so they can also match inside longer words.

    Args:
        keywords: Iterable of literal keywords. Each one is converted with
            ``str()`` and regex-escaped, so metacharacters such as ``.``
            or ``(`` are matched literally.

    Returns:
        A list of regex source strings, two per pair, in
        ``itertools.combinations`` order. Empty when fewer than two
        keywords are supplied.
    """
    patterns = []
    for first, second in itertools.combinations(keywords, 2):
        # Keywords are plain text, not regex fragments: escape them so a
        # keyword with special characters cannot corrupt the pattern.
        a = re.escape(str(first))
        b = re.escape(str(second))
        patterns.append(rf'{a}\s*/\s*{b}')
        patterns.append(rf'{b}\s*/\s*{a}')
    return patterns
|
||||
|
||||
# Slash-joined keyword pair patterns, built once at import time so that
# contains_geo_admin_keywords does not regenerate them on every call.
combined_patterns = generate_combined_patterns(geo_admin_keywords)
|
||||
|
||||
def contains_geo_admin_keywords(text):
    """Tell whether *text* mentions a geographic/administrative keyword.

    The text is normalized first. It matches when it contains either a
    slash-joined pair of keywords (any pattern in ``combined_patterns``)
    or a single keyword from ``geo_admin_keywords`` delimited by the
    start/end of the string, whitespace, ``/``, ``_`` or ``-``.

    Args:
        text: Raw text to inspect, e.g. a column name.

    Returns:
        True if a keyword or keyword pair is found, otherwise False.
    """
    cleaned = normalize_text(text)

    # No keyword in geo_admin_keywords is shorter than three characters,
    # so shorter inputs are rejected without running any regex.
    if len(cleaned) < 3:
        return False

    # Pair patterns such as "lat/lon" are checked before single keywords.
    if any(re.search(pair_pattern, cleaned) for pair_pattern in combined_patterns):
        return True

    # A single keyword must stand alone as a delimited token.
    return any(
        re.search(rf'(^|[\s/_-]){keyword}([\s/_-]|$)', cleaned)
        for keyword in geo_admin_keywords
    )
|
||||
|
||||
def filter_geo_admin_column(tables):
    """Select the tables that have at least one geo/administrative column.

    Args:
        tables: Iterable of mappings, each exposing an iterable of column
            names under the ``'columns'`` key.

    Returns:
        A new list holding, in their original order, the tables for which
        ``contains_geo_admin_keywords`` is true for at least one column.
    """
    return [
        table
        for table in tables
        if any(contains_geo_admin_keywords(column) for column in table['columns'])
    ]
|
||||
Loading…
Reference in New Issue
Block a user