Clean up data

This commit is contained in:
2022-11-30 15:43:50 +01:00
parent 216e195102
commit 8fb96db337
2 changed files with 5 additions and 0 deletions

View File

@ -24,6 +24,7 @@ GEM = re.compile(r'^([1-9][0-9]{2} [0-9]{2}) [X0-9]+')
# ORT matches a whole line of the form "<5 digits> <name>[ <codes>...][<suffix>]":
#   group 1: the leading five-digit number;
#   group 2: the name, built from space-separated tokens that either start with a
#            letter or '(' or look like a numbered item ("<digits>." followed by text);
#   group 4: optional space-separated runs made only of '(', ')', 'X', digits, '.', '-';
#   group 6: optional trailing letters/spaces, with group 7 capturing trailing digits.
# NOTE(review): presumably these are locality ("Ort") rows of the input data -- confirm
# against the parser that uses this pattern.
ORT = re.compile(r'^([0-9]{5}) (([A-Za-zÄÖÜäöü(][A-Za-z0-9äöüÄÖÜßẞ():,.-]* ?|[0-9]+\..*?)+)'
r'(( [()X0-9.-]+)*)?([A-Za-zÄÖÜäöüßẞ ]+([0-9]*))?$')
# Trailing run of 'X', digits, dots, spaces and hyphens at the end of a string.
STRIP_NUM = re.compile(r'[X0-9. -]+$')
# Trailing " <ASCII letters><digits>" token at the end of a string; write_sql removes it
# from names via STRIP_INV.sub('', name).
STRIP_INV = re.compile(r' [A-Za-z]+[0-9]+$')
# Alternation of short abbreviation codes.
# NOTE(review): the pattern is unanchored and several alternatives are prefixes of later
# ones ('B' before 'Bd'/'Bwg'/'Burg', 'M' before 'Mh'/'Mü', 'Sa', 'St', 'W', 'Z', ...).
# With match()/search() the earlier, shorter alternative wins; only fullmatch() tells
# them apart -- confirm how the call sites apply this pattern.
CODES = re.compile(r'Agh|Alm|Alpe|B|Bd|Bwg|Burg|Camp|D|E|Fbk|Fhei|Gh|Hgr|Hot|Indz|Jh|Jhtt|Ki|Kl|Krwk|Ks|M|Mh|'
r'Mü|R|Ru|Sa|Sä|Sb|Schh|Schih|Schl|Sdlg|Sgr|St|Stbr|Stt|V|W|We|Ek|Z|Zgl|ZH')
@ -273,6 +274,8 @@ def write_sql(plz_rows: List[PlzRow], kgv_rows: List[KgvRow], ov_rows: List[OvRo
elif 'Bez.' in name:
name = name.replace(',', ', ', 1).replace('.Bez.:', '. Bezirk: ').replace('0', '')
name = STRIP_INV.sub('', name)
if gkz not in gemeinden:
print(okz, name, gkz)
pr.add(gkz)