From 8fb96db33724fb39a7fdfb02f6f27b907aec41dd Mon Sep 17 00:00:00 2001 From: Lorenz Stechauner Date: Wed, 30 Nov 2022 15:43:50 +0100 Subject: [PATCH] Clean up data --- data/plz.py | 3 +++ sql/plz-fix.sql | 2 ++ 2 files changed, 5 insertions(+) diff --git a/data/plz.py b/data/plz.py index c2e9895..a0791f0 100755 --- a/data/plz.py +++ b/data/plz.py @@ -24,6 +24,7 @@ GEM = re.compile(r'^([1-9][0-9]{2} [0-9]{2}) [X0-9]+') ORT = re.compile(r'^([0-9]{5}) (([A-Za-zÄÖÜäöü(][A-Za-z0-9äöüÄÖÜßẞ():,.-]* ?|[0-9]+\..*?)+)' r'(( [()X0-9.-]+)*)?([A-Za-zÄÖÜäöüßẞ ]+([0-9]*))?$') STRIP_NUM = re.compile(r'[X0-9. -]+$') +STRIP_INV = re.compile(r' [A-Za-z]+[0-9]+$') CODES = re.compile(r'Agh|Alm|Alpe|B|Bd|Bwg|Burg|Camp|D|E|Fbk|Fhei|Gh|Hgr|Hot|Indz|Jh|Jhtt|Ki|Kl|Krwk|Ks|M|Mh|' r'Mü|R|Ru|Sa|Sä|Sb|Schh|Schih|Schl|Sdlg|Sgr|St|Stbr|Stt|V|W|We|Ek|Z|Zgl|ZH') @@ -273,6 +274,8 @@ def write_sql(plz_rows: List[PlzRow], kgv_rows: List[KgvRow], ov_rows: List[OvRo elif 'Bez.' in name: name = name.replace(',', ', ', 1).replace('.Bez.:', '. Bezirk: ').replace('0', '') + name = STRIP_INV.sub('', name) + if gkz not in gemeinden: print(okz, name, gkz) pr.add(gkz) diff --git a/sql/plz-fix.sql b/sql/plz-fix.sql index a074a75..c991c9e 100644 --- a/sql/plz-fix.sql +++ b/sql/plz-fix.sql @@ -3,3 +3,5 @@ INSERT INTO AT_plz VALUES (2241, 3560, 'Schönkirchen-Reyersdorf'), (2165, 5013, 'Drasenhofen'), (2134, 5115, 'Staaz-Kautendorf'); + +UPDATE AT_ort SET name = 'Etzmannsdorf am Kamp' WHERE okz = 3938;