From 733761777cdeb989a6bae41bafa26b99b7da48a2 Mon Sep 17 00:00:00 2001 From: Lorenz Stechauner Date: Thu, 23 Feb 2023 23:01:16 +0100 Subject: [PATCH] Parse phone numbers, email and addresses --- sql/v01/01.create.sql | 7 ++- wgmaster/migrate.py | 126 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 123 insertions(+), 10 deletions(-) diff --git a/sql/v01/01.create.sql b/sql/v01/01.create.sql index 72633cb..c89a2e6 100644 --- a/sql/v01/01.create.sql +++ b/sql/v01/01.create.sql @@ -271,9 +271,10 @@ CREATE TABLE member ( postal_dest TEXT NOT NULL, address TEXT NOT NULL, - email TEXT CHECK (email REGEXP '^[^@]+@([a-z0-9_\x2Däöüß]+\.)[a-z]{2,}$') DEFAULT NULL, - phone_landline TEXT CHECK (phone_landline REGEXP '^\+[0-9]+$') DEFAULT NULL, - phone_mobile TEXT CHECK (phone_mobile REGEXP '^\+[0-9]+$') DEFAULT NULL, + email TEXT CHECK (email REGEXP '^[^@\s]+@([a-z0-9_\x2Däöüß]+\.)+[a-z]{2,}$') DEFAULT NULL, + phone_landline TEXT CHECK (phone_landline REGEXP '^\+[0-9]+$') DEFAULT NULL, + phone_mobile_1 TEXT CHECK (phone_mobile_1 REGEXP '^\+[0-9]+$') DEFAULT NULL, + phone_mobile_2 TEXT CHECK (phone_mobile_2 REGEXP '^\+[0-9]+$') DEFAULT NULL, default_kgnr INTEGER NOT NULL, comment TEXT DEFAULT NULL, diff --git a/wgmaster/migrate.py b/wgmaster/migrate.py index 54745a3..5176ee2 100755 --- a/wgmaster/migrate.py +++ b/wgmaster/migrate.py @@ -11,6 +11,44 @@ import sys USTID_RE = re.compile('[A-Z]{2}[A-Z0-9]{2,12}') BIC_RE = re.compile('[A-Z0-9]{4}[A-Z]{2}[A-Z0-9]{2}([A-Z0-9]{3})?') IBAN_RE = re.compile('[A-Z]{2}[0-9]{2}[A-Z0-9]{8,30}') +EMAIL_RE = re.compile('[^@\s]+@([a-z0-9_äöüß-]+\.)+[a-z]{2,}') + + +STREET_NAMES = { + 'Hans-Wagnerstraße': 'Hans-Wagner-Straße', + 'J.Seitzstraße': 'Josef-Seitz-Straße', + 'Kurhaus-Str.': 'Kurhausstraße', + 'Kurhaus-Straße': 'Kurhausstraße', + 'Pirawartherstraße': 'Pirawarther Straße', + 'Raggendorferstraße': 'Raggendorfer Straße', + 'Matznerstraße': 'Matzner Straße', + 'Stillfriederstraße': 'Stillfrieder Straße', + 'Harraserstraße': 
'Harraser Straße', + 'Gänserndorferstraße': 'Gänserndorfer Straße', + 'Hofrat Döltlstraße': 'Hofrat-Döltl-Straße', + 'Sulzerstraße': 'Sulzer Straße', + 'Brünnerstraße': 'Brünner Straße', + 'Flustraße': 'Flurstraße', + 'Wienerstraße': 'Wiener Straße', + 'St.Laurentstraße': 'St.-Laurentstraße', + 'Angernerstraße': 'Angerner Straße', + 'Schweinbartherstraße': 'Schweinbarther Straße', + 'Hohenruppersdorferstraße': 'Hohenruppersdorfer Straße', + 'Gruberhauptstraße': 'Gruber Hauptstraße', + 'Josef Seitzstraße': 'Josef-Seitz-Straße', + 'Auersthalerstraße': 'Auersthaler Straße', + 'Ollersdorferstraße': 'Ollersdorfer Straße', + 'Ritter Zoppelstraße': 'Ritter-Zoppel-Straße', + 'Spannbergerstraße': 'Spannberger Straße', + 'Ritter Zoppel Straße': 'Ritter-Zoppel-Straße', + 'R. Virchow-Straße': 'Rudolf-Virchow-Straße', + 'Ebenthalerstraße': 'Ebenthaler Straße', + 'Bockfließerstraße': 'Bockfließer Straße', + 'Dörfleserstraße': 'Dörfleser Straße', + 'Dörflesserstraße': 'Dörfleser Straße', + 'Grubere Hauptstraße': 'Gruber Hauptstraße', + 'Groß Inzersdorf': 'Großinzersdorf', +} def parse_csv(filename: str) -> Iterator[Dict[str, Any]]: @@ -34,7 +72,7 @@ def parse_csv(filename: str) -> Iterator[Dict[str, Any]]: part = False elif part.isdigit(): part = int(part) - elif re.match('\d+\.\d+', part): + elif re.match('[0-9]+\.[0-9]+', part): part = float(part) elif len(part) == 10 and part[4] == '-' and part[7] == '-': part = datetime.datetime.strptime(part, '%Y-%m-%d').date() @@ -71,6 +109,10 @@ def invalid(mgnr: int, key: str, value: str) -> None: print(f'\x1B[1;31m{mgnr:>5}: {key} {value}\x1B[0m', file=sys.stderr) +def convert(mgnr: int, key: str, old_value: str, new_value: str) -> None: + print(f'\x1B[1m{mgnr:>5}: {key} "{old_value}" -> "{new_value}"\x1B[0m', file=sys.stderr) + + def check_lfbis_nr(nr: str) -> bool: # https://statistik.at/fileadmin/shared/QM/Standarddokumentationen/RW/std_r_land-forstw_register.pdf#page=41 if len(nr) != 7 or not nr.isdigit(): @@ -112,6 +154,13 @@ def
generate_iban_at(blz: int, ktonr: str) -> str: return iban.replace('00', f'{s:02}', 1) +def normalize_phone_nr(nr: str) -> str: + nr = re.sub('[ /-]', '', nr) + if nr[0] == '0': + nr = '+43' + nr[1:] + return nr + + def parse_branches(in_dir: str) -> Dict[str, Any]: branches = {} for b in parse_csv(f'{in_dir}/TZweigstellen.csv'): @@ -124,9 +173,11 @@ def migrate_members(in_dir: str, out_dir: str) -> None: branches = parse_branches(in_dir) with open(f'{out_dir}/member.csv', 'w+') as f_m, open(f'{out_dir}/member_billing_address.csv', 'w+') as f_mba: f_m.write('mgnr;predecessor_mgnr;prefix;given_name;middle_names;family_name;suffix;' - 'birthday;entry_date;exit_date;business_shares;accounting_nr;zwstid;' - 'lfbis_nr;ustid;volllieferant;buchführend;funktionär;active;iban;bic;' - 'country;postal_dest;address;email;phone_landline;phone_mobile;default_kgnr;comment\n') + 'birthday;entry_date;exit_date;business_shares;accounting_nr;zwstid;' + 'lfbis_nr;ustid;volllieferant;buchführend;funktionär;active;iban;bic;' + 'country;postal_dest;address;' + 'email;phone_landline;phone_mobile_1;phone_mobile_2;' + 'default_kgnr;comment\n') f_mba.write('mgr;name;country;postal_dest;address\n') for m in members: mgnr: int = m['MGNR'] @@ -263,14 +314,75 @@ def migrate_members(in_dir: str, out_dir: str) -> None: if not BIC_RE.fullmatch(bic): invalid(mgnr, 'BIC', bic) bic = None + if bic is not None: + if len(bic) == 11 and bic.endswith('XXX'): + bic = bic[:-3] + + ort: Optional[str] = m['Ort'] + address: Optional[str] = m['Straße'] + if address is not None: + address_old = address + address = re.sub('([0-9])([A-Z])', lambda m: m.group(1) + m.group(2).lower(), re.sub('\s+', ' ', address).strip().title()) + address = address.replace('strasse', 'straße').replace('strassse', 'straße')\ + .replace('Strasse', 'Straße').replace('Str.', 'Straße')\ + .replace('str.', 'straße').replace('ster.', 'straße').replace('g. 
', 'gasse ')\ + .replace('Gross', 'Groß').replace('Bockfliess', 'Bockfließ').replace('Weiss', 'Weiß')\ + .replace('Preussen', 'Preußen').replace('Schloss', 'Schloß').replace('luss', 'luß')\ + .replace('Haupstraße', 'Hauptstraße') + address = re.sub('([a-z])([0-9])', lambda m: m.group(1) + ' ' + m.group(2), address) + if address.startswith('Nr. ') or address.startswith('Nr ') or address.isdigit(): + address = ort.title() + ' ' + address.split(' ')[-1] + elif address.startswith('Ob. '): + address = address.replace('Ob. ', 'Obere ', 1) + address = address.replace(' Nr. ', ' ') + address = re.sub(r'([^0-9]+?)( [0-9])', lambda m: STREET_NAMES.get(m.group(1), m.group(1)) + m.group(2), address) + if address_old != address: + convert(mgnr, 'Adresse', address_old, address) + + phone_1: Optional[str] = m['Telefon'] + phone_2: Optional[str] = m['Mobiltelefon'] + email: Optional[str] = m['EMail'] + phone_landline = None + phone_mobile = [] + + if email is not None: + if email.isupper(): + email = email.lower() + if not EMAIL_RE.fullmatch(email): + invalid(mgnr, 'E-Mail', m['EMail']) + email = None + + if phone_1: + phone_1 = normalize_phone_nr(phone_1) + if len(phone_1) <= 8 or phone_1[0] != '+': + invalid(mgnr, 'Tel.Nr.', m['Telefon']) + else: + if phone_1[3] == '6': + phone_mobile.append(phone_1) + else: + phone_landline = phone_1 + if phone_2: + phone_2 = normalize_phone_nr(phone_2) + if len(phone_2) <= 8 or phone_2[0] != '+': + invalid(mgnr, 'Tel.Nr.', m['Mobiltelefon']) + else: + if phone_2[3] == '6': + phone_mobile.append(phone_2) + elif phone_landline is None: + phone_landline = phone_2 + elif phone_landline != phone_2: + invalid(mgnr, 'Tel.Nr.', phone_2) + + zwstid = m['ZNR'] and branches[m['ZNR']]['Kennbst'] or len(branches) == 1 and list(branches.values())[0]['Kennbst'] - #print(m) f_m.write(format_row( mgnr, m['MGNR-Vorgänger'], prefix, given_name, middle_names, family_name, suffix, m['Geburtsjahr'], m['Eintrittsdatum'], m['Austrittsdatum'], m['Geschäftsanteile1'], 
- m['BHKontonummer'], m['ZNR'] and branches[m['ZNR']]['Kennbst'], bnr, ustid, + m['BHKontonummer'], zwstid, bnr, ustid, m['Volllieferant'] or False, m['Buchführend'] or False, False, m['Aktives Mitglied'] or False, - iban, bic, 'AT', + iban, bic, 'AT', None, address, email, phone_landline, + phone_mobile[0] if len(phone_mobile) > 0 else None, phone_mobile[1] if len(phone_mobile) > 1 else None, + None, m['Anmerkung'] )) if billing_name: f_mba.write(format_row(mgnr, billing_name, 'AT', None, None))