Parse phone numbers, email and addresses
This commit is contained in:
@ -271,9 +271,10 @@ CREATE TABLE member (
|
|||||||
postal_dest TEXT NOT NULL,
|
postal_dest TEXT NOT NULL,
|
||||||
address TEXT NOT NULL,
|
address TEXT NOT NULL,
|
||||||
|
|
||||||
email TEXT CHECK (email REGEXP '^[^@]+@([a-z0-9_\x2Däöüß]+\.)[a-z]{2,}$') DEFAULT NULL,
|
email TEXT CHECK (email REGEXP '^[^@\s]+@([a-z0-9_\x2Däöüß]+\.)+[a-z]{2,}$') DEFAULT NULL,
|
||||||
phone_landline TEXT CHECK (phone_landline REGEXP '^\+[0-9]+$') DEFAULT NULL,
|
phone_landline TEXT CHECK (phone_landline REGEXP '^\+[0-9]+$') DEFAULT NULL,
|
||||||
phone_mobile TEXT CHECK (phone_mobile REGEXP '^\+[0-9]+$') DEFAULT NULL,
|
phone_mobile_1 TEXT CHECK (phone_mobile_1 REGEXP '^\+[0-9]+$') DEFAULT NULL,
|
||||||
|
phone_mobile_2 TEXT CHECK (phone_mobile_2 REGEXP '^\+[0-9]+$') DEFAULT NULL,
|
||||||
|
|
||||||
default_kgnr INTEGER NOT NULL,
|
default_kgnr INTEGER NOT NULL,
|
||||||
comment TEXT DEFAULT NULL,
|
comment TEXT DEFAULT NULL,
|
||||||
|
@ -11,6 +11,44 @@ import sys
|
|||||||
USTID_RE = re.compile('[A-Z]{2}[A-Z0-9]{2,12}')
|
USTID_RE = re.compile('[A-Z]{2}[A-Z0-9]{2,12}')
|
||||||
BIC_RE = re.compile('[A-Z0-9]{4}[A-Z]{2}[A-Z0-9]{2}([A-Z0-9]{3})?')
|
BIC_RE = re.compile('[A-Z0-9]{4}[A-Z]{2}[A-Z0-9]{2}([A-Z0-9]{3})?')
|
||||||
IBAN_RE = re.compile('[A-Z]{2}[0-9]{2}[A-Z0-9]{8,30}')
|
IBAN_RE = re.compile('[A-Z]{2}[0-9]{2}[A-Z0-9]{8,30}')
|
||||||
|
# E-mail syntax check: a local part without '@' or whitespace, then one or more
# dot-terminated domain labels, then a TLD of at least two lowercase letters.
# Written as a raw string so that `\s` and `\.` reach the regex engine instead
# of being parsed as (invalid) Python string escape sequences.
EMAIL_RE = re.compile(r'[^@\s]+@([a-z0-9_äöüß-]+\.)+[a-z]{2,}')
|
||||||
|
|
||||||
|
|
||||||
|
# Lookup table of street-name spellings and the corrected spelling each one
# should be replaced with.  Keys are matched verbatim (exact string lookup),
# so several variants of the same street may appear as separate keys.
STREET_NAMES = {
    'Hans-Wagnerstraße': 'Hans-Wagner-Straße',
    'J.Seitzstraße': 'Josef-Seitz-Straße',
    'Kurhaus-Str.': 'Kurhausstraße',
    'Kurhaus-Straße': 'Kurhausstraße',
    'Pirawartherstraße': 'Pirawarther Straße',
    'Raggendorferstraße': 'Raggendorfer Straße',
    'Matznerstraße': 'Matzner Straße',
    'Stillfriederstraße': 'Stillfrieder Straße',
    'Harraserstraße': 'Harraser Straße',
    # Fixed: the replacement previously dropped the second "n" (Gänserndorf).
    'Gänserndorferstraße': 'Gänserndorfer Straße',
    'Hofrat Döltlstraße': 'Hofrat-Döltl-Straße',
    'Sulzerstraße': 'Sulzer Straße',
    'Brünnerstraße': 'Brünner Straße',
    'Flustraße': 'Flurstraße',
    'Wienerstraße': 'Wiener Straße',
    'St.Laurentstraße': 'St.-Laurentstraße',
    'Angernerstraße': 'Angerner Straße',
    'Schweinbartherstraße': 'Schweinbarther Straße',
    'Hohenruppersdorferstraße': 'Hohenruppersdorfer Straße',
    'Gruberhauptstraße': 'Gruber Hauptstraße',
    'Josef Seitzstraße': 'Josef-Seitz-Straße',
    # Fixed: the replacement previously transposed letters ("Auerstahler").
    'Auersthalerstraße': 'Auersthaler Straße',
    'Ollersdorferstraße': 'Ollersdorfer Straße',
    'Ritter Zoppelstraße': 'Ritter-Zoppel-Straße',
    'Spannbergerstraße': 'Spannberger Straße',
    'Ritter Zoppel Straße': 'Ritter-Zoppel-Straße',
    'R. Virchow-Straße': 'Rudolf-Virchow-Straße',
    'Ebenthalerstraße': 'Ebenthaler Straße',
    'Bockfließerstraße': 'Bockfließer Straße',
    'Dörfleserstraße': 'Dörfleser Straße',
    'Dörflesserstraße': 'Dörfleser Straße',
    'Grubere Hauptstraße': 'Gruber Hauptstraße',
    'Groß Inzersdorf': 'Großinzersdorf',
}
|
||||||
|
|
||||||
|
|
||||||
def parse_csv(filename: str) -> Iterator[Dict[str, Any]]:
|
def parse_csv(filename: str) -> Iterator[Dict[str, Any]]:
|
||||||
@ -34,7 +72,7 @@ def parse_csv(filename: str) -> Iterator[Dict[str, Any]]:
|
|||||||
part = False
|
part = False
|
||||||
elif part.isdigit():
|
elif part.isdigit():
|
||||||
part = int(part)
|
part = int(part)
|
||||||
elif re.match('\d+\.\d+', part):
|
elif re.match('[0-9]+\.[0-9]+', part):
|
||||||
part = float(part)
|
part = float(part)
|
||||||
elif len(part) == 10 and part[4] == '-' and part[7] == '-':
|
elif len(part) == 10 and part[4] == '-' and part[7] == '-':
|
||||||
part = datetime.datetime.strptime(part, '%Y-%m-%d').date()
|
part = datetime.datetime.strptime(part, '%Y-%m-%d').date()
|
||||||
@ -71,6 +109,10 @@ def invalid(mgnr: int, key: str, value: str) -> None:
|
|||||||
print(f'\x1B[1;31m{mgnr:>5}: {key} {value}\x1B[0m', file=sys.stderr)
|
print(f'\x1B[1;31m{mgnr:>5}: {key} {value}\x1B[0m', file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def convert(mgnr: int, key: str, old_value: str, new_value: str) -> None:
    """Report on stderr that a field value was rewritten.

    Emits a single line wrapped in the ANSI "bold" escape sequence, showing
    the member number right-aligned to five columns, the field name, and the
    old and new value in double quotes.
    """
    line = f'\x1B[1m{mgnr:>5}: {key} "{old_value}" -> "{new_value}"\x1B[0m'
    print(line, file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
def check_lfbis_nr(nr: str) -> bool:
|
def check_lfbis_nr(nr: str) -> bool:
|
||||||
# https://statistik.at/fileadmin/shared/QM/Standarddokumentationen/RW/std_r_land-forstw_register.pdf#page=41
|
# https://statistik.at/fileadmin/shared/QM/Standarddokumentationen/RW/std_r_land-forstw_register.pdf#page=41
|
||||||
if len(nr) != 7 or not nr.isdigit():
|
if len(nr) != 7 or not nr.isdigit():
|
||||||
@ -112,6 +154,13 @@ def generate_iban_at(blz: int, ktonr: str) -> str:
|
|||||||
return iban.replace('00', f'{s:02}', 1)
|
return iban.replace('00', f'{s:02}', 1)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_phone_nr(nr: str) -> str:
    """Normalize a phone number towards international ``+<digits>`` notation.

    Spaces, slashes and hyphens are removed.  A leading ``00`` (international
    call prefix) is turned into ``+``; any other leading ``0`` (national trunk
    prefix) is replaced by the Austrian country code ``+43``.  Everything else
    is returned unchanged apart from the removed separators.

    No validation happens here: the result may still be malformed, and input
    that is empty after removing separators yields ``''`` rather than raising
    ``IndexError``.
    """
    nr = re.sub(r'[ /-]', '', nr)
    if nr.startswith('00'):
        # Number already carries a country code, e.g. 0043664... -> +43664...
        nr = '+' + nr[2:]
    elif nr.startswith('0'):
        nr = '+43' + nr[1:]
    return nr
|
||||||
|
|
||||||
|
|
||||||
def parse_branches(in_dir: str) -> Dict[str, Any]:
|
def parse_branches(in_dir: str) -> Dict[str, Any]:
|
||||||
branches = {}
|
branches = {}
|
||||||
for b in parse_csv(f'{in_dir}/TZweigstellen.csv'):
|
for b in parse_csv(f'{in_dir}/TZweigstellen.csv'):
|
||||||
@ -126,7 +175,9 @@ def migrate_members(in_dir: str, out_dir: str) -> None:
|
|||||||
f_m.write('mgnr;predecessor_mgnr;prefix;given_name;middle_names;family_name;suffix;'
|
f_m.write('mgnr;predecessor_mgnr;prefix;given_name;middle_names;family_name;suffix;'
|
||||||
'birthday;entry_date;exit_date;business_shares;accounting_nr;zwstid;'
|
'birthday;entry_date;exit_date;business_shares;accounting_nr;zwstid;'
|
||||||
'lfbis_nr;ustid;volllieferant;buchführend;funktionär;active;iban;bic;'
|
'lfbis_nr;ustid;volllieferant;buchführend;funktionär;active;iban;bic;'
|
||||||
'country;postal_dest;address;email;phone_landline;phone_mobile;default_kgnr;comment\n')
|
'country;postal_dest;address;'
|
||||||
|
'email;phone_landline;phone_mobile_1;phone_mobile_2;'
|
||||||
|
'default_kgnr;comment\n')
|
||||||
f_mba.write('mgr;name;country;postal_dest;address\n')
|
f_mba.write('mgr;name;country;postal_dest;address\n')
|
||||||
for m in members:
|
for m in members:
|
||||||
mgnr: int = m['MGNR']
|
mgnr: int = m['MGNR']
|
||||||
@ -263,14 +314,75 @@ def migrate_members(in_dir: str, out_dir: str) -> None:
|
|||||||
if not BIC_RE.fullmatch(bic):
|
if not BIC_RE.fullmatch(bic):
|
||||||
invalid(mgnr, 'BIC', bic)
|
invalid(mgnr, 'BIC', bic)
|
||||||
bic = None
|
bic = None
|
||||||
|
if bic is not None:
|
||||||
|
if len(bic) == 11 and bic.endswith('XXX'):
|
||||||
|
bic = bic[:-3]
|
||||||
|
|
||||||
|
ort: Optional[str] = m['Ort']
|
||||||
|
address: Optional[str] = m['Straße']
|
||||||
|
if address is not None:
|
||||||
|
address_old = address
|
||||||
|
address = re.sub('([0-9])([A-Z])', lambda m: m.group(1) + m.group(2).lower(), re.sub('\s+', ' ', address).strip().title())
|
||||||
|
address = address.replace('strasse', 'straße').replace('strassse', 'straße')\
|
||||||
|
.replace('Strasse', 'Straße').replace('Str.', 'Straße')\
|
||||||
|
.replace('str.', 'straße').replace('ster.', 'straße').replace('g. ', 'gasse ')\
|
||||||
|
.replace('Gross', 'Groß').replace('Bockfliess', 'Bockfließ').replace('Weiss', 'Weiß')\
|
||||||
|
.replace('Preussen', 'Preußen').replace('Schloss', 'Schloß').replace('luss', 'luß')\
|
||||||
|
.replace('Haupstraße', 'Hauptstraße')
|
||||||
|
address = re.sub('([a-z])([0-9])', lambda m: m.group(1) + ' ' + m.group(2), address)
|
||||||
|
if address.startswith('Nr. ') or address.startswith('Nr ') or address.isdigit():
|
||||||
|
address = ort.title() + ' ' + address.split(' ')[-1]
|
||||||
|
elif address.startswith('Ob. '):
|
||||||
|
address = address.replace('Ob. ', 'Obere ', 1)
|
||||||
|
address = address.replace(' Nr. ', ' ')
|
||||||
|
address = re.sub(r'([^0-9]+?)( [0-9])', lambda m: STREET_NAMES.get(m.group(1), m.group(1)) + m.group(2), address)
|
||||||
|
if address_old != address:
|
||||||
|
convert(mgnr, 'Adresse', address_old, address)
|
||||||
|
|
||||||
|
phone_1: Optional[str] = m['Telefon']
|
||||||
|
phone_2: Optional[str] = m['Mobiltelefon']
|
||||||
|
email: Optional[str] = m['EMail']
|
||||||
|
phone_landline = None
|
||||||
|
phone_mobile = []
|
||||||
|
|
||||||
|
if email is not None:
|
||||||
|
if email.isupper():
|
||||||
|
email = email.lower()
|
||||||
|
if not EMAIL_RE.fullmatch(email):
|
||||||
|
invalid(mgnr, 'E-Mail', m['EMail'])
|
||||||
|
email = None
|
||||||
|
|
||||||
|
if phone_1:
|
||||||
|
phone_1 = normalize_phone_nr(phone_1)
|
||||||
|
if len(phone_1) <= 8 or phone_1[0] != '+':
|
||||||
|
invalid(mgnr, 'Tel.Nr.', m['Telefon'])
|
||||||
|
else:
|
||||||
|
if phone_1[3] == '6':
|
||||||
|
phone_mobile.append(phone_1)
|
||||||
|
else:
|
||||||
|
phone_landline = phone_1
|
||||||
|
if phone_2:
|
||||||
|
phone_2 = normalize_phone_nr(phone_2)
|
||||||
|
if len(phone_2) <= 8 or phone_2[0] != '+':
|
||||||
|
invalid(mgnr, 'Tel.Nr.', m['Mobiltelefon'])
|
||||||
|
else:
|
||||||
|
if phone_2[3] == '6':
|
||||||
|
phone_mobile.append(phone_2)
|
||||||
|
elif phone_landline is None:
|
||||||
|
phone_landline = phone_2
|
||||||
|
elif phone_landline != phone_2:
|
||||||
|
invalid(mgnr, 'Tel.Nr.', phone_2)
|
||||||
|
|
||||||
|
zwstid = m['ZNR'] and branches[m['ZNR']]['Kennbst'] or len(branches) == 1 and list(branches.values())[0]['Kennbst']
|
||||||
|
|
||||||
#print(m)
|
|
||||||
f_m.write(format_row(
|
f_m.write(format_row(
|
||||||
mgnr, m['MGNR-Vorgänger'], prefix, given_name, middle_names, family_name, suffix,
|
mgnr, m['MGNR-Vorgänger'], prefix, given_name, middle_names, family_name, suffix,
|
||||||
m['Geburtsjahr'], m['Eintrittsdatum'], m['Austrittsdatum'], m['Geschäftsanteile1'],
|
m['Geburtsjahr'], m['Eintrittsdatum'], m['Austrittsdatum'], m['Geschäftsanteile1'],
|
||||||
m['BHKontonummer'], m['ZNR'] and branches[m['ZNR']]['Kennbst'], bnr, ustid,
|
m['BHKontonummer'], zwstid, bnr, ustid,
|
||||||
m['Volllieferant'] or False, m['Buchführend'] or False, False, m['Aktives Mitglied'] or False,
|
m['Volllieferant'] or False, m['Buchführend'] or False, False, m['Aktives Mitglied'] or False,
|
||||||
iban, bic, 'AT',
|
iban, bic, 'AT', None, address, email, phone_landline,
|
||||||
|
phone_mobile[0] if len(phone_mobile) > 0 else None, phone_mobile[1] if len(phone_mobile) > 1 else None,
|
||||||
|
None, m['Anmerkung']
|
||||||
))
|
))
|
||||||
if billing_name:
|
if billing_name:
|
||||||
f_mba.write(format_row(mgnr, billing_name, 'AT', None, None))
|
f_mba.write(format_row(mgnr, billing_name, 'AT', None, None))
|
||||||
|
Reference in New Issue
Block a user