diff --git a/README.md b/README.md index ea3e142..940a7fb 100644 --- a/README.md +++ b/README.md @@ -34,12 +34,23 @@ python3 plz.py cd .. ```` +## Create temporary database + +This will create `wgtemp.sqlite3`. + +```shell +rm wgtemp.sqlite3 +echo ".q" | ./sqlite.sh wgtemp.sqlite3 +``` + ## Migrate +This will create `wgprod.sqlite3` and migrate all WG Master data into it. + ```shell cd wgmaster -python3 migrate.py -q TABLES_DIR MIGRATE_DIR -d DB.sqlite3 -python3 import.py MIGRATE_DIR NEW.sqlite3 +python3 migrate.py -q TABLES_DIR MIGRATE_DIR -d ../wgtemp.sqlite3 +python3 import.py MIGRATE_DIR ../wgprod.sqlite3 cd .. ``` diff --git a/data/plz.py b/data/plz.py index a540bb0..15f2cce 100755 --- a/data/plz.py +++ b/data/plz.py @@ -1,6 +1,6 @@ #!/bin/env python3 -from typing import List, Tuple +from typing import List, Tuple, Callable import argparse import requests import re @@ -12,7 +12,8 @@ import PyPDF2 PLZ_URL = 'https://www.post.at/g/c/postlexikon' -PLZ_BUTTON = re.compile(r'title="PLZ Bestimmungsort" href="(.*?)"') +PLZ_BUTTON = re.compile(r'title="PLZ Verzeichnis" href="(.*?)"') +PLZ_DEST_BUTTON = re.compile(r'title="PLZ Bestimmungsort" href="(.*?)"') KGV_URL = 'https://www.bev.gv.at/Services/Downloads/Produktbezogene-Downloads/Unentgeltliche-Produkte/Kataster-Verzeichnisse/Katastralgemeindenverzeichnis.html' KGV_LINK = re.compile(r'Katastral') @@ -31,10 +32,23 @@ CODES = re.compile(r'Agh|Alm|Alpe|B|Bd|Bwg|Burg|Camp|D|E|Fbk|Fhei|Gh|Hgr|Hot|Ind STRIP_CODE = re.compile(r' *(' + CODES.pattern + r')[()X0-9. -]*$') STRIP_CODE_ALL = re.compile(r' +\b(' + CODES.pattern + r')\b.*$') -PlzRow = Tuple[int, str, int, str, int, str] +PlzRow = Tuple[int, str, int, str, bool, bool, bool] +PlzDestRow = Tuple[int, str, int, str, int, str] KgvRow = Tuple[int, str, int, str] OvRow = Tuple[int, int, str] +PLZ_BUNDESLAND = { + 'B': 1, + 'K': 2, + 'N': 3, + 'O': 4, + 'Sa': 5, + 'St': 6, + 'T': 7, + 'V': 8, + 'W': 9, +} + WIEN_UMGEBUNG = { 32401: 30729, 32402: 30730, @@ -62,23 +76,23 @@ WIEN_UMGEBUNG = { GKZ = {} -def get_plz_url() -> str: +def get_plz_url(button: re.Pattern) -> str: r = requests.get(PLZ_URL, headers={'User-Agent': 'Mozilla/5.0'}) if r.status_code != 200: raise RuntimeError(f'Unexpected response: {r.status_code} {r.reason}') - matches = PLZ_BUTTON.findall(r.text) + matches = button.findall(r.text) if len(matches) == 0: raise RuntimeError('Unable to find url of file') return matches[0] -def download_plz() -> List[PlzRow]: +def download_excel(url: str, transform: Callable[[List[str]], Tuple]) -> List: f_name = None try: f = tempfile.NamedTemporaryFile(delete=False) - with requests.get(get_plz_url(), stream=True, headers={'User-Agent': 'Mozilla/5.0'}) as r: + with requests.get(url, stream=True, headers={'User-Agent': 'Mozilla/5.0'}) as r: if r.status_code != 200: raise RuntimeError(f'Unexpected response: {r.status_code} {r.reason}') for chunk in r.iter_content(chunk_size=8192): @@ -91,13 +105,24 @@ def download_plz() -> List[PlzRow]: sheet = wb.sheet_by_index(0) for r in range(1, sheet.nrows): row = sheet.row_values(r) - rows.append((int(row[0]), row[1], int(row[2]), row[3], int(row[4]), row[5])) + rows.append(transform(row)) return rows finally: if f_name is not None: os.remove(f_name) +def download_plz() -> List[PlzRow]: + return download_excel(get_plz_url(PLZ_BUTTON), + lambda r: (int(r[0]), r[1], PLZ_BUNDESLAND[r[2]], r[5], + r[6] == 'intern', r[7] == 'Ja', r[8] == 'Ja')) + + +def download_plz_dest() -> List[PlzDestRow]: + return download_excel(get_plz_url(PLZ_DEST_BUTTON), + lambda r: (int(r[0]), r[1], int(r[2]), r[3], int(r[4]), r[5])) + + def get_kvg_zip_url() -> str: r = requests.get(KGV_URL) if r.status_code != 200: @@ -244,9 +269,9 @@ def parse_ov() -> List[OvRow]: return rows -def write_sql(plz_rows: List[PlzRow], kgv_rows: List[KgvRow], ov_rows: List[OvRow]) -> None: +def write_sql(plz_rows: List[PlzRow], plz_dest_rows: List[PlzDestRow], kgv_rows: List[KgvRow], ov_rows: List[OvRow]) -> None: kgv = {kgnr: (kg_name, gkz, gem_name) for kgnr, kg_name, gkz, gem_name in kgv_rows} - ov = {okz: (o_name, gkz) for plz, _, okz, o_name, gkz, _ in plz_rows} + ov = {okz: (o_name, gkz) for plz, _, okz, o_name, gkz, _ in plz_dest_rows} ov.update({okz: (name, gkz) for gkz, okz, name in ov_rows}) gemeinden = {gkz: (gem_name, [], []) for kgnr, kg_name, gkz, gem_name in kgv_rows} @@ -297,7 +322,14 @@ def write_sql(plz_rows: List[PlzRow], kgv_rows: List[KgvRow], ov_rows: List[OvRo f.write(b';\n') f.write(b"\nINSERT INTO AT_plz VALUES\n") - for plz, dest, okz, _, _, _ in plz_rows: + for plz, ort, blnr, plz_type, internal, addr, po_box in plz_rows: + f.write(f"({plz:4}, '{ort}', {blnr}, '{plz_type}', {internal and 'TRUE' or 'FALSE'}, " + f"{addr and 'TRUE' or 'FALSE'}, {po_box and 'TRUE' or 'FALSE'}),\n".encode('utf8')) + f.seek(-2, 1) + f.write(b';\n') + + f.write(b"\nINSERT INTO AT_plz_dest VALUES\n") + for plz, dest, okz, _, _, _ in plz_dest_rows: f.write(f"({plz:4}, {okz:5}, '{dest}'),\n".encode('utf8')) f.seek(-2, 1) f.write(b';\n') @@ -330,8 +362,10 @@ if __name__ == '__main__': ov_data = parse_ov() print('Downloading PLZ data from www.post.at') plz_data = download_plz() + print('Downloading PLZ destination data from www.post.at') + plz_dest_data = download_plz_dest() print('Downloading Katastralgemeindenverzeichnis from www.bev.gv.at') kgv_data = download_kgv() print('Generating 90.plz.sql') - write_sql(plz_data, kgv_data, ov_data) + write_sql(plz_data, plz_dest_data, kgv_data, ov_data) print('Successfully created 90.plz.sql!') diff --git a/sql/91.plz-fix.sql b/sql/91.plz-fix.sql index 687dde3..bce3582 100644 --- a/sql/91.plz-fix.sql +++ b/sql/91.plz-fix.sql @@ -1,9 +1,9 @@ -INSERT INTO AT_plz (plz, okz, dest) +INSERT INTO AT_plz_dest (plz, okz, dest) VALUES (2241, 3560, 'Schönkirchen-Reyersdorf'), (2165, 5013, 'Drasenhofen'), (2134, 5115, 'Staatz-Kautendorf'); UPDATE AT_ort SET name = 'Etzmannsdorf am Kamp' WHERE okz = 3938; -DELETE FROM AT_plz WHERE (plz, okz) = (2231, 5011); +DELETE FROM AT_plz_dest WHERE (plz, okz) = (2231, 5011); diff --git a/sql/v01/10.create.sql b/sql/v01/10.create.sql index 168ffa6..e65be48 100644 --- a/sql/v01/10.create.sql +++ b/sql/v01/10.create.sql @@ -90,6 +90,22 @@ CREATE TABLE AT_ort ( ) STRICT; CREATE TABLE AT_plz ( + plz INTEGER NOT NULL CHECK (plz >= 1000 AND plz <= 9999), + + ort TEXT NOT NULL, + blnr INTEGER NOT NULL, + type TEXT NOT NULL, + internal INTEGER NOT NULL CHECK (internal IN (TRUE, FALSE)), + addressable INTEGER NOT NULL CHECK (addressable IN (TRUE, FALSE)), + po_box INTEGER NOT NULL CHECK (po_box IN (TRUE, FALSE)), + + CONSTRAINT pk_AT_plz PRIMARY KEY (plz), + CONSTRAINT fk_AT_plz_AT_bundesland FOREIGN KEY (blnr) REFERENCES AT_bundesland (blnr) + ON UPDATE CASCADE + ON DELETE RESTRICT +); + +CREATE TABLE AT_plz_dest ( plz INTEGER NOT NULL CHECK (plz >= 1000 AND plz <= 9999), okz INTEGER NOT NULL, @@ -98,30 +114,30 @@ CREATE TABLE AT_plz ( dest TEXT NOT NULL, - CONSTRAINT pk_AT_plz PRIMARY KEY (plz, okz), - CONSTRAINT sk_AT_plz_id UNIQUE (id), - CONSTRAINT fk_AT_plz_AT_ort FOREIGN KEY (okz) REFERENCES AT_ort (okz) + CONSTRAINT pk_AT_plz_dest PRIMARY KEY (plz, okz), + CONSTRAINT sk_AT_plz_dest_id UNIQUE (id), + CONSTRAINT fk_AT_plz_dest_AT_ort FOREIGN KEY (okz) REFERENCES AT_ort (okz) ON UPDATE CASCADE ON DELETE RESTRICT, - CONSTRAINT fk_AT_plz_postal_dest FOREIGN KEY (country, id) REFERENCES postal_dest (country, id) + CONSTRAINT fk_AT_plz_dest_postal_dest FOREIGN KEY (country, id) REFERENCES postal_dest (country, id) ON UPDATE CASCADE ON DELETE CASCADE ) STRICT; -CREATE TRIGGER t_AT_plz_i - AFTER INSERT ON AT_plz FOR EACH ROW +CREATE TRIGGER t_AT_plz_dest_i + AFTER INSERT ON AT_plz_dest FOR EACH ROW BEGIN INSERT INTO postal_dest (country, id) VALUES (NEW.country, NEW.id); END; -CREATE TRIGGER t_AT_plz_u - AFTER UPDATE OF id ON AT_plz FOR EACH ROW +CREATE TRIGGER t_AT_plz_dest_u + AFTER UPDATE OF id ON AT_plz_dest FOR EACH ROW BEGIN UPDATE postal_dest SET country = NEW.country, id = NEW.id WHERE (country, id) = (OLD.country, OLD.id); END; -CREATE TRIGGER t_AT_plz_d - AFTER DELETE ON AT_plz FOR EACH ROW +CREATE TRIGGER t_AT_plz_dest_d + AFTER DELETE ON AT_plz_dest FOR EACH ROW BEGIN DELETE FROM postal_dest WHERE (country, id) = (OLD.country, OLD.id); END; diff --git a/sql/v01/20.view.sql b/sql/v01/20.view.sql index 5e5dac3..d5d555d 100644 --- a/sql/v01/20.view.sql +++ b/sql/v01/20.view.sql @@ -3,4 +3,4 @@ CREATE VIEW v_plz AS SELECT plz, p.dest AS bestimmungsort, g.name AS gemeinde, g.gkz, o.name AS ort, o.okz FROM AT_gem g JOIN AT_ort o ON o.gkz = g.gkz - JOIN AT_plz p ON p.okz = o.okz; + JOIN AT_plz_dest p ON p.okz = o.okz; diff --git a/tt/STE.csv b/tt/STE.csv index e32aa5b..bd7d2fa 100644 --- a/tt/STE.csv +++ b/tt/STE.csv @@ -49,3 +49,4 @@ date;from;to;hours;category;name 2023-03-13;16:00;18:15;2.75;wpf;Member List Window 2023-03-13;21:00;23:30;2.5;import;import.py 2023-03-14;02:00;02:45;0.75;wpf;Entity Framework +2023-03-14;10:00;11:30;1.5;database;AT PLZ Dest diff --git a/wgmaster/migrate.py b/wgmaster/migrate.py index 41951ea..0e50d4e 100755 --- a/wgmaster/migrate.py +++ b/wgmaster/migrate.py @@ -153,7 +153,7 @@ def lookup_plz(plz: Optional[int], ort: Optional[str], address: Optional[str] = return None cur = DB_CNX.cursor() - cur.execute("SELECT o.okz, p.dest, o.name FROM AT_plz p JOIN AT_ort o ON o.okz = p.okz WHERE plz = ?", (plz,)) + cur.execute("SELECT o.okz, p.dest, o.name FROM AT_plz_dest p JOIN AT_ort o ON o.okz = p.okz WHERE plz = ?", (plz,)) rows: List[Tuple[int, str, str]] = cur.fetchall() cur.close() @@ -176,8 +176,7 @@ def lookup_plz(plz: Optional[int], ort: Optional[str], address: Optional[str] = # Götzendorf return plz * 100000 + 3571 - print(ort, address) - raise RuntimeError() + raise RuntimeError(f'PLZ not found ({plz} {ort}, {address})') def lookup_kgnr(okz: Optional[int]) -> Optional[int]: @@ -226,10 +225,10 @@ def migrate_branches(in_dir: str, out_dir: str) -> None: with open(f'{out_dir}/branch.csv', 'w+') as f: f.write('zwstid;name;country;postal_dest;address;phone_nr\n') for b in csv.parse(f'{in_dir}/TZweigstellen.csv'): - BRANCH_MAP[b['ZNR']] = b['Kennbst'].strip().title() + BRANCH_MAP[b['ZNR']] = b['Kennbst'] address = b['Straße'] postal_dest = lookup_plz(int(b['PLZ']) if b['PLZ'] else None, b['Ort'], address) - f.write(csv.format_row(b['Kennbst'], b['Name'], 'AT', postal_dest, address, b['Telefon'])) + f.write(csv.format_row(b['Kennbst'], b['Name'].strip().title(), 'AT', postal_dest, address, b['Telefon'])) def migrate_grosslagen(in_dir: str, out_dir: str) -> None: