diff --git a/.gitignore b/.gitignore
index 485dee6..9625327 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 .idea
+data/*.sql
diff --git a/data/plz.py b/data/plz.py
new file mode 100755
index 0000000..df25e3a
--- /dev/null
+++ b/data/plz.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+"""Download the PLZ spreadsheet linked from post.at and write it out as plz.sql."""
+
+from typing import List, Tuple
+import argparse
+import requests
+import re
+import xlrd
+import tempfile
+
+
+URL = 'https://www.post.at/g/c/postlexikon'
+BUTTON = re.compile(r'title="PLZ Bestimmungsort" href="(.*?)"')
+# Seconds to wait on the server; without a timeout requests can block forever.
+TIMEOUT = 30
+
+
+def get_excel_url() -> str:
+    """Return the href of the "PLZ Bestimmungsort" link found on the URL page.
+
+    Raises RuntimeError if the page is not served with status 200 or the
+    link cannot be found in its HTML.
+    """
+    r = requests.get(URL, headers={'User-Agent': 'Mozilla/5.0'}, timeout=TIMEOUT)
+    if r.status_code != 200:
+        raise RuntimeError(f'Unexpected response: {r.status_code} {r.reason}')
+
+    matches = BUTTON.findall(r.text)
+    if not matches:
+        raise RuntimeError('Unable to find url of file')
+
+    return matches[0]
+
+
+def download_excel() -> List[Tuple[int, str, int, str, int, str]]:
+    """Download the spreadsheet and return its data rows as tuples.
+
+    The first sheet is read and its first row skipped (presumably a header).
+    Columns 0, 2 and 4 are converted to int; columns 1, 3 and 5 are kept
+    as read. Raises RuntimeError if the download does not return status 200.
+    """
+    with tempfile.NamedTemporaryFile() as f:
+        with requests.get(get_excel_url(), stream=True, timeout=TIMEOUT) as r:
+            if r.status_code != 200:
+                raise RuntimeError(f'Unexpected response: {r.status_code} {r.reason}')
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+        # xlrd reopens the file by name, so buffered bytes must be on disk first.
+        f.flush()
+
+        wb = xlrd.open_workbook(f.name)
+        sheet = wb.sheet_by_index(0)
+        rows = []
+        for idx in range(1, sheet.nrows):
+            row = sheet.row_values(idx)
+            rows.append((int(row[0]), row[1], int(row[2]), row[3], int(row[4]), row[5]))
+        return rows
+
+
+def _quote(value: str) -> str:
+    """Return value as a SQL string literal with embedded single quotes doubled."""
+    return "'" + str(value).replace("'", "''") + "'"
+
+
+def _insert(table: str, rows: List[str]) -> str:
+    """Return one multi-row INSERT for table, or '' when there are no rows."""
+    if not rows:
+        # "INSERT ... VALUES;" without any tuple is not valid SQL.
+        return ''
+    return f'INSERT INTO {table} VALUES\n' + ',\n'.join(rows) + ';\n\n'
+
+
+def write_sql(data: List[Tuple[int, str, int, str, int, str]]) -> None:
+    """Write INSERT statements for AT_gemeinde, AT_ort and AT_plz to plz.sql.
+
+    data holds (plz, dest, okz, ort name, gkz, gemeinde name) tuples. The file
+    is created in the current working directory. Rows of AT_gemeinde and
+    AT_ort are de-duplicated by gkz and okz respectively (the last name seen
+    wins); AT_plz gets one row per input tuple.
+    """
+    gemeinden = {gkz: name for _, _, _, _, gkz, name in data}
+    orte = {okz: (name, gkz) for _, _, okz, name, gkz, _ in data}
+
+    statements = [
+        _insert('AT_gemeinde', [
+            f'({gkz}, {gkz // 10000}, {_quote(name)})'
+            for gkz, name in gemeinden.items()
+        ]),
+        _insert('AT_ort', [
+            f'({okz:5}, {gkz}, {_quote(name)})'
+            for okz, (name, gkz) in orte.items()
+        ]),
+        _insert('AT_plz', [
+            # Third column combines plz and okz into a single number.
+            f'({plz}, {okz:5}, {100000 * plz + okz}, {_quote(dest)})'
+            for plz, dest, okz, _, _, _ in data
+        ]),
+    ]
+
+    # newline='\n' keeps the output byte-stable across platforms.
+    with open('plz.sql', 'w', encoding='utf8', newline='\n') as f:
+        f.write('\n' + ''.join(statements))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Download the PLZ list from post.at and write plz.sql.')
+    parser.parse_args()
+    write_sql(download_excel())
diff --git a/sql/base.sql b/sql/base.sql
index 4e0ab5f..54bbd5b 100644
--- a/sql/base.sql
+++ b/sql/base.sql
@@ -7,8 +7,9 @@ INSERT INTO country VALUES
 ('SK', 'SVK', 703, 'Slowakei', FALSE);
 
 INSERT INTO currency VALUES
-('EUR', 'Euro', '€', 10000),
-('ATS', 'Schilling', 'S', 137603);
+('EUR', 'Euro', '€', 10000),
+('ATS', 'Schilling', 'S', 137603),
+('XXP', 'Punkte', 'Pkt.', NULL);
 
 INSERT INTO AT_bundesland VALUES
 (1, 'B', 'Bgld.', 'Burgenland'),