#!/bin/env python3
"""Download the "PLZ Bestimmungsort" spreadsheet from post.at and write plz.sql.

The script scrapes the download link from the Postlexikon page, fetches the
Excel workbook, reads the first six columns of its first sheet and emits three
``INSERT`` statements (``AT_gemeinde``, ``AT_ort``, ``AT_plz``) into
``plz.sql`` in the current working directory.
"""

import argparse
import re
import tempfile
from typing import Iterable, List, Tuple

import requests
import xlrd

URL = 'https://www.post.at/g/c/postlexikon'
BUTTON = re.compile(r'title="PLZ Bestimmungsort" href="(.*?)"')

# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block the script indefinitely.
TIMEOUT = 30

# One spreadsheet row as consumed by write_sql():
# (plz, destination name, okz, ort name, gkz, gemeinde name).
Row = Tuple[int, str, int, str, int, str]


def get_excel_url() -> str:
    """Return the spreadsheet URL found on the Postlexikon page.

    Raises:
        RuntimeError: if the page does not answer with HTTP 200 or the
            download link cannot be located in the returned HTML.
    """
    response = requests.get(
        URL, headers={'User-Agent': 'Mozilla/5.0'}, timeout=TIMEOUT)
    if response.status_code != 200:
        raise RuntimeError(
            f'Unexpected response: {response.status_code} {response.reason}')
    match = BUTTON.search(response.text)
    if match is None:
        raise RuntimeError('Unable to find url of file')
    return match.group(1)


def download_excel() -> List[Tuple[int, str, int, str, int, str]]:
    """Download the workbook and return its data rows.

    The first sheet is read; its first row is skipped (treated as a header).
    Columns 0, 2 and 4 are converted to ``int``; columns 1, 3 and 5 are passed
    through unchanged.

    Raises:
        RuntimeError: if the page or the file does not answer with HTTP 200,
            or the download link is missing (see ``get_excel_url``).
    """
    rows: List[Row] = []
    with tempfile.NamedTemporaryFile() as f:
        with requests.get(get_excel_url(), stream=True,
                          timeout=TIMEOUT) as response:
            if response.status_code != 200:
                raise RuntimeError(
                    f'Unexpected response: '
                    f'{response.status_code} {response.reason}')
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        # xlrd reopens the file by name, so buffered bytes must reach the
        # disk first or the workbook may appear truncated.
        f.flush()
        wb = xlrd.open_workbook(f.name)
        sheet = wb.sheet_by_index(0)
        for row_idx in range(1, sheet.nrows):
            row = sheet.row_values(row_idx)
            rows.append((int(row[0]), row[1], int(row[2]),
                         row[3], int(row[4]), row[5]))
    return rows


def _sql_quote(text: str) -> str:
    """Return *text* as a single-quoted SQL literal with quotes doubled."""
    return "'" + str(text).replace("'", "''") + "'"


def _write_insert(f, table: str, values: Iterable[str]) -> None:
    """Write one multi-row INSERT for *table*; write nothing if no rows."""
    values = list(values)
    if not values:
        # "INSERT ... VALUES;" without any tuple is not valid SQL.
        return
    f.write(f"INSERT INTO {table} VALUES\n")
    f.write(",\n".join(values))
    f.write(";\n\n")


def write_sql(data: List[Tuple[int, str, int, str, int, str]]) -> None:
    """Write ``plz.sql`` containing INSERT statements built from *data*.

    Args:
        data: rows of (plz, destination, okz, ort name, gkz, gemeinde name).

    Three statements are written, in this order: ``AT_gemeinde`` (one tuple
    per distinct gkz), ``AT_ort`` (one tuple per distinct okz) and ``AT_plz``
    (one tuple per input row). For duplicate keys the last row wins. A
    statement whose row set is empty is omitted.
    """
    gemeinden = {gkz: name for _, _, _, _, gkz, name in data}
    orte = {okz: (name, gkz) for _, _, okz, name, gkz, _ in data}

    # newline='\n' keeps the output bytes identical on every platform.
    with open('plz.sql', 'w', encoding='utf8', newline='\n') as f:
        f.write('\n')
        _write_insert(f, 'AT_gemeinde', (
            # Second column is the gkz with its last four digits dropped.
            f"({gem_nr}, {gem_nr // 10000}, {_sql_quote(gem_name)})"
            for gem_nr, gem_name in gemeinden.items()
        ))
        _write_insert(f, 'AT_ort', (
            f"({okz:5}, {gkz}, {_sql_quote(name)})"
            for okz, (name, gkz) in orte.items()
        ))
        _write_insert(f, 'AT_plz', (
            # Third column combines plz and okz into a single number.
            f"({plz}, {okz:5}, {100000 * plz + okz}, {_sql_quote(dest)})"
            for plz, dest, okz, _, _, _ in data
        ))


def main() -> None:
    """Parse the (currently option-less) command line and generate plz.sql."""
    parser = argparse.ArgumentParser()
    parser.parse_args()
    write_sql(download_excel())


if __name__ == '__main__':
    main()