#!/usr/bin/env python3
"""Download the PLZ (postal code) list from www.post.at and emit SQL.

The script looks up the link titled "PLZ Bestimmungsort" on the page at
``URL``, downloads the Excel file behind it, reads the first worksheet and
writes a single ``INSERT`` statement for the ``AT_plz`` table to ``plz.sql``
in the current working directory.
"""
import argparse
import os
import re
import tempfile
from typing import List, Tuple

import requests
import xlrd

URL = 'https://www.post.at/g/c/postlexikon'
# Captures the href of the link whose title is "PLZ Bestimmungsort".
BUTTON = re.compile(r'title="PLZ Bestimmungsort" href="(.*?)"')
# Seconds to wait on the server before giving up; without a timeout
# ``requests`` may block forever on an unresponsive connection.
REQUEST_TIMEOUT = 30

# One worksheet row: columns 0, 2 and 4 converted to int, 1, 3 and 5 as read.
Row = Tuple[int, str, int, str, int, str]


def get_excel_url() -> str:
    """Return the URL of the Excel file linked from the page at ``URL``.

    Raises:
        RuntimeError: if the page does not answer with HTTP 200 or the
            expected link cannot be found in the returned HTML.
        requests.RequestException: on network failure or timeout.
    """
    response = requests.get(
        URL,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=REQUEST_TIMEOUT,
    )
    if response.status_code != 200:
        raise RuntimeError(
            f'Unexpected response: {response.status_code} {response.reason}')

    match = BUTTON.search(response.text)
    if match is None:
        raise RuntimeError('Unable to find url of file')
    return match.group(1)


def download_excel() -> List[Row]:
    """Download the Excel file and return its data rows.

    The workbook is streamed into a temporary file, which is always closed
    and removed again, whether or not the download or parsing succeeds.
    The first worksheet row is skipped (presumably a header row).

    Returns:
        One ``Row`` tuple per worksheet row after the first.

    Raises:
        RuntimeError: if the download does not answer with HTTP 200 (or
            the URL lookup in ``get_excel_url`` fails).
        requests.RequestException: on network failure or timeout.
    """
    # Resolve the URL first so a failed lookup never creates a temp file.
    excel_url = get_excel_url()

    # delete=False: the file is closed and then reopened by name by xlrd,
    # so it must survive close(); removal is handled in ``finally``.
    tmp = tempfile.NamedTemporaryFile(delete=False)
    try:
        with tmp:
            with requests.get(excel_url, stream=True,
                              timeout=REQUEST_TIMEOUT) as response:
                if response.status_code != 200:
                    raise RuntimeError(
                        f'Unexpected response: '
                        f'{response.status_code} {response.reason}')
                for chunk in response.iter_content(chunk_size=8192):
                    tmp.write(chunk)

        workbook = xlrd.open_workbook(tmp.name)
        sheet = workbook.sheet_by_index(0)

        rows = []
        for index in range(1, sheet.nrows):
            cells = sheet.row_values(index)
            rows.append((int(cells[0]), cells[1], int(cells[2]),
                         cells[3], int(cells[4]), cells[5]))
        return rows
    finally:
        os.remove(tmp.name)


def write_sql(data: List[Row]) -> None:
    """Write ``data`` as one ``INSERT INTO AT_plz`` statement to ``plz.sql``.

    Only the first three fields of each row are written, in the order
    (field 0, field 2, field 1). Single quotes inside the text field are
    doubled so the value stays a valid SQL string literal.

    If ``data`` is empty, ``plz.sql`` is created (or truncated) and left
    empty, because an ``INSERT`` without value tuples is not valid SQL.
    """
    values = []
    for plz, dest, okz, _, _, _ in data:
        escaped = str(dest).replace("'", "''")
        values.append(f"({plz:4}, {okz:5}, '{escaped}')")

    with open('plz.sql', 'wb') as f:
        if not values:
            return
        f.write(b"\nINSERT INTO AT_plz VALUES\n")
        f.write(',\n'.join(values).encode('utf8'))
        f.write(b';\n')


def main() -> None:
    """Command-line entry point: download the data and write ``plz.sql``."""
    # No options are defined; the parser provides --help and rejects
    # unexpected arguments.
    parser = argparse.ArgumentParser()
    parser.parse_args()

    print('Downloading PLZ data from www.post.at')
    write_sql(download_excel())
    print('Successfully created plz.sql!')


if __name__ == '__main__':
    main()