Add AT_plz_dest table

This commit is contained in:
2023-03-14 11:25:18 +01:00
parent 783396a0b0
commit 827fcb517b
6 changed files with 92 additions and 32 deletions

View File

@ -1,6 +1,6 @@
#!/bin/env python3
from typing import List, Tuple
from typing import List, Tuple, Callable
import argparse
import requests
import re
@ -12,7 +12,8 @@ import PyPDF2
PLZ_URL = 'https://www.post.at/g/c/postlexikon'
PLZ_BUTTON = re.compile(r'title="PLZ Bestimmungsort" href="(.*?)"')
PLZ_BUTTON = re.compile(r'title="PLZ Verzeichnis" href="(.*?)"')
PLZ_DEST_BUTTON = re.compile(r'title="PLZ Bestimmungsort" href="(.*?)"')
KGV_URL = 'https://www.bev.gv.at/Services/Downloads/Produktbezogene-Downloads/Unentgeltliche-Produkte/Kataster-Verzeichnisse/Katastralgemeindenverzeichnis.html'
KGV_LINK = re.compile(r'<a\s+href="(.*?)".*?>Katastral')
@ -31,10 +32,23 @@ CODES = re.compile(r'Agh|Alm|Alpe|B|Bd|Bwg|Burg|Camp|D|E|Fbk|Fhei|Gh|Hgr|Hot|Ind
STRIP_CODE = re.compile(r' *(' + CODES.pattern + r')[()X0-9. -]*$')
STRIP_CODE_ALL = re.compile(r' +\b(' + CODES.pattern + r')\b.*$')
PlzRow = Tuple[int, str, int, str, int, str]
PlzRow = Tuple[int, str, int, str, bool, bool, bool]
PlzDestRow = Tuple[int, str, int, str, int, str]
KgvRow = Tuple[int, str, int, str]
OvRow = Tuple[int, int, str]
# Lookup from the federal-state abbreviation used in the PLZ sheet (read from
# column 2 of each row by download_plz) to the numeric value that write_sql
# emits as `blnr` in the AT_plz table.
# NOTE(review): the values 1-9 presumably follow the official Austrian
# Bundesland numbering (Burgenland=1 ... Wien=9) - confirm against the schema.
# An abbreviation missing from this table makes the lookup raise KeyError.
PLZ_BUNDESLAND = {
'B': 1,
'K': 2,
'N': 3,
'O': 4,
'Sa': 5,
'St': 6,
'T': 7,
'V': 8,
'W': 9,
}
WIEN_UMGEBUNG = {
32401: 30729,
32402: 30730,
@ -62,23 +76,23 @@ WIEN_UMGEBUNG = {
GKZ = {}
def get_plz_url() -> str:
def get_plz_url(button: re.Pattern) -> str:
r = requests.get(PLZ_URL, headers={'User-Agent': 'Mozilla/5.0'})
if r.status_code != 200:
raise RuntimeError(f'Unexpected response: {r.status_code} {r.reason}')
matches = PLZ_BUTTON.findall(r.text)
matches = button.findall(r.text)
if len(matches) == 0:
raise RuntimeError('Unable to find url of file')
return matches[0]
def download_plz() -> List[PlzRow]:
def download_excel(url: str, transform: Callable[[List[str]], Tuple]) -> List:
f_name = None
try:
f = tempfile.NamedTemporaryFile(delete=False)
with requests.get(get_plz_url(), stream=True, headers={'User-Agent': 'Mozilla/5.0'}) as r:
with requests.get(url, stream=True, headers={'User-Agent': 'Mozilla/5.0'}) as r:
if r.status_code != 200:
raise RuntimeError(f'Unexpected response: {r.status_code} {r.reason}')
for chunk in r.iter_content(chunk_size=8192):
@ -91,13 +105,24 @@ def download_plz() -> List[PlzRow]:
sheet = wb.sheet_by_index(0)
for r in range(1, sheet.nrows):
row = sheet.row_values(r)
rows.append((int(row[0]), row[1], int(row[2]), row[3], int(row[4]), row[5]))
rows.append(transform(row))
return rows
finally:
if f_name is not None:
os.remove(f_name)
def download_plz() -> List[PlzRow]:
    """Download the sheet linked by ``PLZ_BUTTON`` and return it as PlzRow tuples.

    The URL is resolved with ``get_plz_url(PLZ_BUTTON)`` and the file is read
    through ``download_excel``. Each sheet row is converted as follows:

    * column 0 -> ``int``
    * column 1 -> passed through unchanged
    * column 2 -> looked up in ``PLZ_BUNDESLAND`` (``KeyError`` if unknown)
    * column 5 -> passed through unchanged
    * column 6 -> ``True`` when the cell equals ``'intern'``
    * columns 7 and 8 -> ``True`` when the cell equals ``'Ja'``

    Columns 3 and 4 of the sheet are not used.
    """
    def to_plz_row(cells) -> PlzRow:
        # Flag columns are textual in the sheet; turn them into booleans here.
        is_internal = cells[6] == 'intern'
        flag_7 = cells[7] == 'Ja'
        flag_8 = cells[8] == 'Ja'
        return (
            int(cells[0]),
            cells[1],
            PLZ_BUNDESLAND[cells[2]],
            cells[5],
            is_internal,
            flag_7,
            flag_8,
        )

    return download_excel(get_plz_url(PLZ_BUTTON), to_plz_row)
def download_plz_dest() -> List[PlzDestRow]:
    """Download the sheet linked by ``PLZ_DEST_BUTTON`` as PlzDestRow tuples.

    The URL is resolved with ``get_plz_url(PLZ_DEST_BUTTON)`` and the file is
    read through ``download_excel``. Of each sheet row only the first six
    columns are kept: columns 0, 2 and 4 are converted to ``int``, columns
    1, 3 and 5 are passed through unchanged.
    """
    def to_plz_dest_row(cells) -> PlzDestRow:
        return (
            int(cells[0]),
            cells[1],
            int(cells[2]),
            cells[3],
            int(cells[4]),
            cells[5],
        )

    return download_excel(get_plz_url(PLZ_DEST_BUTTON), to_plz_dest_row)
def get_kvg_zip_url() -> str:
r = requests.get(KGV_URL)
if r.status_code != 200:
@ -244,9 +269,9 @@ def parse_ov() -> List[OvRow]:
return rows
def write_sql(plz_rows: List[PlzRow], kgv_rows: List[KgvRow], ov_rows: List[OvRow]) -> None:
def write_sql(plz_rows: List[PlzRow], plz_dest_rows: List[PlzDestRow], kgv_rows: List[KgvRow], ov_rows: List[OvRow]) -> None:
kgv = {kgnr: (kg_name, gkz, gem_name) for kgnr, kg_name, gkz, gem_name in kgv_rows}
ov = {okz: (o_name, gkz) for plz, _, okz, o_name, gkz, _ in plz_rows}
ov = {okz: (o_name, gkz) for plz, _, okz, o_name, gkz, _ in plz_dest_rows}
ov.update({okz: (name, gkz) for gkz, okz, name in ov_rows})
gemeinden = {gkz: (gem_name, [], []) for kgnr, kg_name, gkz, gem_name in kgv_rows}
@ -297,7 +322,14 @@ def write_sql(plz_rows: List[PlzRow], kgv_rows: List[KgvRow], ov_rows: List[OvRo
f.write(b';\n')
f.write(b"\nINSERT INTO AT_plz VALUES\n")
for plz, dest, okz, _, _, _ in plz_rows:
for plz, ort, blnr, plz_type, internal, addr, po_box in plz_rows:
f.write(f"({plz:4}, '{ort}', {blnr}, '{plz_type}', {internal and 'TRUE' or 'FALSE'}, "
f"{addr and 'TRUE' or 'FALSE'}, {po_box and 'TRUE' or 'FALSE'}),\n".encode('utf8'))
f.seek(-2, 1)
f.write(b';\n')
f.write(b"\nINSERT INTO AT_plz_dest VALUES\n")
for plz, dest, okz, _, _, _ in plz_dest_rows:
f.write(f"({plz:4}, {okz:5}, '{dest}'),\n".encode('utf8'))
f.seek(-2, 1)
f.write(b';\n')
@ -330,8 +362,10 @@ if __name__ == '__main__':
ov_data = parse_ov()
print('Downloading PLZ data from www.post.at')
plz_data = download_plz()
print('Downloading PLZ destination data from www.post.at')
plz_dest_data = download_plz_dest()
print('Downloading Katastralgemeindenverzeichnis from www.bev.gv.at')
kgv_data = download_kgv()
print('Generating 90.plz.sql')
write_sql(plz_data, kgv_data, ov_data)
write_sql(plz_data, plz_dest_data, kgv_data, ov_data)
print('Successfully created 90.plz.sql!')