Fix UTF-8 handling in Python

This commit is contained in:
2023-04-04 00:22:36 +02:00
parent 1defd4259f
commit 0125e8ecee
5 changed files with 16 additions and 10 deletions

View File

@@ -1,4 +1,5 @@
#!/bin/env python3
# -*- coding: utf-8 -*-
from typing import List, Tuple, Callable
import argparse
@@ -155,7 +156,7 @@ def download_kgv() -> List[KgvRow]:
if first:
first = False
continue
line = r_line.decode('utf8').rstrip()
line = r_line.decode('utf-8').rstrip()
row = [c[1:-1] if c[0] == '"' else int(c) for c in line.split(';')]
rows.append((int(row[0]), str(row[1]), int(row[3]), str(row[4])))
return rows
@@ -185,7 +186,7 @@ def download_ov_land(bundesland: str) -> List[OvRow]:
if not valid:
continue
with open(f'out/{bundesland}/{page_num + 1:03}.txt', 'w+') as o:
with open(f'out/{bundesland}/{page_num + 1:03}.txt', 'w+', encoding='utf-8') as o:
o.write(text)
return rows
@@ -208,7 +209,7 @@ def parse_ov() -> List[OvRow]:
gkz = None
last = None
for page_name in sorted(os.listdir(f'out/{bundesland}')):
with open(f'out/{bundesland}/{page_name}', 'r') as f:
with open(f'out/{bundesland}/{page_name}', 'r', encoding='utf-8') as f:
cont = False
for line in f:
line = line.rstrip()
@@ -279,14 +280,14 @@ def write_sql(plz_rows: List[PlzRow], plz_dest_rows: List[PlzDestRow], kgv_rows:
with open('90.plz.sql', 'wb') as f:
f.write(b"\nINSERT INTO AT_gem VALUES\n")
for gkz, (name, _, _) in gemeinden.items():
f.write(f"({gkz:5}, '{name}'),\n".encode('utf8'))
f.write(f"({gkz:5}, '{name}'),\n".encode('utf-8'))
f.seek(-2, 1)
f.write(b';\n')
f.write(b"\nINSERT INTO AT_kg VALUES\n")
for kgnr, name, gkz, _ in kgv_rows:
gemeinden[gkz][1].append(kgnr)
f.write(f"({kgnr:5}, {gkz:5}, '{name}'),\n".encode('utf8'))
f.write(f"({kgnr:5}, {gkz:5}, '{name}'),\n".encode('utf-8'))
f.seek(-2, 1)
f.write(b';\n')
@@ -317,20 +318,20 @@ def write_sql(plz_rows: List[PlzRow], plz_dest_rows: List[PlzDestRow], kgv_rows:
if n11 in n21 or n11 in n22 or n12 in n21 or n12 in n22 or n21 in n11 or n21 in n12 or n22 in n11 or n22 in n12:
kgnr_o = kgnr
f.write(f"({okz:5}, {gkz:5}, {kgnr_o if kgnr_o is not None else 'NULL':>5}, '{name}'),\n".encode('utf8'))
f.write(f"({okz:5}, {gkz:5}, {kgnr_o if kgnr_o is not None else 'NULL':>5}, '{name}'),\n".encode('utf-8'))
f.seek(-2, 1)
f.write(b';\n')
f.write(b"\nINSERT INTO AT_plz VALUES\n")
for plz, ort, blnr, plz_type, internal, addr, po_box in plz_rows:
f.write(f"({plz:4}, '{ort}', {blnr}, '{plz_type}', {internal and 'TRUE' or 'FALSE'}, "
f"{addr and 'TRUE' or 'FALSE'}, {po_box and 'TRUE' or 'FALSE'}),\n".encode('utf8'))
f"{addr and 'TRUE' or 'FALSE'}, {po_box and 'TRUE' or 'FALSE'}),\n".encode('utf-8'))
f.seek(-2, 1)
f.write(b';\n')
f.write(b"\nINSERT INTO AT_plz_dest VALUES\n")
for plz, dest, okz, _, _, _ in plz_dest_rows:
f.write(f"({plz:4}, {okz:5}, '{dest}'),\n".encode('utf8'))
f.write(f"({plz:4}, {okz:5}, '{dest}'),\n".encode('utf-8'))
f.seek(-2, 1)
f.write(b';\n')