Fix UTF-8 handling in Python
This commit is contained in:
17
data/plz.py
17
data/plz.py
@@ -1,4 +1,5 @@
|
||||
#!/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from typing import List, Tuple, Callable
|
||||
import argparse
|
||||
@@ -155,7 +156,7 @@ def download_kgv() -> List[KgvRow]:
|
||||
if first:
|
||||
first = False
|
||||
continue
|
||||
line = r_line.decode('utf8').rstrip()
|
||||
line = r_line.decode('utf-8').rstrip()
|
||||
row = [c[1:-1] if c[0] == '"' else int(c) for c in line.split(';')]
|
||||
rows.append((int(row[0]), str(row[1]), int(row[3]), str(row[4])))
|
||||
return rows
|
||||
@@ -185,7 +186,7 @@ def download_ov_land(bundesland: str) -> List[OvRow]:
|
||||
if not valid:
|
||||
continue
|
||||
|
||||
with open(f'out/{bundesland}/{page_num + 1:03}.txt', 'w+') as o:
|
||||
with open(f'out/{bundesland}/{page_num + 1:03}.txt', 'w+', encoding='utf-8') as o:
|
||||
o.write(text)
|
||||
|
||||
return rows
|
||||
@@ -208,7 +209,7 @@ def parse_ov() -> List[OvRow]:
|
||||
gkz = None
|
||||
last = None
|
||||
for page_name in sorted(os.listdir(f'out/{bundesland}')):
|
||||
with open(f'out/{bundesland}/{page_name}', 'r') as f:
|
||||
with open(f'out/{bundesland}/{page_name}', 'r', encoding='utf-8') as f:
|
||||
cont = False
|
||||
for line in f:
|
||||
line = line.rstrip()
|
||||
@@ -279,14 +280,14 @@ def write_sql(plz_rows: List[PlzRow], plz_dest_rows: List[PlzDestRow], kgv_rows:
|
||||
with open('90.plz.sql', 'wb') as f:
|
||||
f.write(b"\nINSERT INTO AT_gem VALUES\n")
|
||||
for gkz, (name, _, _) in gemeinden.items():
|
||||
f.write(f"({gkz:5}, '{name}'),\n".encode('utf8'))
|
||||
f.write(f"({gkz:5}, '{name}'),\n".encode('utf-8'))
|
||||
f.seek(-2, 1)
|
||||
f.write(b';\n')
|
||||
|
||||
f.write(b"\nINSERT INTO AT_kg VALUES\n")
|
||||
for kgnr, name, gkz, _ in kgv_rows:
|
||||
gemeinden[gkz][1].append(kgnr)
|
||||
f.write(f"({kgnr:5}, {gkz:5}, '{name}'),\n".encode('utf8'))
|
||||
f.write(f"({kgnr:5}, {gkz:5}, '{name}'),\n".encode('utf-8'))
|
||||
f.seek(-2, 1)
|
||||
f.write(b';\n')
|
||||
|
||||
@@ -317,20 +318,20 @@ def write_sql(plz_rows: List[PlzRow], plz_dest_rows: List[PlzDestRow], kgv_rows:
|
||||
if n11 in n21 or n11 in n22 or n12 in n21 or n12 in n22 or n21 in n11 or n21 in n12 or n22 in n11 or n22 in n12:
|
||||
kgnr_o = kgnr
|
||||
|
||||
f.write(f"({okz:5}, {gkz:5}, {kgnr_o if kgnr_o is not None else 'NULL':>5}, '{name}'),\n".encode('utf8'))
|
||||
f.write(f"({okz:5}, {gkz:5}, {kgnr_o if kgnr_o is not None else 'NULL':>5}, '{name}'),\n".encode('utf-8'))
|
||||
f.seek(-2, 1)
|
||||
f.write(b';\n')
|
||||
|
||||
f.write(b"\nINSERT INTO AT_plz VALUES\n")
|
||||
for plz, ort, blnr, plz_type, internal, addr, po_box in plz_rows:
|
||||
f.write(f"({plz:4}, '{ort}', {blnr}, '{plz_type}', {internal and 'TRUE' or 'FALSE'}, "
|
||||
f"{addr and 'TRUE' or 'FALSE'}, {po_box and 'TRUE' or 'FALSE'}),\n".encode('utf8'))
|
||||
f"{addr and 'TRUE' or 'FALSE'}, {po_box and 'TRUE' or 'FALSE'}),\n".encode('utf-8'))
|
||||
f.seek(-2, 1)
|
||||
f.write(b';\n')
|
||||
|
||||
f.write(b"\nINSERT INTO AT_plz_dest VALUES\n")
|
||||
for plz, dest, okz, _, _, _ in plz_dest_rows:
|
||||
f.write(f"({plz:4}, {okz:5}, '{dest}'),\n".encode('utf8'))
|
||||
f.write(f"({plz:4}, {okz:5}, '{dest}'),\n".encode('utf-8'))
|
||||
f.seek(-2, 1)
|
||||
f.write(b';\n')
|
||||
|
||||
|
Reference in New Issue
Block a user