Fix UTF-8 handling in Python

This commit is contained in:
2023-04-04 00:22:36 +02:00
parent 1defd4259f
commit 0125e8ecee
5 changed files with 16 additions and 10 deletions

View File

@@ -1,4 +1,5 @@
#!/bin/env python3 #!/bin/env python3
# -*- coding: utf-8 -*-
from typing import List, Tuple, Callable from typing import List, Tuple, Callable
import argparse import argparse
@@ -155,7 +156,7 @@ def download_kgv() -> List[KgvRow]:
if first: if first:
first = False first = False
continue continue
line = r_line.decode('utf8').rstrip() line = r_line.decode('utf-8').rstrip()
row = [c[1:-1] if c[0] == '"' else int(c) for c in line.split(';')] row = [c[1:-1] if c[0] == '"' else int(c) for c in line.split(';')]
rows.append((int(row[0]), str(row[1]), int(row[3]), str(row[4]))) rows.append((int(row[0]), str(row[1]), int(row[3]), str(row[4])))
return rows return rows
@@ -185,7 +186,7 @@ def download_ov_land(bundesland: str) -> List[OvRow]:
if not valid: if not valid:
continue continue
with open(f'out/{bundesland}/{page_num + 1:03}.txt', 'w+') as o: with open(f'out/{bundesland}/{page_num + 1:03}.txt', 'w+', encoding='utf-8') as o:
o.write(text) o.write(text)
return rows return rows
@@ -208,7 +209,7 @@ def parse_ov() -> List[OvRow]:
gkz = None gkz = None
last = None last = None
for page_name in sorted(os.listdir(f'out/{bundesland}')): for page_name in sorted(os.listdir(f'out/{bundesland}')):
with open(f'out/{bundesland}/{page_name}', 'r') as f: with open(f'out/{bundesland}/{page_name}', 'r', encoding='utf-8') as f:
cont = False cont = False
for line in f: for line in f:
line = line.rstrip() line = line.rstrip()
@@ -279,14 +280,14 @@ def write_sql(plz_rows: List[PlzRow], plz_dest_rows: List[PlzDestRow], kgv_rows:
with open('90.plz.sql', 'wb') as f: with open('90.plz.sql', 'wb') as f:
f.write(b"\nINSERT INTO AT_gem VALUES\n") f.write(b"\nINSERT INTO AT_gem VALUES\n")
for gkz, (name, _, _) in gemeinden.items(): for gkz, (name, _, _) in gemeinden.items():
f.write(f"({gkz:5}, '{name}'),\n".encode('utf8')) f.write(f"({gkz:5}, '{name}'),\n".encode('utf-8'))
f.seek(-2, 1) f.seek(-2, 1)
f.write(b';\n') f.write(b';\n')
f.write(b"\nINSERT INTO AT_kg VALUES\n") f.write(b"\nINSERT INTO AT_kg VALUES\n")
for kgnr, name, gkz, _ in kgv_rows: for kgnr, name, gkz, _ in kgv_rows:
gemeinden[gkz][1].append(kgnr) gemeinden[gkz][1].append(kgnr)
f.write(f"({kgnr:5}, {gkz:5}, '{name}'),\n".encode('utf8')) f.write(f"({kgnr:5}, {gkz:5}, '{name}'),\n".encode('utf-8'))
f.seek(-2, 1) f.seek(-2, 1)
f.write(b';\n') f.write(b';\n')
@@ -317,20 +318,20 @@ def write_sql(plz_rows: List[PlzRow], plz_dest_rows: List[PlzDestRow], kgv_rows:
if n11 in n21 or n11 in n22 or n12 in n21 or n12 in n22 or n21 in n11 or n21 in n12 or n22 in n11 or n22 in n12: if n11 in n21 or n11 in n22 or n12 in n21 or n12 in n22 or n21 in n11 or n21 in n12 or n22 in n11 or n22 in n12:
kgnr_o = kgnr kgnr_o = kgnr
f.write(f"({okz:5}, {gkz:5}, {kgnr_o if kgnr_o is not None else 'NULL':>5}, '{name}'),\n".encode('utf8')) f.write(f"({okz:5}, {gkz:5}, {kgnr_o if kgnr_o is not None else 'NULL':>5}, '{name}'),\n".encode('utf-8'))
f.seek(-2, 1) f.seek(-2, 1)
f.write(b';\n') f.write(b';\n')
f.write(b"\nINSERT INTO AT_plz VALUES\n") f.write(b"\nINSERT INTO AT_plz VALUES\n")
for plz, ort, blnr, plz_type, internal, addr, po_box in plz_rows: for plz, ort, blnr, plz_type, internal, addr, po_box in plz_rows:
f.write(f"({plz:4}, '{ort}', {blnr}, '{plz_type}', {internal and 'TRUE' or 'FALSE'}, " f.write(f"({plz:4}, '{ort}', {blnr}, '{plz_type}', {internal and 'TRUE' or 'FALSE'}, "
f"{addr and 'TRUE' or 'FALSE'}, {po_box and 'TRUE' or 'FALSE'}),\n".encode('utf8')) f"{addr and 'TRUE' or 'FALSE'}, {po_box and 'TRUE' or 'FALSE'}),\n".encode('utf-8'))
f.seek(-2, 1) f.seek(-2, 1)
f.write(b';\n') f.write(b';\n')
f.write(b"\nINSERT INTO AT_plz_dest VALUES\n") f.write(b"\nINSERT INTO AT_plz_dest VALUES\n")
for plz, dest, okz, _, _, _ in plz_dest_rows: for plz, dest, okz, _, _, _ in plz_dest_rows:
f.write(f"({plz:4}, {okz:5}, '{dest}'),\n".encode('utf8')) f.write(f"({plz:4}, {okz:5}, '{dest}'),\n".encode('utf-8'))
f.seek(-2, 1) f.seek(-2, 1)
f.write(b';\n') f.write(b';\n')

View File

@@ -1,3 +1,5 @@
#!/bin/env python3
# -*- coding: utf-8 -*-
from typing import Iterator, Dict, Any, Optional, Tuple from typing import Iterator, Dict, Any, Optional, Tuple
import re import re

View File

@@ -1,3 +1,4 @@
#!/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from typing import Any from typing import Any

View File

@@ -1,4 +1,5 @@
#!/bin/env python3 #!/bin/env python3
# -*- coding: utf-8 -*-
from typing import List, Optional from typing import List, Optional
import argparse import argparse
@@ -74,8 +75,8 @@ if __name__ == '__main__':
except FileNotFoundError: except FileNotFoundError:
pass pass
sqlite3.register_adapter(datetime.date, lambda d: str(d)) sqlite3.register_adapter(datetime.date, lambda d: d.strftime('%Y-%m-%d'))
sqlite3.register_adapter(datetime.time, lambda t: str(t)) sqlite3.register_adapter(datetime.time, lambda t: t.strftime('%H:%M:%S'))
DB_CNX = sqlite3.connect(args.db) DB_CNX = sqlite3.connect(args.db)
DB_CNX.create_function('REGEXP', 2, sqlite_regexp) DB_CNX.create_function('REGEXP', 2, sqlite_regexp)

View File

@@ -1,4 +1,5 @@
#!/bin/env python3 #!/bin/env python3
# -*- coding: utf-8 -*-
from typing import Dict, Any, Tuple, Optional, List, Iterable from typing import Dict, Any, Tuple, Optional, List, Iterable
import argparse import argparse