From 0125e8ecee29b7e71b8b57fb5007fc2162722689 Mon Sep 17 00:00:00 2001 From: Lorenz Stechauner Date: Tue, 4 Apr 2023 00:22:36 +0200 Subject: [PATCH] Fix UTF-8 handling in python --- data/plz.py | 17 +++++++++-------- wgmaster/csv.py | 2 ++ wgmaster/export.py | 1 + wgmaster/import.py | 5 +++-- wgmaster/migrate.py | 1 + 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/data/plz.py b/data/plz.py index 15f2cce..46ebe73 100755 --- a/data/plz.py +++ b/data/plz.py @@ -1,4 +1,5 @@ #!/bin/env python3 +# -*- coding: utf-8 -*- from typing import List, Tuple, Callable import argparse @@ -155,7 +156,7 @@ def download_kgv() -> List[KgvRow]: if first: first = False continue - line = r_line.decode('utf8').rstrip() + line = r_line.decode('utf-8').rstrip() row = [c[1:-1] if c[0] == '"' else int(c) for c in line.split(';')] rows.append((int(row[0]), str(row[1]), int(row[3]), str(row[4]))) return rows @@ -185,7 +186,7 @@ def download_ov_land(bundesland: str) -> List[OvRow]: if not valid: continue - with open(f'out/{bundesland}/{page_num + 1:03}.txt', 'w+') as o: + with open(f'out/{bundesland}/{page_num + 1:03}.txt', 'w+', encoding='utf-8') as o: o.write(text) return rows @@ -208,7 +209,7 @@ def parse_ov() -> List[OvRow]: gkz = None last = None for page_name in sorted(os.listdir(f'out/{bundesland}')): - with open(f'out/{bundesland}/{page_name}', 'r') as f: + with open(f'out/{bundesland}/{page_name}', 'r', encoding='utf-8') as f: cont = False for line in f: line = line.rstrip() @@ -279,14 +280,14 @@ def write_sql(plz_rows: List[PlzRow], plz_dest_rows: List[PlzDestRow], kgv_rows: with open('90.plz.sql', 'wb') as f: f.write(b"\nINSERT INTO AT_gem VALUES\n") for gkz, (name, _, _) in gemeinden.items(): - f.write(f"({gkz:5}, '{name}'),\n".encode('utf8')) + f.write(f"({gkz:5}, '{name}'),\n".encode('utf-8')) f.seek(-2, 1) f.write(b';\n') f.write(b"\nINSERT INTO AT_kg VALUES\n") for kgnr, name, gkz, _ in kgv_rows: gemeinden[gkz][1].append(kgnr) - f.write(f"({kgnr:5}, {gkz:5}, '{name}'),\n".encode('utf8')) + f.write(f"({kgnr:5}, {gkz:5}, '{name}'),\n".encode('utf-8')) f.seek(-2, 1) f.write(b';\n') @@ -317,20 +318,20 @@ def write_sql(plz_rows: List[PlzRow], plz_dest_rows: List[PlzDestRow], kgv_rows: if n11 in n21 or n11 in n22 or n12 in n21 or n12 in n22 or n21 in n11 or n21 in n12 or n22 in n11 or n22 in n12: kgnr_o = kgnr - f.write(f"({okz:5}, {gkz:5}, {kgnr_o if kgnr_o is not None else 'NULL':>5}, '{name}'),\n".encode('utf8')) + f.write(f"({okz:5}, {gkz:5}, {kgnr_o if kgnr_o is not None else 'NULL':>5}, '{name}'),\n".encode('utf-8')) f.seek(-2, 1) f.write(b';\n') f.write(b"\nINSERT INTO AT_plz VALUES\n") for plz, ort, blnr, plz_type, internal, addr, po_box in plz_rows: f.write(f"({plz:4}, '{ort}', {blnr}, '{plz_type}', {internal and 'TRUE' or 'FALSE'}, " - f"{addr and 'TRUE' or 'FALSE'}, {po_box and 'TRUE' or 'FALSE'}),\n".encode('utf8')) + f"{addr and 'TRUE' or 'FALSE'}, {po_box and 'TRUE' or 'FALSE'}),\n".encode('utf-8')) f.seek(-2, 1) f.write(b';\n') f.write(b"\nINSERT INTO AT_plz_dest VALUES\n") for plz, dest, okz, _, _, _ in plz_dest_rows: - f.write(f"({plz:4}, {okz:5}, '{dest}'),\n".encode('utf8')) + f.write(f"({plz:4}, {okz:5}, '{dest}'),\n".encode('utf-8')) f.seek(-2, 1) f.write(b';\n') diff --git a/wgmaster/csv.py b/wgmaster/csv.py index d079fd9..28f2cd7 100644 --- a/wgmaster/csv.py +++ b/wgmaster/csv.py @@ -1,3 +1,5 @@ +#!/bin/env python3 +# -*- coding: utf-8 -*- from typing import Iterator, Dict, Any, Optional, Tuple import re diff --git a/wgmaster/export.py b/wgmaster/export.py index d8bb0e1..9146423 100644 --- a/wgmaster/export.py +++ b/wgmaster/export.py @@ -1,3 +1,4 @@ +#!/bin/env python3 # -*- coding: utf-8 -*- from typing import Any diff --git a/wgmaster/import.py b/wgmaster/import.py index 017c39a..0298f63 100755 --- a/wgmaster/import.py +++ b/wgmaster/import.py @@ -1,4 +1,5 @@ #!/bin/env python3 +# -*- coding: utf-8 -*- from typing import List, Optional import argparse @@ -74,8 +75,8 @@ if __name__ == '__main__': except FileNotFoundError: pass - sqlite3.register_adapter(datetime.date, lambda d: str(d)) - sqlite3.register_adapter(datetime.time, lambda t: str(t)) + sqlite3.register_adapter(datetime.date, lambda d: d.strftime('%Y-%m-%d')) + sqlite3.register_adapter(datetime.time, lambda t: t.strftime('%H:%M:%S')) DB_CNX = sqlite3.connect(args.db) DB_CNX.create_function('REGEXP', 2, sqlite_regexp) diff --git a/wgmaster/migrate.py b/wgmaster/migrate.py index 5278b60..e270221 100755 --- a/wgmaster/migrate.py +++ b/wgmaster/migrate.py @@ -1,4 +1,5 @@ #!/bin/env python3 +# -*- coding: utf-8 -*- from typing import Dict, Any, Tuple, Optional, List, Iterable import argparse