Fix UTF-8 handling in Python
This commit is contained in:
17
data/plz.py
17
data/plz.py
@ -1,4 +1,5 @@
|
|||||||
#!/bin/env python3
|
#!/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from typing import List, Tuple, Callable
|
from typing import List, Tuple, Callable
|
||||||
import argparse
|
import argparse
|
||||||
@ -155,7 +156,7 @@ def download_kgv() -> List[KgvRow]:
|
|||||||
if first:
|
if first:
|
||||||
first = False
|
first = False
|
||||||
continue
|
continue
|
||||||
line = r_line.decode('utf8').rstrip()
|
line = r_line.decode('utf-8').rstrip()
|
||||||
row = [c[1:-1] if c[0] == '"' else int(c) for c in line.split(';')]
|
row = [c[1:-1] if c[0] == '"' else int(c) for c in line.split(';')]
|
||||||
rows.append((int(row[0]), str(row[1]), int(row[3]), str(row[4])))
|
rows.append((int(row[0]), str(row[1]), int(row[3]), str(row[4])))
|
||||||
return rows
|
return rows
|
||||||
@ -185,7 +186,7 @@ def download_ov_land(bundesland: str) -> List[OvRow]:
|
|||||||
if not valid:
|
if not valid:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
with open(f'out/{bundesland}/{page_num + 1:03}.txt', 'w+') as o:
|
with open(f'out/{bundesland}/{page_num + 1:03}.txt', 'w+', encoding='utf-8') as o:
|
||||||
o.write(text)
|
o.write(text)
|
||||||
|
|
||||||
return rows
|
return rows
|
||||||
@ -208,7 +209,7 @@ def parse_ov() -> List[OvRow]:
|
|||||||
gkz = None
|
gkz = None
|
||||||
last = None
|
last = None
|
||||||
for page_name in sorted(os.listdir(f'out/{bundesland}')):
|
for page_name in sorted(os.listdir(f'out/{bundesland}')):
|
||||||
with open(f'out/{bundesland}/{page_name}', 'r') as f:
|
with open(f'out/{bundesland}/{page_name}', 'r', encoding='utf-8') as f:
|
||||||
cont = False
|
cont = False
|
||||||
for line in f:
|
for line in f:
|
||||||
line = line.rstrip()
|
line = line.rstrip()
|
||||||
@ -279,14 +280,14 @@ def write_sql(plz_rows: List[PlzRow], plz_dest_rows: List[PlzDestRow], kgv_rows:
|
|||||||
with open('90.plz.sql', 'wb') as f:
|
with open('90.plz.sql', 'wb') as f:
|
||||||
f.write(b"\nINSERT INTO AT_gem VALUES\n")
|
f.write(b"\nINSERT INTO AT_gem VALUES\n")
|
||||||
for gkz, (name, _, _) in gemeinden.items():
|
for gkz, (name, _, _) in gemeinden.items():
|
||||||
f.write(f"({gkz:5}, '{name}'),\n".encode('utf8'))
|
f.write(f"({gkz:5}, '{name}'),\n".encode('utf-8'))
|
||||||
f.seek(-2, 1)
|
f.seek(-2, 1)
|
||||||
f.write(b';\n')
|
f.write(b';\n')
|
||||||
|
|
||||||
f.write(b"\nINSERT INTO AT_kg VALUES\n")
|
f.write(b"\nINSERT INTO AT_kg VALUES\n")
|
||||||
for kgnr, name, gkz, _ in kgv_rows:
|
for kgnr, name, gkz, _ in kgv_rows:
|
||||||
gemeinden[gkz][1].append(kgnr)
|
gemeinden[gkz][1].append(kgnr)
|
||||||
f.write(f"({kgnr:5}, {gkz:5}, '{name}'),\n".encode('utf8'))
|
f.write(f"({kgnr:5}, {gkz:5}, '{name}'),\n".encode('utf-8'))
|
||||||
f.seek(-2, 1)
|
f.seek(-2, 1)
|
||||||
f.write(b';\n')
|
f.write(b';\n')
|
||||||
|
|
||||||
@ -317,20 +318,20 @@ def write_sql(plz_rows: List[PlzRow], plz_dest_rows: List[PlzDestRow], kgv_rows:
|
|||||||
if n11 in n21 or n11 in n22 or n12 in n21 or n12 in n22 or n21 in n11 or n21 in n12 or n22 in n11 or n22 in n12:
|
if n11 in n21 or n11 in n22 or n12 in n21 or n12 in n22 or n21 in n11 or n21 in n12 or n22 in n11 or n22 in n12:
|
||||||
kgnr_o = kgnr
|
kgnr_o = kgnr
|
||||||
|
|
||||||
f.write(f"({okz:5}, {gkz:5}, {kgnr_o if kgnr_o is not None else 'NULL':>5}, '{name}'),\n".encode('utf8'))
|
f.write(f"({okz:5}, {gkz:5}, {kgnr_o if kgnr_o is not None else 'NULL':>5}, '{name}'),\n".encode('utf-8'))
|
||||||
f.seek(-2, 1)
|
f.seek(-2, 1)
|
||||||
f.write(b';\n')
|
f.write(b';\n')
|
||||||
|
|
||||||
f.write(b"\nINSERT INTO AT_plz VALUES\n")
|
f.write(b"\nINSERT INTO AT_plz VALUES\n")
|
||||||
for plz, ort, blnr, plz_type, internal, addr, po_box in plz_rows:
|
for plz, ort, blnr, plz_type, internal, addr, po_box in plz_rows:
|
||||||
f.write(f"({plz:4}, '{ort}', {blnr}, '{plz_type}', {internal and 'TRUE' or 'FALSE'}, "
|
f.write(f"({plz:4}, '{ort}', {blnr}, '{plz_type}', {internal and 'TRUE' or 'FALSE'}, "
|
||||||
f"{addr and 'TRUE' or 'FALSE'}, {po_box and 'TRUE' or 'FALSE'}),\n".encode('utf8'))
|
f"{addr and 'TRUE' or 'FALSE'}, {po_box and 'TRUE' or 'FALSE'}),\n".encode('utf-8'))
|
||||||
f.seek(-2, 1)
|
f.seek(-2, 1)
|
||||||
f.write(b';\n')
|
f.write(b';\n')
|
||||||
|
|
||||||
f.write(b"\nINSERT INTO AT_plz_dest VALUES\n")
|
f.write(b"\nINSERT INTO AT_plz_dest VALUES\n")
|
||||||
for plz, dest, okz, _, _, _ in plz_dest_rows:
|
for plz, dest, okz, _, _, _ in plz_dest_rows:
|
||||||
f.write(f"({plz:4}, {okz:5}, '{dest}'),\n".encode('utf8'))
|
f.write(f"({plz:4}, {okz:5}, '{dest}'),\n".encode('utf-8'))
|
||||||
f.seek(-2, 1)
|
f.seek(-2, 1)
|
||||||
f.write(b';\n')
|
f.write(b';\n')
|
||||||
|
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
#!/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from typing import Iterator, Dict, Any, Optional, Tuple
|
from typing import Iterator, Dict, Any, Optional, Tuple
|
||||||
import re
|
import re
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#!/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
#!/bin/env python3
|
#!/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
import argparse
|
import argparse
|
||||||
@ -74,8 +75,8 @@ if __name__ == '__main__':
|
|||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
sqlite3.register_adapter(datetime.date, lambda d: str(d))
|
sqlite3.register_adapter(datetime.date, lambda d: d.strftime('%Y-%m-%d'))
|
||||||
sqlite3.register_adapter(datetime.time, lambda t: str(t))
|
sqlite3.register_adapter(datetime.time, lambda t: t.strftime('%H:%M:%S'))
|
||||||
|
|
||||||
DB_CNX = sqlite3.connect(args.db)
|
DB_CNX = sqlite3.connect(args.db)
|
||||||
DB_CNX.create_function('REGEXP', 2, sqlite_regexp)
|
DB_CNX.create_function('REGEXP', 2, sqlite_regexp)
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
#!/bin/env python3
|
#!/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from typing import Dict, Any, Tuple, Optional, List, Iterable
|
from typing import Dict, Any, Tuple, Optional, List, Iterable
|
||||||
import argparse
|
import argparse
|
||||||
|
Reference in New Issue
Block a user