#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Parse and format lines of semicolon-separated text with inferred types.

Text format handled by this module:
  * fields are separated by ``;`` and a record ends at ``\\n``;
  * strings are wrapped in double quotes (no escape sequence is supported);
  * ``T`` / ``F`` are booleans, an empty field is ``None``;
  * integers, simple decimals, ``YYYY-MM-DD`` dates and ``HH:MM:SS`` times
    are written bare.
"""

import datetime
import re
from typing import Any, Dict, Iterator, Optional, Tuple

RE_INT = re.compile(r'-?[0-9]+')
RE_FLOAT = re.compile(r'-?[0-9]+\.[0-9]+')


def cast_value(value: str) -> Any:
    """Infer the type of one raw field and return the converted value.

    The forms are tested in this order:
      1. empty string              -> ``None``
      2. text in double quotes     -> ``str`` with the quotes removed
      3. ``T`` / ``F``             -> ``True`` / ``False``
      4. optional ``-`` and digits -> ``int``
      5. ``digits.digits``         -> ``float``
      6. ``YYYY-MM-DD`` shape      -> ``datetime.date``
      7. ``HH:MM:SS`` shape        -> ``datetime.time``

    Raises:
        RuntimeError: if the value matches none of the forms above.
        ValueError: if the value has the shape of a date or time but is not
            a valid one (raised by the ``datetime`` parsers).
    """
    if value == '':
        return None
    # At least two characters are required: a lone '"' cannot be both the
    # opening and the closing quote, so it is rejected below instead of
    # being silently turned into an empty string.
    if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
        return value[1:-1]
    if value == 'T':
        return True
    if value == 'F':
        return False
    if RE_INT.fullmatch(value):
        return int(value)
    if RE_FLOAT.fullmatch(value):
        return float(value)
    if len(value) == 10 and value[4] == '-' and value[7] == '-':
        return datetime.datetime.strptime(value, '%Y-%m-%d').date()
    if len(value) == 8 and value[2] == ':' and value[5] == ':':
        return datetime.time.fromisoformat(value)
    raise RuntimeError(f'unable to infer type of value "{value}"')


def convert_value(
    value: Any,
    table: Optional[str] = None,
    column: Optional[str] = None,
) -> str:
    """Return the text representation of ``value`` for one output field.

    ``None`` becomes an empty field, strings are wrapped in double quotes
    (embedded quotes are not escaped), booleans become ``T`` / ``F`` and
    everything else falls back to ``str(value)``.

    ``datetime.datetime`` values get special treatment only when both
    ``table`` and ``column`` are given:
      * a value dated 1899-12-30 is written as a bare ``HH:MM:SS`` time;
      * a value at exactly 00:00:00 is written as a bare ``YYYY-MM-DD`` date.

    NOTE(review): 1899-12-30 is presumably the placeholder date of a source
    system that stores time-only values as full timestamps, and ``table`` /
    ``column`` are only tested for presence, never inspected - confirm the
    intent with the callers.
    """
    if value is None:
        return ''
    if isinstance(value, str):
        return f'"{value}"'
    # bool is tested before the generic fallback so that True/False are not
    # rendered as 'True'/'False'.
    if isinstance(value, bool):
        return 'T' if value else 'F'
    if (
        isinstance(value, datetime.datetime)
        and table is not None
        and column is not None
    ):
        if value.year == 1899 and value.month == 12 and value.day == 30:
            return value.strftime('%H:%M:%S')
        if value.hour == 0 and value.minute == 0 and value.second == 0:
            return value.strftime('%Y-%m-%d')
    return str(value)


def parse_line(line_str: str) -> Iterator[str]:
    """Split one line into its raw (still untyped) fields.

    Fields are separated by ``;``; an unquoted ``\\n`` also ends the current
    field. A field whose first non-blank character is ``"`` is read in
    quoted mode up to the next ``"``, so it may contain ``;`` and newlines.
    The quotes are kept in the yielded text. Leading blanks and tabs are
    skipped and each yielded field is stripped of surrounding whitespace.

    A line ending in ``;`` yields a final empty field whether or not it is
    followed by a newline. An empty ``line_str`` yields nothing.
    """
    field = None             # text collected so far; None between fields
    in_quotes = False        # True while inside an open "..." section
    after_separator = False  # True when the last thing consumed was a ';'

    for ch in line_str:
        if field is None:
            if ch == ';':
                # Two separators in a row (or one at line start): empty field.
                yield ''
                after_separator = True
                continue
            if ch in (' ', '\t'):
                continue
            field = ch
            in_quotes = ch == '"'
            after_separator = False
            continue

        if not in_quotes and ch in (';', '\n'):
            yield field.strip()
            field = None
            after_separator = ch == ';'
            continue

        if in_quotes and ch == '"':
            # Closing quote: leave quoted mode but keep the quote character.
            in_quotes = False
        field += ch

    if field is not None:
        yield field.strip()
    elif after_separator:
        # The input ended right after a ';' (no trailing newline): the last
        # field is empty and must not be dropped.
        yield ''


def parse(filename: str) -> Iterator[Tuple]:
    """Yield the rows of a UTF-8 semicolon-separated file as tuples.

    The first tuple is the header: the first line split on ``;`` with each
    name stripped of whitespace (no type inference, quotes are kept). Every
    following line is split with ``parse_line`` and each field converted
    with ``cast_value``; a blank line therefore yields ``(None,)``.

    An empty file yields nothing.

    Raises:
        OSError: if the file cannot be opened.
        RuntimeError, ValueError: propagated from ``cast_value``.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        lines = iter(f)
        # A bare next() here would raise StopIteration inside a generator,
        # which Python turns into a RuntimeError (PEP 479).
        header_line = next(lines, None)
        if header_line is None:
            return
        yield tuple(part.strip() for part in header_line.split(';'))
        for line in lines:
            yield tuple(cast_value(part) for part in parse_line(line))


def parse_dict(filename: str) -> Iterator[Dict[str, Any]]:
    """Yield each data row of the file as a ``{column name: value}`` dict.

    Column names come from the header row produced by ``parse``. A row with
    fewer fields than the header yields a dict with only the leading
    columns. An empty file yields nothing.

    Raises:
        IndexError: if a row has more fields than the header.
    """
    rows = parse(filename)
    header = next(rows, None)
    if header is None:
        return
    for row in rows:
        if len(row) > len(header):
            raise IndexError(
                f'row has {len(row)} fields but header has {len(header)}'
            )
        yield dict(zip(header, row))


def format_row(*values: Any) -> str:
    """Return ``values`` as one ``;``-separated line ending in a newline."""
    return ';'.join(convert_value(v) for v in values) + '\n'