#!/bin/env python3
# -*- coding: utf-8 -*-
"""Reader and writer for a small semicolon-separated, typed text format.

The first line of a file is a header of column names separated by ``;``.
Every following line is a record whose fields are separated by ``;`` and
whose Python types are inferred from the field text (see ``cast_value``).
``format_row`` produces a line in the same notation.
"""

import datetime
import re
from typing import Any, Dict, Iterator, Tuple

RE_INT = re.compile(r'-?[0-9]+')
# A float needs a fractional part, an exponent, or both, so plain integers
# never match.  The exponent form is accepted because ``str(float)`` (used by
# ``format_row``) emits it for very small and very large values, e.g. "1e-05".
RE_FLOAT = re.compile(
    r'-?[0-9]+(?:\.[0-9]+(?:[eE][-+]?[0-9]+)?|[eE][-+]?[0-9]+)'
)


def cast_value(value: str) -> Any:
    """Convert one raw field string into a Python value.

    Recognised notations, checked in this order:

    * empty string          -> ``None``
    * ``"text"``            -> ``str`` (surrounding quotes removed)
    * ``T`` / ``F``         -> ``True`` / ``False``
    * ``-?digits``          -> ``int``
    * decimal / exponent    -> ``float``
    * ``YYYY-MM-DD``        -> ``datetime.date``
    * ``HH:MM:SS``          -> ``datetime.time``

    Raises:
        RuntimeError: if the value matches none of the notations above.
        ValueError: if a value shaped like a date or time is not a valid one.
    """
    if value == '':
        return None
    # Require two characters so a lone '"' is not taken for an empty string.
    if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
        return value[1:-1]
    if value == 'T':
        return True
    if value == 'F':
        return False
    if RE_INT.fullmatch(value):
        return int(value)
    if RE_FLOAT.fullmatch(value):
        return float(value)
    if len(value) == 10 and value[4] == '-' and value[7] == '-':
        return datetime.datetime.strptime(value, '%Y-%m-%d').date()
    if len(value) == 8 and value[2] == ':' and value[5] == ':':
        return datetime.time.fromisoformat(value)
    raise RuntimeError(f'unable to infer type of value "{value}"')


def parse_line(line_str: str) -> Iterator[str]:
    """Split one record line into its raw, whitespace-stripped fields.

    Fields are separated by ``;``.  A ``;`` or newline inside a field that
    starts with ``"`` does not end the field until the closing ``"`` has been
    seen.  Leading spaces and tabs before a field are skipped.  Quotes are
    kept in the yielded text; ``cast_value`` removes them.

    A ``;`` at the very end of the input yields a final empty field whether
    or not a newline follows it.
    """
    chars = None          # characters of the field being read, None if idle
    in_quotes = False     # inside an open "..." section of the current field
    after_sep = False     # a ';' was consumed and no field has started since

    for ch in line_str:
        if chars is None:
            if ch == ';':
                # Two separators in a row (or a leading one): empty field.
                yield ''
                after_sep = True
                continue
            if ch in (' ', '\t'):
                continue
            chars = [ch]
            in_quotes = ch == '"'
            after_sep = False
            continue

        if not in_quotes and ch in (';', '\n'):
            yield ''.join(chars).strip()
            chars = None
            after_sep = ch == ';'
            continue

        if in_quotes and ch == '"':
            in_quotes = False
        chars.append(ch)

    if chars is not None:
        yield ''.join(chars).strip()
    elif after_sep:
        # Input ended right after a ';' (no trailing newline): the last
        # field is empty and must still be reported.
        yield ''


def parse(filename: str) -> Iterator[Tuple]:
    """Iterate over the rows of a file as tuples.

    The first yielded tuple is the header: the first line split on ``;`` with
    each name stripped (no quote handling or type inference).  Every further
    tuple holds the typed values of one record line.  An empty file yields
    nothing.

    Raises:
        RuntimeError: propagated from ``cast_value`` for unrecognised values.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        header_line = next(f, None)
        if header_line is None:
            # Empty file.  A bare next() would raise StopIteration, which
            # surfaces as RuntimeError inside a generator (PEP 479).
            return
        yield tuple(part.strip() for part in header_line.split(';'))
        for line in f:
            yield tuple(cast_value(part) for part in parse_line(line))


def parse_dict(filename: str) -> Iterator[Dict[str, Any]]:
    """Iterate over the records of a file as ``{column name: value}`` dicts.

    Column names come from the header line.  A record with fewer fields than
    the header produces a dict with only the leading columns; a record with
    more fields than the header raises ``IndexError``.  An empty file yields
    nothing.
    """
    rows = parse(filename)
    header = next(rows, None)
    if header is None:
        return
    for row in rows:
        yield {header[i]: part for i, part in enumerate(row)}


def _format_value(val: Any) -> str:
    """Render a single value in the notation understood by ``cast_value``."""
    if val is None:
        return ''
    if isinstance(val, str):
        return f'"{val}"'
    if isinstance(val, bool):
        return 'T' if val else 'F'
    return str(val)


def format_row(*values) -> str:
    """Format values as one ``;``-separated line terminated by a newline.

    ``None`` becomes an empty field, strings are wrapped in double quotes
    (without any escaping), booleans become ``T`` / ``F`` and everything else
    is rendered with ``str()``.
    """
    return ';'.join(_format_value(val) for val in values) + '\n'