#!/bin/env python3
# -*- coding: utf-8 -*-
"""Reader and writer for a simple semicolon-separated, typed table format.

The first line of a file is the header (column names separated by ``;``).
Every following line is a row whose fields are written as:

* empty field             -> ``None``
* ``"text"``              -> ``str`` (quotes removed; ``;`` allowed inside)
* ``T`` / ``F``           -> ``bool``
* ``-?digits``            -> ``int``
* ``-?digits.digits``     -> ``float``
* ``YYYY-MM-DD``          -> ``datetime.date``
* ``HH:MM:SS``            -> ``datetime.time``
"""
from typing import Iterator, Dict, Any, Optional, Tuple
import re
import datetime

# Compiled once instead of being looked up for every field of every row.
_INT_RE = re.compile(r'-?[0-9]+')
_FLOAT_RE = re.compile(r'-?[0-9]+\.[0-9]+')


def _split_fields(line: str) -> Iterator[str]:
    """Split one row into raw, whitespace-stripped field tokens.

    A field whose first non-blank character is ``"`` is quoted: separators
    are taken literally until the closing ``"``.  Quotes are kept in the
    token so the caller can tell strings from other values.  The field
    after the last ``;`` is always produced, even when it is empty and the
    line has no trailing newline.
    """
    text = line[:-1] if line.endswith('\n') else line
    field: Optional[str] = None  # None means "between fields"
    quoted = False
    for ch in text:
        if field is None:
            if ch == ';':
                yield ''
            elif ch not in (' ', '\t'):
                # The first significant character opens the field and
                # decides whether it is a quoted one.
                field = ch
                quoted = ch == '"'
            continue
        if ch == ';' and not quoted:
            yield field.strip()
            field = None
            continue
        if quoted and ch == '"':
            # Closing quote: separators end the field again from here on.
            quoted = False
        field += ch
    yield '' if field is None else field.strip()


def _infer_value(token: str) -> Any:
    """Convert a raw field token into the Python value it denotes.

    Raises:
        RuntimeError: if the token matches none of the supported notations.
        ValueError: if a date- or time-shaped token is not a valid date/time.
    """
    if token == '':
        return None
    if token[0] == '"' and token[-1] == '"':
        return token[1:-1]
    if token == 'T':
        return True
    if token == 'F':
        return False
    if _INT_RE.fullmatch(token):
        return int(token)
    if _FLOAT_RE.fullmatch(token):
        return float(token)
    if len(token) == 10 and token[4] == '-' and token[7] == '-':
        return datetime.datetime.strptime(token, '%Y-%m-%d').date()
    if len(token) == 8 and token[2] == ':' and token[5] == ':':
        return datetime.time.fromisoformat(token)
    raise RuntimeError(f'unable to infer type of value "{token}"')


def parse(filename: str) -> Iterator[Dict[str, Any]]:
    """Lazily read a semicolon-separated file and yield one dict per row.

    The file is opened as UTF-8 when iteration starts.  The first line
    supplies the column names; each later line is yielded as a mapping
    from column name to the typed value of the field in that position.
    Rows shorter than the header simply lack the missing keys.

    Args:
        filename: path of the file to read.

    Yields:
        A ``dict`` for every line after the header.

    Raises:
        RuntimeError: if the type of a field cannot be inferred.
        ValueError: if a date- or time-shaped field is invalid.
        IndexError: if a row has more fields than the header.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        first_line = next(f, None)
        if first_line is None:
            return  # empty file: no header, no rows
        header: Tuple[str, ...] = tuple(
            name.strip() for name in first_line.strip().split(';')
        )
        for line in f:
            obj: Dict[str, Any] = {}
            for i, token in enumerate(_split_fields(line)):
                obj[header[i]] = _infer_value(token)
            yield obj


def format_row(*values) -> str:
    """Serialize values into one newline-terminated, ``;``-separated row.

    ``None`` becomes an empty field, strings are wrapped in double quotes
    (no escaping is applied), booleans become ``T``/``F`` and everything
    else is written with ``str()``.

    Args:
        *values: the field values, in column order.

    Returns:
        The formatted row, including the trailing newline.
    """
    def render(val: Any) -> str:
        if val is None:
            return ''
        if isinstance(val, str):
            return f'"{val}"'
        if isinstance(val, bool):
            return 'T' if val else 'F'
        return str(val)

    return ';'.join(render(val) for val in values) + '\n'