#!/usr/bin/env python3 # -*- coding: utf-8 -*- from typing import TypeAlias, Optional import argparse import re import datetime import xlrd ExcelRow: TypeAlias = tuple[str, str, int, int, str, float] ExcelTab: TypeAlias = tuple[str, int, str, str, datetime.date, str, list[ExcelRow]] AreaCom: TypeAlias = tuple[int, int, str, Optional[str], int, int, str, Optional[int], Optional[int], Optional[int], Optional[str]] FBNR: int = 0 def extract_table(sheet: xlrd.sheet.Sheet, base_row: int, base_col: int) -> Optional[ExcelTab]: try: mgnr = int(sheet.cell_value(base_row - 4, base_col + 1)) loc = sheet.cell_value(base_row - 6, base_col + 1) name = sheet.cell_value(base_row - 8, base_col + 1) except ValueError: print(f'sheet {sheet.name} skipped!') return None date = None bewirt = None rows: list[ExcelRow] = [] for row_nr in range(base_row + 1, sheet.nrows): row = sheet.row_slice(row_nr, base_col, base_col + 6) if row[0].value == 'Gesamtsumme' or (row[0].value == 'Übertrag' and row_nr - base_row > 2): val = sheet.cell_value(row_nr + 5, base_col + 3) d = re.sub(r'\. ', '.', val.split(',')[-1].strip()).replace(' ', '.').replace('..', '.') if d.endswith('204'): d = d.replace('204', '2014') if d.startswith('…'): d = d.replace('…', '1.1') if d != '': date = datetime.datetime.strptime(d, '%d.%m.%Y').date() else: date = datetime.date(2023, 1, 1) try: bewirt = sheet.cell_value(row_nr + 20, base_col) except: bewirt = '' break elif row[0].value == 'Übertrag': continue rows.append(( str(row[0].value) if row[0].value != '' else None, str(row[1].value if row[1].ctype == xlrd.XL_CELL_TEXT else re.sub(r'\.0$', '', str(row[1].value))) if row[1].value != '' else None, int(row[2].value) if row[2].value != '' else None, int(row[3].value) if row[3].value != '' else None, str(row[4].value) if row[4].value != '' else None, round(float(row[5].value), 2) if row[5].value != '' else None )) return sheet.name, mgnr, name, loc, date, bewirt, rows def parse_part(tabs: list[ExcelTab]) -> list[AreaCom]: global FBNR area_coms = [] last_gst, last_rd = None, None first = True for tab in tabs: mgnr = tab[1] if not first: print('-' * 148) first = False for row in tab[-1]: if row in ((None, None, None, None, None, None), (None, None, None, None, None, 0.0), (None, None, None, 0, None, 0.0), (None, None, 0, 0, None, 0.0)): continue if row[2] == '': last_rd = row[0] or last_rd last_gst = str(row[1]) rd = row[0] or last_rd if 'äüßere' in rd: rd = rd.replace('äüßere', 'Äußere') gst = row[1] if last_gst: gst = last_gst + ', ' + gst last_gst = None if gst is not None: gst = re.sub(r'\s+', ' ', gst.replace(' /', '/').replace('/ ', '/').replace(',', ', ')).strip() total = row[2] geb = row[3] sortid = row[4] kg = row[5] if kg is not None: kg = round(kg, 2) perc = round(kg / geb, 2) if geb != 0 and geb is not None else None print(f'{mgnr:4} {rd:24} {gst or "":64} {total or "":8} {geb or "":8} {sortid or "":5} {kg or "":8} {perc or "-":3} {'KIP' if 'KIP' in tab[5] else tab[5]}') last_rd = rd sortid = sortid.strip() if sortid is not None else None if sortid is None: continue elif sortid == 'MTH': sortid = 'MT' # Müller Thurgau elif sortid == 'GMU': sortid = 'GL' # Goldmuskateller? elif ',' in sortid: sortid = sortid[:2] gsts = re.split(r' ', gst) if gst is not None else [] kgnr = 0 try: if int(gsts[0]) in (6128, 6102, 6110): kgnr = int(gsts[0]) gst = ' '.join(gsts[1:]).strip(',').strip() except ValueError: pass except IndexError: pass FBNR += 1 # TODO kgnr, rdnr area_coms.append((FBNR, mgnr, sortid, 'KIP' if 'KIP' in tab[5] else None, geb, kgnr, gst, None, tab[4].year, None, None)) print('=' * 148) return area_coms def main() -> None: parser = argparse.ArgumentParser() parser.add_argument('file') args = parser.parse_args() area_coms = [] wb = xlrd.open_workbook(args.file) for name in wb.sheet_names(): if name == 'Vorlage': continue sheet = wb.sheet_by_name(name) tabs = [] for row, cell_1 in enumerate(sheet.col(0)): if cell_1.value == 'Ried': for col, cell_2 in enumerate(sheet.row(row)): if cell_2.value == 'Ried': tab = extract_table(sheet, 12, col) if tab: tabs.append(tab) area_coms += parse_part(tabs) wb.unload_sheet(name) for a in area_coms: print(f'{a[0]:4} {a[1]:4} {a[2]:2} {a[3] or "":3} {a[4] or "":6} {a[5]:05} {a[6] or "":40} {a[7] or "":2} {a[8]:4} {a[9] or "":4} {a[10]}') if __name__ == '__main__': main()