elwig-misc/tools/wgw-fb-old.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from typing import TypeAlias, Optional
import argparse
import re
import datetime
import xlrd


# One table row as built by extract_table(), read by parse_part() as
# (rd, gst, total, geb, sortid, kg).
# NOTE(review): extract_table() stores None for empty cells, so every field is
# effectively Optional even though the alias does not say so.
ExcelRow: TypeAlias = tuple[str, str, int, int, str, float]
# One extracted table as returned by extract_table():
# (sheet name, mgnr, name, loc, date, bewirt, rows).
ExcelTab: TypeAlias = tuple[str, int, str, str, datetime.date, str, list[ExcelRow]]
# One output record as built by parse_part():
# (fbnr, mgnr, sortid, 'KIP' or None, geb, kgnr, gst, None, year, None, None).
AreaCom: TypeAlias = tuple[int, int, str, Optional[str], int, int, str, Optional[int], Optional[int], Optional[int], Optional[str]]


# Running record number; parse_part() increments it once per emitted AreaCom.
FBNR: int = 0


def extract_table(sheet: xlrd.sheet.Sheet, base_row: int, base_col: int) -> Optional[ExcelTab]:
    """Read one table anchored at (``base_row``, ``base_col``) from ``sheet``.

    ``mgnr``, ``loc`` and ``name`` are read from fixed offsets above
    ``base_row``; data rows are read from the six columns starting at
    ``base_col`` in the rows below ``base_row``. Reading stops at the first
    ``'Gesamtsumme'`` row, or at an ``'Übertrag'`` row more than two rows below
    ``base_row``; an ``'Übertrag'`` row closer to ``base_row`` is skipped.

    Args:
        sheet: Worksheet to read from.
        base_row: Row index the table is anchored at; data starts one row below.
        base_col: Column index of the table's first column.

    Returns:
        ``(sheet name, mgnr, name, loc, date, bewirt, rows)``, or ``None`` (after
        printing a notice) when the ``mgnr`` cell cannot be converted to ``int``.
        ``date`` and ``bewirt`` remain ``None`` if no terminating row is found.
        Empty cells are stored as ``None`` in ``rows``.
    """
    try:
        mgnr = int(sheet.cell_value(base_row - 4, base_col + 1))
        loc = sheet.cell_value(base_row - 6, base_col + 1)
        name = sheet.cell_value(base_row - 8, base_col + 1)
    except ValueError:
        print(f'sheet {sheet.name} skipped!')
        return None
    date = None
    bewirt = None

    rows: list[ExcelRow] = []
    for row_nr in range(base_row + 1, sheet.nrows):
        row = sheet.row_slice(row_nr, base_col, base_col + 6)
        if row[0].value == 'Gesamtsumme' or (row[0].value == 'Übertrag' and row_nr - base_row > 2):
            # The date is taken from the text after the last comma of the cell
            # five rows below the terminating row, normalised to 'd.m.Y'.
            val = sheet.cell_value(row_nr + 5, base_col + 3)
            d = re.sub(r'\. ', '.', val.split(',')[-1].strip()).replace(' ', '.').replace('..', '.')
            if d.endswith('204'):
                # A truncated year '204' is read as 2014.
                d = d.replace('204', '2014')
            if d.startswith('…'):
                # An ellipsis in place of day/month is read as 1 January.
                d = d.replace('…', '1.1')
            if d != '':
                date = datetime.datetime.strptime(d, '%d.%m.%Y').date()
            else:
                # No date given at all: fall back to a fixed placeholder date.
                date = datetime.date(2023, 1, 1)
            try:
                bewirt = sheet.cell_value(row_nr + 20, base_col)
            except IndexError:
                # The sheet ends before that cell; treat it as empty. Only the
                # out-of-range lookup is expected here, so nothing else is caught.
                bewirt = ''
            break
        elif row[0].value == 'Übertrag':
            continue
        rows.append((
            str(row[0].value) if row[0].value != '' else None,
            # Numeric cells come back as floats; drop a trailing '.0' so they
            # read like the text that was typed in.
            str(row[1].value if row[1].ctype == xlrd.XL_CELL_TEXT else
                re.sub(r'\.0$', '', str(row[1].value))) if row[1].value != '' else None,
            int(row[2].value) if row[2].value != '' else None,
            int(row[3].value) if row[3].value != '' else None,
            str(row[4].value) if row[4].value != '' else None,
            round(float(row[5].value), 2) if row[5].value != '' else None
        ))
    return sheet.name, mgnr, name, loc, date, bewirt, rows


def parse_part(tabs: list[ExcelTab]) -> list[AreaCom]:
    """Turn extracted tables into ``AreaCom`` tuples and echo every row.

    Each non-empty row is printed to stdout as one fixed-width line. Rows that
    carry a ``sortid`` additionally produce one ``AreaCom`` tuple, numbered by
    incrementing the module-level ``FBNR`` counter. A row without a Ried value
    inherits the Ried of the previous row.

    Args:
        tabs: Tables in the shape returned by ``extract_table``.

    Returns:
        The ``AreaCom`` tuples created from ``tabs``, in input order.
    """
    global FBNR
    area_coms = []
    last_gst, last_rd = None, None
    first = True
    for tab in tabs:
        mgnr = tab[1]
        # Computed outside the f-string below: reusing the enclosing quote
        # character inside a replacement field is a SyntaxError before 3.12.
        bewirt = tab[5] or ''
        kip = 'KIP' if 'KIP' in bewirt else None
        if not first:
            print('-' * 148)
        first = False
        for row in tab[-1]:
            # Skip rows that hold no data (all empty, or only zeros).
            if row in ((None, None, None, None, None, None),
                       (None, None, None, None, None, 0.0),
                       (None, None, None, 0, None, 0.0),
                       (None, None, 0, 0, None, 0.0)):
                continue
            # NOTE(review): extract_table() maps empty cells to None, so this
            # comparison against '' looks like it can never be true for rows it
            # produces -- confirm the intended condition before changing it.
            if row[2] == '':
                last_rd = row[0] or last_rd
                last_gst = str(row[1])
            # Fall back to '' so a leading row without any Ried does not crash
            # the membership test and the formatted print below.
            rd = row[0] or last_rd or ''
            if 'äüßere' in rd:
                rd = rd.replace('äüßere', 'Äußere')
            gst = row[1]
            if last_gst:
                gst = last_gst + ', ' + gst
                last_gst = None
            if gst is not None:
                # Normalise spacing around '/' and ',' and collapse whitespace.
                gst = re.sub(r'\s+', ' ', gst.replace(' /', '/').replace('/ ', '/').replace(',', ', ')).strip()
            total = row[2]
            geb = row[3]
            sortid = row[4]
            kg = row[5]
            if kg is not None:
                kg = round(kg, 2)
            # Only divide when both operands are present and geb is non-zero.
            perc = round(kg / geb, 2) if kg is not None and geb else None
            print(f'{mgnr:4} {rd:24} {gst or "":64} {total or "":8} {geb or "":8} {sortid or "":5} {kg or "":8} {perc or "-":3} {kip or bewirt}')

            last_rd = rd
            sortid = sortid.strip() if sortid is not None else None
            if sortid is None:
                continue
            elif sortid == 'MTH':
                sortid = 'MT'  # Müller Thurgau
            elif sortid == 'GMU':
                sortid = 'GL'  # Goldmuskateller?
            elif ',' in sortid:
                sortid = sortid[:2]
            gsts = gst.split(' ') if gst is not None else []
            kgnr = 0
            try:
                # A leading token equal to one of these numbers is taken as the
                # kgnr and removed from the gst text.
                if int(gsts[0]) in (6128, 6102, 6110):
                    kgnr = int(gsts[0])
                    gst = ' '.join(gsts[1:]).strip(',').strip()
            except (ValueError, IndexError):
                # No gst text, or it does not start with a plain number:
                # keep kgnr at 0 and gst unchanged.
                pass
            FBNR += 1
            # TODO kgnr, rdnr
            area_coms.append((FBNR, mgnr, sortid, kip, geb, kgnr, gst, None, tab[4].year, None, None))
    print('=' * 148)
    return area_coms


def main() -> None:
    """Parse the workbook named on the command line and print its records.

    Every sheet except ``'Vorlage'`` is scanned: each row whose first column
    reads ``'Ried'`` is searched for cells reading ``'Ried'``, and one table is
    extracted per such cell. After all sheets are processed, the collected
    ``AreaCom`` tuples are printed one per line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('file')
    args = parser.parse_args()

    area_coms = []
    wb = xlrd.open_workbook(args.file)
    for name in wb.sheet_names():
        if name == 'Vorlage':
            continue
        sheet = wb.sheet_by_name(name)
        tabs = []
        for row, cell_1 in enumerate(sheet.col(0)):
            if cell_1.value != 'Ried':
                continue
            for col, cell_2 in enumerate(sheet.row(row)):
                if cell_2.value != 'Ried':
                    continue
                # Anchor the table at the row that was actually found rather
                # than at a fixed row index, so the offsets used inside
                # extract_table() line up with this 'Ried' cell.
                tab = extract_table(sheet, row, col)
                if tab:
                    tabs.append(tab)
        area_coms += parse_part(tabs)
        wb.unload_sheet(name)
    for a in area_coms:
        print(f'{a[0]:4} {a[1]:4} {a[2]:2} {a[3] or "":3} {a[4] or "":6} {a[5]:05} {a[6] or "":40} {a[7] or "":2} {a[8]:4} {a[9] or "":4} {a[10]}')


if __name__ == '__main__':
    main()