diff --git a/data/plz.py b/data/plz.py index eca62a1..e1688c9 100755 --- a/data/plz.py +++ b/data/plz.py @@ -5,7 +5,7 @@ from typing import List, Tuple, Callable import argparse import requests import re -import xlrd +import openpyxl import tempfile import os import zipfile @@ -92,7 +92,7 @@ def get_plz_url(button: re.Pattern) -> str: def download_excel(url: str, transform: Callable[[List[str]], Tuple]) -> List: f_name = None try: - f = tempfile.NamedTemporaryFile(delete=False) + f = tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) with requests.get(url, stream=True, headers={'User-Agent': 'Mozilla/5.0'}) as r: if r.status_code != 200: raise RuntimeError(f'Unexpected response: {r.status_code} {r.reason}') @@ -102,11 +102,14 @@ def download_excel(url: str, transform: Callable[[List[str]], Tuple]) -> List: f.close() rows = [] - wb = xlrd.open_workbook(f_name) - sheet = wb.sheet_by_index(0) - for r in range(1, sheet.nrows): - row = sheet.row_values(r) - rows.append(transform(row)) + wb = openpyxl.open(f_name) + sheet = wb.worksheets[0] + first = True + for row in sheet.rows: + if first: + first = False + continue + rows.append(transform([r.value for r in row])) return rows finally: if f_name is not None: