93 lines
3.3 KiB
Python
Executable File
93 lines
3.3 KiB
Python
Executable File
#!/bin/env python3
|
|
|
|
import re
|
|
import argparse
|
|
import requests
|
|
import html
|
|
import json
|
|
import urllib.parse
|
|
|
|
|
|
# Root of the portal and the JSP page that serves the search form.
BASE_URL = 'https://www.bioqs.at'
URL = f'{BASE_URL}/ACM/faces/form/cms/portal/index.jsp'

# --- Patterns for the search form page -------------------------------------
# Value of an ``action="..."`` attribute.
ACTION_RE = re.compile(r'action="([^"]*)"')
# (name, value) of a hidden ``<input>``; ``type`` must directly precede ``name``.
HIDDEN_RE = re.compile(r'<input type="hidden" name="([^"]*)" .*?value="([^"]*)"')

# --- Patterns for the result page ------------------------------------------
# Any ``<tr>`` element; group 1 is its inner HTML without surrounding whitespace.
ROW_RE = re.compile(r'<tr[^>]*>\s*(.*?)\s*</tr>', flags=re.DOTALL)
# A ``<tr style="">`` consisting of exactly seven attribute-less ``<td>`` cells.
UNCOLLAPSED_ROW_RE = re.compile(
    r'<tr style="">(\s*<td>\s*(.*?)\s*</td>\s*){7}</tr>',
    flags=re.DOTALL,
)
# A ``<table width=...>`` element; group 1 is its inner HTML.
COLLAPSED_ROW_RE = re.compile(r'<table width=[^>]*>\s*(.*?)\s*</table>', flags=re.DOTALL)
# Any ``<td>`` element; group 1 is its inner HTML without surrounding whitespace.
TD_RE = re.compile(r'<td[^>]*>\s*(.*?)\s*</td>', flags=re.DOTALL)

# --- Text clean-up ----------------------------------------------------------
# Any markup tag.
TAG_RE = re.compile(r'<[^>]*>')
# A run of whitespace.
SPACE_RE = re.compile(r'\s+')
# The quoted value of a ``[['cert_attachment_sid','...']]`` snippet.
ATTACHMENT_RE = re.compile(r"\[\['cert_attachment_sid','([^']*)'\]\]")
|
|
|
|
|
|
def remove_tags(text: str) -> str:
    """Reduce an HTML fragment to plain, single-spaced text.

    Each tag is replaced by a space, HTML entities are decoded, every run of
    whitespace is collapsed into one space and the result is stripped at
    both ends.
    """
    without_markup = TAG_RE.sub(' ', text)
    decoded = html.unescape(without_markup)
    single_spaced = SPACE_RE.sub(' ', decoded)
    return single_spaced.strip()
|
|
|
|
|
|
# Seconds to wait for the portal before giving up on a single request.
_REQUEST_TIMEOUT = 30


def _parse_query(raw: str) -> dict:
    """Turn a ``key=value&key=value`` string into search-form fields.

    Every key is prefixed with ``PartnerCertSearchForm:pcs_`` and every value
    is percent-decoded.  A part that contains no ``=`` is used, unchanged, as
    both the key suffix and the value.
    """
    fields = {}
    for part in raw.split('&'):
        pieces = part.split('=', 1)
        fields['PartnerCertSearchForm:pcs_' + pieces[0]] = urllib.parse.unquote(pieces[-1])
    return fields


def _fetch_result_page(query: dict) -> str:
    """Load the search form, submit *query* and return the result page HTML.

    Raises:
        requests.RequestException: on timeouts, connection problems or a
            non-success HTTP status.
        RuntimeError: if the form page has no ``action`` attribute or no
            ``javax.faces.ViewState`` hidden field.
    """
    # One session for both requests so cookies set by the form page are sent
    # back with the submission; the ``with`` block closes it afterwards.
    with requests.Session() as session:
        form_page = session.get(f'{URL}?menu_sid=5002', timeout=_REQUEST_TIMEOUT)
        form_page.raise_for_status()

        action = ACTION_RE.search(form_page.text)
        if action is None:
            raise RuntimeError('no form action found on the search page')
        # Attribute values are HTML-escaped in the page source; decode them
        # the way a browser would before sending them back.
        hidden = {m[1]: html.unescape(m[2]) for m in HIDDEN_RE.finditer(form_page.text)}
        if 'javax.faces.ViewState' not in hidden:
            raise RuntimeError('no javax.faces.ViewState field found on the search page')

        result_page = session.post(
            f'{BASE_URL}{html.unescape(action[1])}',
            data={
                **query,
                'PartnerCertSearchForm:button_search': 'Suche starten...',
                'PartnerCertSearchForm_SUBMIT': '1',
                'javax.faces.ViewState': hidden['javax.faces.ViewState'],
            },
            timeout=_REQUEST_TIMEOUT,
        )
        result_page.raise_for_status()
        return result_page.text


def _cell_text(cell_html: str) -> str:
    """Return the attachment sid embedded in a cell, else the cell's plain text."""
    attachment = ATTACHMENT_RE.search(cell_html)
    return remove_tags(attachment[1] if attachment else cell_html)


def _parse_result_page(page: str) -> tuple:
    """Extract ``(summary_rows, detail_tables)`` from the result page HTML.

    ``summary_rows`` holds one tuple of cell texts per ``UNCOLLAPSED_ROW_RE``
    match; ``detail_tables`` holds, per ``COLLAPSED_ROW_RE`` match, a list of
    cell-text tuples (one per ``<tr>``).
    """
    # Only look between the first ``<table`` and the last ``</table>``.
    result_table = page[page.find('<table'):page.rfind('</table>') + 8]
    summary_rows = [
        tuple(remove_tags(cell[1]) for cell in TD_RE.finditer(row[0]))
        for row in UNCOLLAPSED_ROW_RE.finditer(result_table)
    ]
    detail_tables = [
        [
            tuple(_cell_text(cell[1]) for cell in TD_RE.finditer(row[1]))
            for row in ROW_RE.finditer(table[0])
        ]
        for table in COLLAPSED_ROW_RE.finditer(result_table)
    ]
    return summary_rows, detail_tables


def _reverse_date(value: str) -> str:
    """Reverse the dash-separated parts of *value* (presumably DD-MM-YYYY -> YYYY-MM-DD)."""
    return '-'.join(reversed(value.split('-')))


def _build_entry(row: tuple, detail_rows: list) -> dict:
    """Combine one summary row with the rows of its detail table.

    Single-cell detail rows of the form ``key: value`` become metadata; rows
    with at least five cells become certificates.  Anything else (empty rows,
    single cells without a colon, short rows) is skipped instead of aborting
    the whole run.
    """
    meta = {}
    certificates = []
    for cells in detail_rows:
        if len(cells) == 1:
            key, colon, value = cells[0].partition(':')
            if colon:
                meta[key.strip()] = value.strip()
            continue
        if len(cells) < 5:
            continue
        certificates.append({
            'nr': cells[0],
            'validFrom': _reverse_date(cells[1]),
            'validTo': _reverse_date(cells[2]),
            'type': cells[3],
            'attachmentSid': cells[4],
            'url': f'https://elwig.at/organic/external/bioqs/attachments/{urllib.parse.quote(cells[0])}',
        })
    return {
        'idNr': row[0],
        'lfbisNr': row[1] or None,
        'name': row[2],
        'postalCode': row[3],
        'city': row[4],
        'address': row[5],
        # Missing metadata is emitted as null rather than raising KeyError.
        'autorityName': meta.get('Kontrollstelle'),
        'productGroups': meta.get('Bereiche'),
        'certificates': certificates,
    }


def main() -> None:
    """Run a certificate search on the portal and print the hits as a JSON array.

    The single command-line argument is a ``key=value&key=value`` string; see
    ``_parse_query`` for how it is mapped to form fields.  The array is
    written to stdout with one object per line, and only after every entry
    has been built, so a failure never leaves truncated JSON behind.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('query', type=str)
    args = parser.parse_args()

    page = _fetch_result_page(_parse_query(args.query))
    summary_rows, detail_tables = _parse_result_page(page)

    # Summary rows and detail tables are paired purely by position.
    entries = [
        json.dumps(_build_entry(row, detail_rows), ensure_ascii=False)
        for row, detail_rows in zip(summary_rows, detail_tables)
    ]

    print('[')
    print(',\n'.join('  ' + entry for entry in entries), end='')
    print('\n]')
|
|
|
|
|
|
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|