From a44f8860862ebdbfcaa18cff7844d801d2e78bc1 Mon Sep 17 00:00:00 2001 From: Lorenz Stechauner Date: Tue, 15 Jul 2025 18:10:48 +0200 Subject: [PATCH] organic: Add external BioQS api --- www/organic/external/bioqs/.attachment.py | 54 +++++++++++++ www/organic/external/bioqs/.operators.py | 91 ++++++++++++++++++++++ www/organic/external/bioqs/attachments.php | 36 +++++++++ www/organic/external/bioqs/operators.php | 43 ++++++++++ 4 files changed, 224 insertions(+) create mode 100755 www/organic/external/bioqs/.attachment.py create mode 100755 www/organic/external/bioqs/.operators.py create mode 100644 www/organic/external/bioqs/attachments.php create mode 100644 www/organic/external/bioqs/operators.php diff --git a/www/organic/external/bioqs/.attachment.py b/www/organic/external/bioqs/.attachment.py new file mode 100755 index 0000000..6fb584a --- /dev/null +++ b/www/organic/external/bioqs/.attachment.py @@ -0,0 +1,54 @@ +#!/bin/env python3 + +import re +import argparse +import requests +import sys + + +BASE_URL = 'https://www.bioqs.at' +URL = f'{BASE_URL}/ACM/faces/form/cms/portal/index.jsp' +ACTION_RE = re.compile(r'action="([^"]*)"') +HIDDEN_RE = re.compile(r' None: + parser = argparse.ArgumentParser() + parser.add_argument('cert_nr', type=str) + args = parser.parse_args() + + s = requests.Session() + r = s.get(f'{URL}?menu_sid=5002') + uri = ACTION_RE.findall(r.text)[0] + hidden = {m[1]: m[2] for m in HIDDEN_RE.finditer(r.text)} + + r = s.post(f'{BASE_URL}{uri}', data={ + 'PartnerCertSearchForm:pcs_seqidall': args.cert_nr, + 'PartnerCertSearchForm:button_search': 'Suche starten...', + 'PartnerCertSearchForm_SUBMIT': '1', + 'javax.faces.ViewState': hidden['javax.faces.ViewState'], + }) + + p1 = r.text.find(f'>{args.cert_nr}<') + p2 = r.text.find('id="', p1) + p3 = r.text.find('"', p2 + 4) + if p1 == -1 or p2 == -1 or p3 == -1: + exit(1) + id = r.text[p2 + 4:p3] + + r = s.post(f'{BASE_URL}{uri}', data={ + 'PartnerCertSearchForm:_idcl': id, 
'PartnerCertSearchForm_SUBMIT': '1', + 'javax.faces.ViewState': hidden['javax.faces.ViewState'], + }) + + if 'Content-Disposition' in r.headers: + dispo = r.headers['Content-Disposition'] + if 'filename="' in dispo: + filename = dispo[dispo.find('filename="') + 10:dispo.rfind('"')] + print(filename, file=sys.stderr) + sys.stdout.buffer.write(r.content) + + +if __name__ == '__main__': + main() diff --git a/www/organic/external/bioqs/.operators.py b/www/organic/external/bioqs/.operators.py new file mode 100755 index 0000000..8481c7e --- /dev/null +++ b/www/organic/external/bioqs/.operators.py @@ -0,0 +1,91 @@ +#!/bin/env python3 + +import re +import argparse +import requests +import html +import json +import urllib.parse + + +BASE_URL = 'https://www.bioqs.at' +URL = f'{BASE_URL}/ACM/faces/form/cms/portal/index.jsp' +ACTION_RE = re.compile(r'action="([^"]*)"') +HIDDEN_RE = re.compile(r']*>\s*(.*?)\s*', re.DOTALL) +UNCOLLAPSED_ROW_RE = re.compile(r'(\s*\s*(.*?)\s*\s*){7}', re.DOTALL) +COLLAPSED_ROW_RE = re.compile(r']*>\s*(.*?)\s*
', re.DOTALL) +TD_RE = re.compile(r']*>\s*(.*?)\s*', re.DOTALL) +TAG_RE = re.compile(r'<[^>]*>') +SPACE_RE = re.compile(r'\s+') +ATTACHMENT_RE = re.compile(r"\[\['cert_attachment_sid','([^']*)'\]\]") + + +def remove_tags(text: str) -> str: + return SPACE_RE.sub(' ', html.unescape(TAG_RE.sub(' ', text))).strip() + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument('query', type=str) + args = parser.parse_args() + query = {'PartnerCertSearchForm:pcs_' + q.split('=', 1)[0]: urllib.parse.unquote(q.split('=', 1)[-1]) for q in args.query.split('&')} + + s = requests.Session() + r = s.get(f'{URL}?menu_sid=5002') + uri = ACTION_RE.findall(r.text)[0] + hidden = {m[1]: m[2] for m in HIDDEN_RE.finditer(r.text)} + + r = s.post(f'{BASE_URL}{uri}', data={ + **query, + 'PartnerCertSearchForm:button_search': 'Suche starten...', + 'PartnerCertSearchForm_SUBMIT': '1', + 'javax.faces.ViewState': hidden['javax.faces.ViewState'], + }) + + result_table = r.text[r.text.find('') + 8] + uncollapsed_rows = [tuple(remove_tags(m[1]) + for m in TD_RE.finditer(row[0])) + for row in UNCOLLAPSED_ROW_RE.finditer(result_table)] + collapsed_rows = [[tuple(remove_tags((ATTACHMENT_RE.search(m[1]) or m)[1]) for m in TD_RE.finditer(row[1])) + for row in ROW_RE.finditer(tbl[0])] + for tbl in COLLAPSED_ROW_RE.finditer(result_table)] + print('[') + first = True + for row, tbl in zip(uncollapsed_rows, collapsed_rows): + meta = {} + certificates = [] + for srow in tbl: + if len(srow) == 1: + [k,v] = srow[0].split(':', 1) + meta[k.strip()] = v.strip() + continue + if len(srow) == 0: + continue + certificates.append({ + 'nr': srow[0], + 'validFrom': '-'.join(reversed(srow[1].split('-'))), + 'validTo': '-'.join(reversed(srow[2].split('-'))), + 'type': srow[3], + 'attachmentSid': srow[4], + }) + if not first: + print(',', flush=True) + print(' ', json.dumps({ + 'idNr': row[0], + 'lfbisNr': row[1] or None, + 'name': row[2], + 'postalCode': row[3], + 'city': row[4], + 'address': 
row[5], + 'autorityName': meta['Kontrollstelle'], + 'productGroups': meta['Bereiche'], + 'certificates': certificates, + }, ensure_ascii=False), end='') + first = False + print('\n]') + + +if __name__ == '__main__': + main() diff --git a/www/organic/external/bioqs/attachments.php b/www/organic/external/bioqs/attachments.php new file mode 100644 index 0000000..7787455 --- /dev/null +++ b/www/organic/external/bioqs/attachments.php @@ -0,0 +1,36 @@ +&1 > $filename ")) === false) { + header('Status: 500'); + header('Content-Length: 0'); + exit; +} + +header('Content-Type: application/pdf'); +header('Content-Length: ' . filesize($filename)); +header('Content-Disposition: inline; filename="' . $pdfName . '"'); +readfile($filename); diff --git a/www/organic/external/bioqs/operators.php b/www/organic/external/bioqs/operators.php new file mode 100644 index 0000000..7d79cb0 --- /dev/null +++ b/www/organic/external/bioqs/operators.php @@ -0,0 +1,43 @@ + '1', + 'DE' => '2', + ][$_GET['country']] ?? ''; +} +if (isset($_GET['postalCode'])) { + $query[] = 'zipcode=' . urlencode($_GET['postalCode']); +} +if (isset($_GET['name'])) { + $query[] = 'aname=' . urlencode($_GET['name']); +} +if (isset($_GET['idNr'])) { + $query[] = 'clientcode=' . urlencode($_GET['idNr']); +} +if (isset($_GET['lfbisNr'])) { + $query[] = 'lfbis=' . urlencode($_GET['lfbisNr']); +} + +header('Content-Type: application/json; charset=UTF-8'); + +echo "{\"data\":"; +passthru("python3 .operators.py " . escapeshellarg(implode('&', $query))); +echo "}\n";