organic: Add external BioQS API

This commit is contained in:
2025-07-15 18:10:48 +02:00
parent 3a39cb6635
commit a44f886086
4 changed files with 224 additions and 0 deletions

54
www/organic/external/bioqs/.attachment.py vendored Executable file
View File

@@ -0,0 +1,54 @@
#!/bin/env python3
import re
import argparse
import requests
import sys
# Origin of the BioQS portal; form action paths taken from its pages are appended to it.
BASE_URL = 'https://www.bioqs.at'
# Portal entry page; main() requests it with ?menu_sid=5002 to obtain the search form.
URL = f'{BASE_URL}/ACM/faces/form/cms/portal/index.jsp'
# Captures the value of an action="..." attribute; main() uses the first match as POST target.
ACTION_RE = re.compile(r'action="([^"]*)"')
# Captures (name, value) of hidden <input> fields; main() reads javax.faces.ViewState from them.
HIDDEN_RE = re.compile(r'<input type="hidden" name="([^"]*)" .*?value="([^"]*)"')
def main() -> None:
    """Fetch a certificate attachment from the BioQS portal.

    The certificate number is the single command-line argument.  The portal's
    certificate search is run for that number, then the link found next to
    the matching entry is submitted.  The body of the final response is
    written to stdout.  If that response carries a ``Content-Disposition``
    header with a file name, the name is printed to stderr.

    Exits with status 1 and without any output when the search form cannot be
    located in the portal page or the certificate number does not occur in
    the search result.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('cert_nr', type=str)
    args = parser.parse_args()

    # Seconds; without a timeout a stalled connection would block forever.
    timeout = 60

    session = requests.Session()
    r = session.get(f'{URL}?menu_sid=5002', timeout=timeout)
    actions = ACTION_RE.findall(r.text)
    hidden = {m[1]: m[2] for m in HIDDEN_RE.finditer(r.text)}
    view_state = hidden.get('javax.faces.ViewState')
    if not actions or view_state is None:
        # stderr carries the attachment file name (see the print below), so
        # fail silently instead of emitting a traceback there.
        sys.exit(1)
    form_url = f'{BASE_URL}{actions[0]}'

    r = session.post(form_url, data={
        'PartnerCertSearchForm:pcs_seqidall': args.cert_nr,
        'PartnerCertSearchForm:button_search': 'Suche starten...',
        'PartnerCertSearchForm_SUBMIT': '1',
        'javax.faces.ViewState': view_state,
    }, timeout=timeout)

    # Locate the certificate number in the result page and take the value of
    # the first id="..." attribute that follows it.
    page = r.text
    start = page.find(f'>{args.cert_nr}<')
    if start == -1:
        sys.exit(1)
    id_start = page.find('id="', start)
    if id_start == -1:
        sys.exit(1)
    id_end = page.find('"', id_start + 4)
    if id_end == -1:
        sys.exit(1)
    link_id = page[id_start + 4:id_end]

    r = session.post(form_url, data={
        'PartnerCertSearchForm:_idcl': link_id,
        'PartnerCertSearchForm_SUBMIT': '1',
        'javax.faces.ViewState': view_state,
    }, timeout=timeout)

    dispo = r.headers.get('Content-Disposition')
    if dispo is not None and 'filename="' in dispo:
        filename = dispo[dispo.find('filename="') + 10:dispo.rfind('"')]
        print(filename, file=sys.stderr)
    sys.stdout.buffer.write(r.content)


if __name__ == '__main__':
    main()

91
www/organic/external/bioqs/.operators.py vendored Executable file
View File

@@ -0,0 +1,91 @@
#!/bin/env python3
import re
import argparse
import requests
import html
import json
import urllib.parse
# Origin of the BioQS portal; form action paths taken from its pages are appended to it.
BASE_URL = 'https://www.bioqs.at'
# Portal entry page; main() requests it with ?menu_sid=5002 to obtain the search form.
URL = f'{BASE_URL}/ACM/faces/form/cms/portal/index.jsp'
# Captures the value of an action="..." attribute; main() uses the first match as POST target.
ACTION_RE = re.compile(r'action="([^"]*)"')
# Captures (name, value) of hidden <input> fields; main() reads javax.faces.ViewState from them.
HIDDEN_RE = re.compile(r'<input type="hidden" name="([^"]*)" .*?value="([^"]*)"')
# Any <tr ...>...</tr>; group 1 is the row content without surrounding whitespace.
ROW_RE = re.compile(r'<tr[^>]*>\s*(.*?)\s*</tr>', re.DOTALL)
# A <tr style=""> row made of exactly seven attribute-less <td> cells; main() reads
# idNr, lfbisNr, name, postalCode, city and address from the first six cells.
UNCOLLAPSED_ROW_RE = re.compile(r'<tr style="">(\s*<td>\s*(.*?)\s*</td>\s*){7}</tr>', re.DOTALL)
# A <table width=...>...</table>; main() reads "key: value" rows and certificate rows from it.
COLLAPSED_ROW_RE = re.compile(r'<table width=[^>]*>\s*(.*?)\s*</table>', re.DOTALL)
# Any <td ...>...</td>; group 1 is the cell content without surrounding whitespace.
TD_RE = re.compile(r'<td[^>]*>\s*(.*?)\s*</td>', re.DOTALL)
# Any markup tag.
TAG_RE = re.compile(r'<[^>]*>')
# A run of whitespace.
SPACE_RE = re.compile(r'\s+')
# Captures the value in [['cert_attachment_sid','...']]; when a cell contains it,
# main() uses that value in place of the cell text.
ATTACHMENT_RE = re.compile(r"\[\['cert_attachment_sid','([^']*)'\]\]")
def remove_tags(text: str) -> str:
    """Return *text* reduced to plain text.

    Every markup tag is replaced by a space, HTML entities are decoded, each
    run of whitespace is collapsed into a single space, and leading and
    trailing whitespace is dropped.
    """
    without_tags = TAG_RE.sub(' ', text)
    decoded = html.unescape(without_tags)
    collapsed = SPACE_RE.sub(' ', decoded)
    return collapsed.strip()
def _parse_query(raw: str) -> dict:
    """Turn ``key=value&key=value`` into the portal's search form fields.

    Each key is prefixed with ``PartnerCertSearchForm:pcs_`` and each value is
    percent-decoded.  Empty segments (for example an empty *raw* string) are
    skipped so that no nameless form field is submitted.
    """
    fields = {}
    for part in raw.split('&'):
        if not part:
            continue
        pieces = part.split('=', 1)
        fields['PartnerCertSearchForm:pcs_' + pieces[0]] = urllib.parse.unquote(pieces[-1])
    return fields


def _parse_details(detail_rows) -> tuple:
    """Split the rows of one detail table into ``(meta, certificates)``.

    A row with exactly one cell is read as ``key: value`` meta data; a row
    with at least five cells is read as a certificate.  Rows that fit neither
    shape are ignored instead of aborting the whole run.
    """
    meta = {}
    certificates = []
    for cells in detail_rows:
        if len(cells) == 1:
            key, sep, value = cells[0].partition(':')
            if sep:
                meta[key.strip()] = value.strip()
            continue
        if len(cells) < 5:
            continue
        certificates.append({
            'nr': cells[0],
            # Reverse the '-'-separated date parts (e.g. DD-MM-YYYY -> YYYY-MM-DD).
            'validFrom': '-'.join(reversed(cells[1].split('-'))),
            'validTo': '-'.join(reversed(cells[2].split('-'))),
            'type': cells[3],
            'attachmentSid': cells[4],
        })
    return meta, certificates


def main() -> None:
    """Search BioQS operators and print the result as a JSON array.

    The single command-line argument is a query string of the form
    ``field=value&field=value`` with percent-encoded values; the field names
    are the suffixes of the portal's ``PartnerCertSearchForm:pcs_*`` inputs.
    One JSON object per operator is written to stdout, flushed row by row.

    Exits with status 1 and a message on stderr, before anything is written
    to stdout, when the search form cannot be located in the portal page.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('query', type=str)
    args = parser.parse_args()
    query = _parse_query(args.query)

    # Seconds; without a timeout a stalled connection would block forever.
    timeout = 60

    session = requests.Session()
    r = session.get(f'{URL}?menu_sid=5002', timeout=timeout)
    actions = ACTION_RE.findall(r.text)
    hidden = {m[1]: m[2] for m in HIDDEN_RE.finditer(r.text)}
    if not actions or 'javax.faces.ViewState' not in hidden:
        raise SystemExit('BioQS search form not found in portal page')

    r = session.post(f'{BASE_URL}{actions[0]}', data={
        **query,
        'PartnerCertSearchForm:button_search': 'Suche starten...',
        'PartnerCertSearchForm_SUBMIT': '1',
        'javax.faces.ViewState': hidden['javax.faces.ViewState'],
    }, timeout=timeout)

    result_table = r.text[r.text.find('<table'):r.text.rfind('</table>') + 8]
    summary_rows = [
        tuple(remove_tags(cell[1]) for cell in TD_RE.finditer(row[0]))
        for row in UNCOLLAPSED_ROW_RE.finditer(result_table)
    ]
    detail_tables = [
        [
            tuple(
                # A cell holding an attachment reference is reduced to its sid.
                remove_tags((ATTACHMENT_RE.search(cell[1]) or cell)[1])
                for cell in TD_RE.finditer(row[1])
            )
            for row in ROW_RE.finditer(table[0])
        ]
        for table in COLLAPSED_ROW_RE.finditer(result_table)
    ]

    print('[')
    first = True
    # Summary rows and detail tables are paired by their order in the page.
    for row, detail_rows in zip(summary_rows, detail_tables):
        meta, certificates = _parse_details(detail_rows)
        if not first:
            print(',', flush=True)
        print(' ', json.dumps({
            'idNr': row[0],
            'lfbisNr': row[1] or None,
            'name': row[2],
            'postalCode': row[3],
            'city': row[4],
            'address': row[5],
            # Key spelling (sic) is part of the output format and kept as is.
            # A missing meta entry yields null instead of a crash that would
            # leave a truncated JSON document on stdout.
            'autorityName': meta.get('Kontrollstelle'),
            'productGroups': meta.get('Bereiche'),
            'certificates': certificates,
        }, ensure_ascii=False), end='')
        first = False
    print('\n]')


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,36 @@
<?php
/**
 * Serves the PDF attachment of a BioQS certificate.
 *
 * Request: GET|HEAD <this script>/<certificate number>
 *
 * The certificate number is taken from PATH_INFO and handed to
 * .attachment.py. That script writes the document to stdout (redirected
 * into a temporary file here) and the document's file name to stderr
 * (captured here as the command output).
 *
 * Responses: 200 with the PDF, 404 without a certificate number,
 * 405 for other methods, 500 when the helper script fails.
 */
header('Access-Control-Allow-Origin: *');

if ($_SERVER['REQUEST_METHOD'] !== 'GET' && $_SERVER['REQUEST_METHOD'] !== 'HEAD') {
    // http_response_code() sets the status under every SAPI; a "Status:"
    // header is only honoured by CGI/FastCGI.
    http_response_code(405);
    header('Content-Length: 0');
    header('Allow: GET, HEAD');
    exit;
}

// PATH_INFO is not set by every server when the URL has no extra path.
$info = $_SERVER['PATH_INFO'] ?? '';
// Drop the leading "/"; the cast covers substr() returning false on PHP < 8.
$certId = (string) substr($info, 1);
if ($certId === '') {
    http_response_code(404);
    header('Content-Length: 0');
    exit;
}

// The temporary file is removed automatically when $file is closed.
$file = tmpfile();
if (!$file) {
    http_response_code(500);
    header('Content-Length: 0');
    exit;
}
$filename = stream_get_meta_data($file)['uri'];

// Redirection order matters: "2>&1" sends stderr (the file name) to the
// pipe read by exec(), then "> file" sends stdout (the PDF) to the file.
$command = 'python3 .attachment.py ' . escapeshellarg($certId)
    . ' 2>&1 > ' . escapeshellarg($filename);
$output = [];
$exitCode = 0;
$pdfName = exec($command, $output, $exitCode);
if ($pdfName === false || $exitCode !== 0) {
    // A non-zero exit code means the certificate was not found or the
    // script crashed; in the latter case the output is a traceback, not a
    // file name, and must not be used.
    http_response_code(500);
    header('Content-Length: 0');
    exit;
}

// Strip characters that would break out of the quoted header parameter.
$safeName = (string) preg_replace('/[\x00-\x1F\x7F"\\\\]/', '', $pdfName);
$disposition = 'inline';
if ($safeName !== '') {
    $disposition .= '; filename="' . $safeName . '"';
}

header('Content-Type: application/pdf');
header('Content-Length: ' . filesize($filename));
header('Content-Disposition: ' . $disposition);
readfile($filename);

View File

@@ -0,0 +1,43 @@
<?php
/**
 * Searches BioQS operators and returns them as JSON: {"data": [...]}.
 *
 * Request: GET|HEAD <this script>?country=&postalCode=&name=&idNr=&lfbisNr=
 *
 * The supported parameters are translated to the field names expected by
 * .operators.py and passed to it as one "field=value&field=value" argument.
 * The script's output is streamed into the response unchanged.
 *
 * Responses: 200 with the JSON document, 404 when an extra path is given,
 * 405 for other methods.
 */
header('Access-Control-Allow-Origin: *');

if ($_SERVER['REQUEST_METHOD'] !== 'GET' && $_SERVER['REQUEST_METHOD'] !== 'HEAD') {
    // http_response_code() sets the status under every SAPI; a "Status:"
    // header is only honoured by CGI/FastCGI.
    http_response_code(405);
    header('Content-Length: 0');
    header('Allow: GET, HEAD');
    exit;
}

// PATH_INFO is not set by every server when the URL has no extra path;
// without the fallback the comparison below would always yield a 404.
$info = $_SERVER['PATH_INFO'] ?? '';
if ($info !== '') {
    http_response_code(404);
    header('Content-Length: 0');
    exit;
}

$query = [];

if (isset($_GET['country']) && is_string($_GET['country'])) {
    $countryIds = [
        'AT' => '1',
        'DE' => '2',
    ];
    // The parentheses are required: "." binds tighter than "??", so without
    // them the fallback would apply to the concatenated string, never to
    // the lookup. Unknown countries are sent as an empty value.
    $query[] = 'country=' . ($countryIds[$_GET['country']] ?? '');
}

// Request parameter => field name understood by .operators.py.
$fieldMap = [
    'postalCode' => 'zipcode',
    'name' => 'aname',
    'idNr' => 'clientcode',
    'lfbisNr' => 'lfbis',
];
foreach ($fieldMap as $param => $field) {
    // Array-valued parameters (e.g. name[]=x) cannot be encoded; skip them.
    if (isset($_GET[$param]) && is_string($_GET[$param])) {
        // .operators.py decodes values with urllib.parse.unquote, which does
        // not turn "+" into a space. rawurlencode() emits "%20" for spaces,
        // whereas urlencode() would emit "+".
        $query[] = $field . '=' . rawurlencode($_GET[$param]);
    }
}

header('Content-Type: application/json; charset=UTF-8');
echo "{\"data\":";
passthru("python3 .operators.py " . escapeshellarg(implode('&', $query)));
echo "}\n";