organic: Add external BioQS API
This commit is contained in:
91
www/organic/external/bioqs/.operators.py
vendored
Executable file
91
www/organic/external/bioqs/.operators.py
vendored
Executable file
@@ -0,0 +1,91 @@
|
||||
#!/bin/env python3
|
||||
|
||||
import re
|
||||
import argparse
|
||||
import requests
|
||||
import html
|
||||
import json
|
||||
import urllib.parse
|
||||
|
||||
|
||||
# Scheme and host of the BioQS site; form action paths are appended to it.
BASE_URL = 'https://www.bioqs.at'
|
||||
# Portal entry page; main() requests it with ``?menu_sid=5002``.
URL = f'{BASE_URL}/ACM/faces/form/cms/portal/index.jsp'
|
||||
# Captures the value of an ``action="..."`` attribute (main() uses the first
# match on the page as the form submit path).
ACTION_RE = re.compile(r'action="([^"]*)"')
|
||||
# Captures (name, value) of hidden inputs written as
# ``<input type="hidden" name="..." ... value="...">`` on a single line.
HIDDEN_RE = re.compile(r'<input type="hidden" name="([^"]*)" .*?value="([^"]*)"')
|
||||
|
||||
# Captures the inner content of any ``<tr>`` element, across line breaks.
ROW_RE = re.compile(r'<tr[^>]*>\s*(.*?)\s*</tr>', re.DOTALL)
|
||||
# Matches a ``<tr style="">`` row made of exactly seven attribute-less
# ``<td>`` cells; main() re-scans the whole match with TD_RE.
UNCOLLAPSED_ROW_RE = re.compile(r'<tr style="">(\s*<td>\s*(.*?)\s*</td>\s*){7}</tr>', re.DOTALL)
|
||||
# Matches a whole ``<table width=...>`` element (despite the name, a table,
# not a row); main() splits each match into rows with ROW_RE.
COLLAPSED_ROW_RE = re.compile(r'<table width=[^>]*>\s*(.*?)\s*</table>', re.DOTALL)
|
||||
# Captures the whitespace-trimmed content of a ``<td>`` cell.
TD_RE = re.compile(r'<td[^>]*>\s*(.*?)\s*</td>', re.DOTALL)
|
||||
# Matches any single markup tag.
TAG_RE = re.compile(r'<[^>]*>')
|
||||
# Matches a run of one or more whitespace characters.
SPACE_RE = re.compile(r'\s+')
|
||||
# Captures the value paired with 'cert_attachment_sid' inside a
# ``[['cert_attachment_sid','...']]`` literal embedded in a cell.
ATTACHMENT_RE = re.compile(r"\[\['cert_attachment_sid','([^']*)'\]\]")
|
||||
|
||||
|
||||
def remove_tags(text: str) -> str:
    """Reduce an HTML fragment to its plain, single-spaced text.

    Every tag is replaced by a space, HTML character references are decoded,
    runs of whitespace are collapsed to one space, and the result is stripped
    of leading and trailing whitespace.
    """
    # Tags become spaces (not empty strings) so adjacent elements stay
    # separated as distinct words.
    without_tags = TAG_RE.sub(' ', text)
    decoded = html.unescape(without_tags)
    collapsed = SPACE_RE.sub(' ', decoded)
    return collapsed.strip()
|
||||
|
||||
|
||||
def main() -> None:
    """Run a BioQS partner-certificate search and print the hits as JSON.

    The single positional command-line argument ``query`` is a ``&``-separated
    list of ``name=value`` pairs. Each name is prefixed with
    ``PartnerCertSearchForm:pcs_`` and each value is percent-decoded before
    being posted as a field of the search form.

    The result is written to stdout as a JSON array with one object per
    operator, emitted incrementally (one object per line).

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status from either request.
        RuntimeError: If the search page does not contain the form action or
            the ``javax.faces.ViewState`` hidden field.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('query', type=str)
    args = parser.parse_args()

    query = {}
    for pair in args.query.split('&'):
        parts = pair.split('=', 1)
        # A pair without '=' keeps its existing meaning: the bare token is
        # used as both the field name suffix and the value.
        query['PartnerCertSearchForm:pcs_' + parts[0]] = urllib.parse.unquote(parts[-1])

    # Seconds to wait for the server; without it requests blocks indefinitely.
    timeout = 30

    # The session carries the cookies set by the first request into the POST
    # and is closed deterministically when the block exits.
    with requests.Session() as s:
        r = s.get(f'{URL}?menu_sid=5002', timeout=timeout)
        r.raise_for_status()

        action = ACTION_RE.search(r.text)
        if action is None:
            raise RuntimeError('BioQS search page contains no form action')
        uri = action[1]

        hidden = {m[1]: m[2] for m in HIDDEN_RE.finditer(r.text)}
        view_state = hidden.get('javax.faces.ViewState')
        if view_state is None:
            raise RuntimeError('BioQS search page contains no javax.faces.ViewState field')

        r = s.post(f'{BASE_URL}{uri}', data={
            **query,
            'PartnerCertSearchForm:button_search': 'Suche starten...',
            'PartnerCertSearchForm_SUBMIT': '1',
            'javax.faces.ViewState': view_state,
        }, timeout=timeout)
        r.raise_for_status()

    # Restrict parsing to the span from the first '<table' to the last
    # '</table>' of the response.
    page = r.text
    result_table = page[page.find('<table'):page.rfind('</table>') + len('</table>')]

    # One tuple of cell texts per seven-cell summary row.
    uncollapsed_rows = [
        tuple(remove_tags(m[1]) for m in TD_RE.finditer(row[0]))
        for row in UNCOLLAPSED_ROW_RE.finditer(result_table)
    ]
    # One list of rows per detail table; a cell that embeds a
    # 'cert_attachment_sid' literal is reduced to that sid, any other cell to
    # its plain text.
    collapsed_rows = [
        [
            tuple(remove_tags((ATTACHMENT_RE.search(m[1]) or m)[1]) for m in TD_RE.finditer(row[1]))
            for row in ROW_RE.finditer(tbl[0])
        ]
        for tbl in COLLAPSED_ROW_RE.finditer(result_table)
    ]

    print('[')
    first = True
    # Summary rows and detail tables are paired purely by position.
    for row, tbl in zip(uncollapsed_rows, collapsed_rows):
        meta = {}
        certificates = []
        for srow in tbl:
            if len(srow) == 1:
                # Single-cell rows carry 'label: value' metadata. Cells
                # without a colon (e.g. empty ones) are ignored instead of
                # aborting the run with a ValueError.
                key, sep, value = srow[0].partition(':')
                if sep:
                    meta[key.strip()] = value.strip()
                continue
            if len(srow) == 0:
                continue
            certificates.append({
                'nr': srow[0],
                # Reverse the '-'-separated date parts (presumably
                # DD-MM-YYYY -> YYYY-MM-DD; confirm against the site).
                'validFrom': '-'.join(reversed(srow[1].split('-'))),
                'validTo': '-'.join(reversed(srow[2].split('-'))),
                'type': srow[3],
                'attachmentSid': srow[4],
            })
        if not first:
            print(',', flush=True)
        print(' ', json.dumps({
            'idNr': row[0],
            'lfbisNr': row[1] or None,
            'name': row[2],
            'postalCode': row[3],
            'city': row[4],
            'address': row[5],
            # The key spelling is part of the emitted schema and is kept
            # as-is. Missing metadata becomes null rather than a KeyError
            # that would leave a truncated JSON array on stdout.
            'autorityName': meta.get('Kontrollstelle'),
            'productGroups': meta.get('Bereiche'),
            'certificates': certificates,
        }, ensure_ascii=False), end='')
        first = False
    print('\n]')
|
||||
|
||||
|
||||
# Run the search only when this file is executed as a script, not on import.
if __name__ == '__main__':
|
||||
main()
|
Reference in New Issue
Block a user