266 lines
9.4 KiB
PHP
266 lines
9.4 KiB
PHP
<?php
|
|
|
|
// Only GET, POST and HEAD are served; every other method is answered with 405.
if (!in_array($_SERVER['REQUEST_METHOD'], ['GET', 'POST', 'HEAD'], true)) {
    header('Status: 405');
    header('Content-Length: 0');
    header('Allow: GET, POST, HEAD');
    exit;
}

// The script exposes no sub-paths: any non-empty path info is a 404.
// A missing PATH_INFO key is treated as "no path info" so that it neither
// raises an undefined-key warning nor falls into the 404 branch
// (null !== '' would otherwise be true).
$info = $_SERVER['PATH_INFO'] ?? '';
if ($info !== '') {
    header('Status: 404');
    header('Content-Length: 0');
    exit;
}

// Source document URL (optional). Spaces are turned into '+', presumably to
// undo the '+' -> ' ' decoding applied to query strings. Non-string values
// (e.g. url[]=...) are treated as absent.
$rawUrl = $_GET['url'] ?? null;
$url = is_string($rawUrl) ? str_replace(' ', '+', $rawUrl) : null;

// Requested output format; defaults to JSON.
$format = $_GET['format'] ?? 'json';
|
|
|
|
/**
 * Splits a run of whitespace-separated tokens into street address, postal
 * code and city.
 *
 * The tokens $array[$from] .. $array[$to] are walked from right to left:
 *  - tokens seen before a postal code has been found belong to the city,
 *    unless the token itself looks like a postal code (three or more of
 *    A-Z, 0-9, '.', '-', with an optional trailing comma);
 *  - once a postal code has been found, every further token belongs to the
 *    street address. Trailing commas/whitespace are stripped while no street
 *    token has been collected yet, and empty tokens are skipped.
 *
 * @param array $array tokens to read from
 * @param int   $from  index of the first token (inclusive)
 * @param int   $to    index of the last token (inclusive)
 * @return array [address, postalCode, city], each joined with single spaces
 */
function get_address($array, $from, $to): array {
    $streetTokens = [];
    $postalTokens = [];
    $cityTokens = [];

    $idx = $to;
    while ($idx >= $from) {
        $token = $array[$idx];
        $idx--;

        if (count($postalTokens) === 0) {
            // Still to the right of (or at) the postal code.
            if (preg_match("/^[A-Z0-9.\-]{3,},?$/", $token)) {
                array_unshift($postalTokens, trim($token, ", \n\r\t\v\0"));
            } else {
                array_unshift($cityTokens, $token);
            }
            continue;
        }

        // Left of the postal code: street address.
        if (count($streetTokens) === 0) {
            $token = rtrim($token, ", \n\r\t\v\0");
        }
        if (strlen($token) === 0) {
            continue;
        }
        array_unshift($streetTokens, $token);
    }

    return [
        implode(' ', $streetTokens),
        implode(' ', $postalTokens),
        implode(' ', $cityTokens),
    ];
}
|
|
|
|
/**
 * Encodes a value as JSON without escaping slashes or non-ASCII characters.
 *
 * Malformed UTF-8 byte sequences are replaced with U+FFFD instead of making
 * json_encode() fail: a failure returns false, which the string return type
 * would turn into an empty string and thereby corrupt the JSON document this
 * script assembles by concatenation.
 *
 * @param mixed $data value to encode
 * @return string JSON representation of $data
 */
function jenc($data): string {
    return json_encode(
        $data,
        JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE
    );
}
|
|
|
|
if ($format === 'text') {
|
|
// format=text: respond with the raw text layer of the PDF.
header('Content-Type: text/plain; charset=UTF-8');

if ($_SERVER['REQUEST_METHOD'] === 'POST') {
    // The request body is the PDF itself: feed it to pdftotext via stdin
    // and stream pdftotext's stdout straight to the client.
    $fd_spec = [
        0 => ["pipe", "r"], // stdin
        1 => ["pipe", "w"], // stdout
        2 => ["pipe", "w"], // stderr
    ];
    $process = proc_open(['pdftotext', '-raw', '-', '-'], $fd_spec, $pipes);
    if (!is_resource($process)) {
        // pdftotext could not be started; $pipes is not usable.
        header('Status: 500');
        header('Content-Length: 0');
        exit;
    }

    $input = fopen("php://input", "rb");
    while (!feof($input)) {
        $buffer = fread($input, 8192);
        if ($buffer === false) {
            break;
        }
        fwrite($pipes[0], $buffer);
    }
    fclose($input);
    fclose($pipes[0]);

    fpassthru($pipes[1]);
    fclose($pipes[1]);
    fclose($pipes[2]);
    proc_close($process);
} else {
    // The PDF is fetched from the `url` query parameter.
    if ($url === null || $url === '') {
        header('Status: 400');
        header('Content-Length: 0');
        exit;
    }
    // escapeshellarg() already adds the surrounding single quotes; wrapping
    // its result in another pair of quotes would cancel the quoting and allow
    // shell injection. `--` stops curl from reading a URL that starts with
    // '-' as an option, and --proto limits fetching to http(s).
    passthru('curl -s --proto =http,https -- ' . escapeshellarg($url) . ' | pdftotext -raw - -');
}
|
|
} else if ($format === 'json') {
|
|
// format=json: extract the PDF's text with pdftotext into $text, which the
// code that follows parses.
header('Content-Type: application/json; charset=UTF-8');

$fd_spec = [
    0 => ["pipe", "r"], // stdin
    1 => ["pipe", "w"], // stdout
    2 => ["pipe", "w"], // stderr
];
$isPost = $_SERVER['REQUEST_METHOD'] === 'POST';

if ($isPost) {
    // The request body is the PDF; it is piped into pdftotext below.
    $command = ['pdftotext', '-raw', '-', '-'];
} else {
    // The PDF is fetched from the `url` query parameter.
    if ($url === null || $url === '') {
        header('Status: 400');
        header('Content-Length: 0');
        exit;
    }
    // `--` stops curl from reading a URL that starts with '-' as an option;
    // --proto limits fetching to http(s).
    $command = [
        'bash', '-c',
        'curl -s --proto =http,https -- ' . escapeshellarg($url) . ' | pdftotext -raw - -',
    ];
}

$process = proc_open($command, $fd_spec, $pipes);
if (!is_resource($process)) {
    // The converter could not be started; $pipes is not usable.
    header('Status: 500');
    header('Content-Length: 0');
    exit;
}

if ($isPost) {
    $input = fopen("php://input", "rb");
    while (!feof($input)) {
        $buffer = fread($input, 8192);
        if ($buffer === false) {
            break;
        }
        fwrite($pipes[0], $buffer);
    }
    fclose($input);
}

fclose($pipes[0]);
$text = stream_get_contents($pipes[1]);
fclose($pipes[1]);
$stderr = stream_get_contents($pipes[2]);
fclose($pipes[2]);
proc_close($process);

// Anything written to stderr is treated as a failure and returned verbatim
// as plain text.
if ($stderr !== '') {
    header('Status: 500');
    header('Content-Length: ' . strlen($stderr));
    header('Content-Type: text/plain');
    exit($stderr);
}
|
|
|
|
// A TRACES certificate is recognised by a line of the form
// "<two-letter language> <certificate URL> <number> / <number>"
// (the two numbers are presumably the page counter).
$r = preg_match('@([a-z]{2}) (https://webgate\.ec\.europa\.eu/tracesnt/directory/publication/organic-operator/(.*?)\.pdf) (\d+) / (\d+)@', $text, $matches);
if ($r === 1) {
    // TRACES certificate

    // Split the text at section headings such as "I.3 <title>". With
    // PREG_SPLIT_DELIM_CAPTURE the result is
    // [preamble, number, title, body, number, title, body, ...];
    // $data maps each section number to its trimmed body.
    $data = [];
    $parts = preg_split('@\n(I+\.\d+) ([^\n]*)@', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    $status = str_replace("\n", '', $parts[0]);
    for ($i = 3; $i < count($parts); $i += 3) {
        $data[$parts[$i - 2]] = trim($parts[$i]);
    }

    // Language-dependent labels and vocabularies. All lookups fall back to a
    // neutral value so that an unsupported language or an unmapped term does
    // not raise an undefined-key warning into the response.
    $lang = $matches[1];
    $splitAddr = [
        'de' => 'Adresse',
        'en' => 'Address',
    ][$lang] ?? null;
    $splitCountry = [
        'de' => 'Land',
        'en' => 'Country',
    ][$lang] ?? null;
    $statusMap = [
        'de' => [
            'AUSGESTELLT' => 'issued',
        ],
        'en' => [
            'ISSUED' => 'issued',
        ],
    ][$lang] ?? [];
    $activityMap = [
        'de' => [
            'Aufbereitung' => 'preparation',
            'Ausfuhr' => 'export',
            'Einfuhr' => 'import',
            'Lagerung' => 'storing',
            'Produktion' => 'production',
            'Vertrieb' => 'distribution',
            'Vertrieb/Inverkehrbringen' => 'distribution_placing_on_the_market',
        ],
        'en' => [
            'Distribution' => 'distribution',
            'Distribution/Placing on the market' => 'distribution_placing_on_the_market',
            'Export' => 'export',
            'Import' => 'import',
            'Preparation' => 'preparation',
            'Production' => 'production',
            'Storing' => 'storing',
        ],
    ][$lang] ?? [];

    // The certificate id has the form "<authority id>.<operator id>...".
    $certUrl = $matches[2];
    $certId = $matches[3];
    $idParts = explode('.', $certId);
    $authorityId = $idParts[0];
    $operatorId = $idParts[1] ?? null;

    // I.3: operator. Token 0 is skipped (presumably the name label); the
    // name runs up to the address label, the address from there up to the
    // country label.
    $operator = preg_split('@\s+@', trim($data['I.3'] ?? ''));
    $p1 = array_search($splitAddr, $operator, true);
    $p2 = array_search($splitCountry, $operator, true);
    $operatorName = implode(' ', array_filter(
        $operator,
        fn($token, $idx) => $idx > 0 && $idx < $p1,
        ARRAY_FILTER_USE_BOTH
    ));
    [$opAddr, $opPostal, $opCity] = get_address($operator, $p1 + 1, $p2 - 1);

    // I.4: authority. Same layout, except that the token directly before the
    // address label is dropped, and the one before that too when it starts
    // with '(' (presumably the parenthesised authority code).
    $authority = preg_split('@\s+@', trim($data['I.4'] ?? ''));
    $p1 = array_search($splitAddr, $authority, true);
    $p2 = array_search($splitCountry, $authority, true);
    $authorityName = implode(' ', array_filter(
        $authority,
        fn($token, $idx) => $idx > 0 && $idx < $p1 - 1 && ($idx !== $p1 - 2 || !str_starts_with($token, '(')),
        ARRAY_FILTER_USE_BOTH
    ));
    [$aAddr, $aPostal, $aCity] = get_address($authority, $p1 + 1, $p2 - 1);

    // I.5: one activity per line, bullet-prefixed; unmapped names become null.
    $activities = [];
    foreach (explode("\n", $data['I.5'] ?? '') as $a) {
        $activities[] = $activityMap[trim($a, '• ')] ?? null;
    }

    // I.6: product categories are referenced as "(a)" .. "(g)".
    preg_match_all('/\([a-g]\)/', $data['I.6'] ?? '', $productMatches);
    $products = $productMatches[0];

    // I.8: the first two slash-separated dates are the validity range; their
    // components are reversed and joined with '-'.
    preg_match_all('@\d+/\d+/\d+@', $data['I.8'] ?? '', $dateMatches);
    $valid1 = implode('-', array_reverse(explode('/', $dateMatches[0][0] ?? '')));
    $valid2 = implode('-', array_reverse(explode('/', $dateMatches[0][1] ?? '')));

    // Every value goes through jenc() so that quotes, backslashes or control
    // characters in the extracted text cannot break the JSON document.
    echo '{"type":"traces","lang":' . jenc($lang) .
        ',"id":' . jenc($certId) .
        ',"status":' . jenc($statusMap[$status] ?? '');
    echo ",\n \"operator\":{\"id\":" . jenc($operatorId) .
        ',"groupOfOperators":' . jenc(!str_starts_with($data['I.2'] ?? '', '☑')) .
        ',"name":' . jenc($operatorName) .
        ',"address":' . jenc($opAddr) .
        ',"postalCode":' . jenc($opPostal) .
        ',"city":' . jenc($opCity) .
        ',"countryCode":' . jenc($operator[count($operator) - 1]) .
        "},\n \"authority\":{\"id\":" . jenc($authorityId) .
        ',"name":' . jenc($authorityName) .
        ',"address":' . jenc($aAddr) .
        ',"postalCode":' . jenc($aPostal) .
        ',"city":' . jenc($aCity) .
        ',"countryCode":' . jenc($authority[count($authority) - 1]) .
        "},\n \"activities\":" . jenc($activities) .
        ",\n \"productCategories\":" . jenc($products) .
        ",\n \"validFrom\":" . jenc($valid1) .
        ',"validTo":' . jenc($valid2) .
        ",\n \"url\":" . jenc($certUrl) . "\n}\n";
} else {
    // Not a recognised document type.
    echo "{\"type\":\"unknown\"}\n";
}
|
|
} else {
|
|
// Any other format: proxy the PDF at `url` unchanged.
if ($url === null || $url === '') {
    header('Status: 400');
    header('Content-Length: 0');
    exit;
}

$fd_spec = [
    0 => ["pipe", "r"], // stdin
    1 => ["pipe", "w"], // stdout
    2 => ["pipe", "w"], // stderr
    3 => ["pipe", "w"], // headers
];
// `--` stops curl from reading a URL that starts with '-' as an option;
// --proto limits fetching to http(s). Response headers are dumped to fd 3.
$process = proc_open(
    ['curl', '-s', '--proto', '=http,https', '-D', '/dev/fd/3', '--', $url],
    $fd_spec,
    $pipes
);
if (!is_resource($process)) {
    // curl could not be started; $pipes is not usable.
    header('Status: 500');
    header('Content-Length: 0');
    exit;
}
fclose($pipes[0]);

// Read the upstream header block: status line first, then "Name: value"
// lines up to the first blank line. Reading stops at a non-200 status.
$headers = [];
$status_code = null;
while (($line = fgets($pipes[3])) !== false) {
    if (trim($line) === '') {
        break;
    }
    if ($status_code === null) {
        // A malformed status line yields 0 and is rejected below.
        $status_code = intval(explode(' ', $line)[1] ?? 0);
        if ($status_code !== 200) {
            break;
        }
        continue;
    }
    $pair = explode(':', $line, 2);
    if (count($pair) !== 2) {
        // Not a "Name: value" line; ignore it.
        continue;
    }
    $headers[strtolower(trim($pair[0]))] = trim($pair[1]);
}
fclose($pipes[3]);

$contentType = strtolower($headers['content-type'] ?? '');
if ($status_code === 200 && str_starts_with($contentType, 'application/pdf')) {
    header('Content-Type: application/pdf');
    if (isset($headers['content-length']) && ctype_digit($headers['content-length'])) {
        header('Content-Length: ' . $headers['content-length']);
    }

    // Default file name: last path segment of the URL; an upstream
    // Content-Disposition with a quoted filename takes precedence.
    $parts = explode('/', $url);
    $filename = $parts[count($parts) - 1];
    if (isset($headers['content-disposition'])
        && preg_match('@filename="(.*?)"@', $headers['content-disposition'], $matches) === 1) {
        $filename = $matches[1];
    }
    // Drop characters that would break out of the quoted header value.
    $filename = preg_replace('/[\x00-\x1F\x7F"\\\\]/', '', $filename) ?? '';
    header('Content-Disposition: inline; filename="' . $filename . '"');
    fpassthru($pipes[1]);
} else {
    header('Status: 500');
    header('Content-Length: 0');
}
fclose($pipes[1]);
fclose($pipes[2]);
proc_close($process);
|
|
}
|