[#1] winziprint.py: Add server mode

This commit is contained in:
2024-01-14 12:13:53 +01:00
parent a76d540bae
commit cd6b30cf80

View File

@ -1,18 +1,22 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from typing import Union from typing import TextIO
import os import os
import sys import sys
import time import time
import traceback import traceback
import gc import gc
import socketserver
import io
import signal
import weasyprint import weasyprint
import pypdf import pypdf
VERSION = __version__ = '0.1.0' VERSION = __version__ = '0.1.0'
SOCKET_ADDRESS = ('127.0.0.1', 30983)
BATCH_SIZE = 10 BATCH_SIZE = 10
@ -21,7 +25,8 @@ def convert(input_files: list[str],
output_files: str, output_files: str,
encoding: str = None, encoding: str = None,
padding: bool = False, padding: bool = False,
progress: bool = False) -> list[int]: progress: bool = False,
out: TextIO = sys.stdout) -> list[int]:
# it takes roughly 100ms to generate one document # it takes roughly 100ms to generate one document
tmp_page_nums = [] tmp_page_nums = []
tmp_file_names = [] tmp_file_names = []
@ -40,7 +45,7 @@ def convert(input_files: list[str],
documents.append(doc) documents.append(doc)
del html del html
if progress: if progress:
print(f'progress: {i + n + i // BATCH_SIZE + 1}/{steps}', flush=True) print(f'progress: {i + n + i // BATCH_SIZE + 1}/{steps}', file=out, flush=True)
all_pages = [p for doc in documents for p in doc.pages] all_pages = [p for doc in documents for p in doc.pages]
tmp_file_name = f'{output_files}.{i:04}.part' tmp_file_name = f'{output_files}.{i:04}.part'
documents[0].copy(all_pages).write_pdf(tmp_file_name) documents[0].copy(all_pages).write_pdf(tmp_file_name)
@ -49,8 +54,8 @@ def convert(input_files: list[str],
del documents del documents
del all_pages del all_pages
gc.collect() gc.collect()
if progress and i < BATCH_SIZE: if progress and i + BATCH_SIZE < len(html_files):
print(f'progress: {i + BATCH_SIZE + i // BATCH_SIZE + 1}/{steps}', flush=True) print(f'progress: {i + BATCH_SIZE + i // BATCH_SIZE + 1}/{steps}', file=out, flush=True)
merger = pypdf.PdfWriter() merger = pypdf.PdfWriter()
i = 0 i = 0
@ -76,51 +81,68 @@ def convert(input_files: list[str],
os.remove(pdf) os.remove(pdf)
if progress: if progress:
print(f'progress: {steps}/{steps}', flush=True) print(f'progress: {steps}/{steps}', file=out, flush=True)
return page_nums return page_nums
def _wrapper_convert(args: list[str],
                     encoding: str = None,
                     padding: bool = False,
                     progress: bool = False,
                     out: TextIO = sys.stdout) -> None:
    """Run one conversion request and report the result as a single status line.

    ``args`` is a flat list of the form ``[FLAG...] INPUT [INPUT...] OUTPUT``.
    Leading flags override the keyword arguments for this request:

    * ``-2``         enables ``padding``
    * ``-eENCODING`` sets ``encoding`` (text after ``-e``, stripped)
    * ``-p...``      enables ``progress``

    Exactly one status line is written to ``out``: ``success: ...`` when the
    conversion finished, ``error: ...`` otherwise. Exceptions are not
    propagated; their traceback goes to ``sys.stderr`` so that ``out`` only
    ever carries the line-based protocol.

    :param args: flags, input file names and, as last element, the output file name
    :param encoding: default encoding handed to ``convert``
    :param padding: default for padding documents to an even page count
    :param progress: default for emitting progress lines
    :param out: text stream receiving progress and status lines
    """
    try:
        if len(args) < 2:
            print('error: Too few arguments', file=out, flush=True)
            return

        inputs = args[:-1]
        output = args[-1]

        # Consume leading option flags; the first non-flag entry starts the
        # list of input files.
        while len(inputs) > 0:
            if inputs[0] == '-2':
                inputs.pop(0)
                padding = True
            elif inputs[0].startswith('-e'):
                encoding = inputs.pop(0)[2:].strip()
            elif inputs[0].startswith('-p'):
                inputs.pop(0)
                progress = True
            else:
                break

        # Only flags and an output name were given.
        if len(inputs) == 0:
            print('error: Too few arguments', file=out, flush=True)
            return

        t0 = time.process_time()
        pages = convert(inputs, output, encoding=encoding, padding=padding, progress=progress, out=out)
        # With padding, every odd-sized document occupies one extra (blank) page.
        total = sum(p + 1 if padding and p % 2 != 0 else p for p in pages)
        t1 = time.process_time()
        # Count the input files only: ``args`` still contains the option flags
        # and the output name, so ``len(args) - 1`` would over-report.
        print(f'success: '
              f'{len(inputs)} documents, '
              f'{total} pages ({", ".join(str(p) for p in pages)}), '
              f'{t1 - t0:.1f} sec',
              file=out, flush=True)
    except Exception as e:
        # Keep the status on one line so line-based readers stay in sync.
        msg = str(e).replace('\n', ' ')
        print(f'error: {msg}', file=out, flush=True)
        traceback.print_exception(e, file=sys.stderr)
    finally:
        gc.collect()
def usage(error: bool = False) -> None:
    """Print the command line help and terminate the process.

    The text goes to ``sys.stderr`` and the exit status is 1 when ``error`` is
    true; otherwise it goes to ``sys.stdout`` and the exit status is 0.
    This function never returns, it always raises ``SystemExit``.

    :param error: whether the help is shown because of invalid arguments
    """
    # The synopsis lists ``-V`` (not ``-v``) to match the option table below.
    print(f'usage: {sys.argv[0]} [-h] [-V] [-d DIR] [ -D | [-p] [-2] [-e ENCODING] [ - | INPUT [INPUT...] OUTPUT ] ]\n'
          '\n'
          'options:\n'
          '  -h, --help          show this help message and exit\n'
          '  -V, --version       show version and exit\n'
          '  -D, --daemon        run as a daemon and expose a named socket\n'
          '  -d, --directory     set the working directory\n'
          '  -e, --encoding      encoding of the input files\n'
          '  -2, --double-sided  pad documents to an even number of pages\n'
          '  -p, --progress      show progress updates\n'
          '\n'
          '  -                   use stdin for retrieving input and output file names (semi-colon-seperated)\n'
          '  INPUT               name of a html input file\n'
          '  OUTPUT              name of a pdf output file',
          file=sys.stderr if error else sys.stdout)
    sys.exit(1 if error else 0)
@ -132,7 +154,7 @@ def version() -> None:
sys.exit(0) sys.exit(0)
def _get_arg(args: list[str], n1: str, n2: str = None, flag: bool = False) -> Union[None, str, bool]: def _get_arg(args: list[str], n1: str, n2: str = None, flag: bool = False) -> None | str | bool:
v = None v = None
for n in [n1] + (n2 and [n2] or []): for n in [n1] + (n2 and [n2] or []):
if flag: if flag:
@ -150,16 +172,45 @@ def _get_arg(args: list[str], n1: str, n2: str = None, flag: bool = False) -> Un
return v if not flag else v or False return v if not flag else v or False
class ConnectionHandler(socketserver.StreamRequestHandler):
    """Serve conversion requests for one client connection.

    The client sends one request per line, UTF-8 encoded, as a
    semicolon-separated argument list. Each line is split on ``;`` and passed
    to ``_wrapper_convert``, which writes its progress and status lines back
    over the same connection.
    """

    def handle(self):
        """Process request lines until the client closes its side of the connection."""
        # Wrap each stream exactly once. Re-wrapping in a loop would let a
        # discarded wrapper close the underlying stream when it is collected.
        reader = io.TextIOWrapper(self.rfile, encoding='utf-8')
        writer = io.TextIOWrapper(self.wfile, encoding='utf-8')
        try:
            # Iteration ends at EOF, i.e. when the client has closed the connection.
            for line in reader:
                _wrapper_convert(line.strip().split(';'), out=writer)
        except UnicodeDecodeError as e:
            # The request was not valid UTF-8; tell the client instead of
            # dropping the connection without any answer.
            print(f'error: {e}', file=writer, flush=True)
        except ValueError:
            pass  # stream already closed
        finally:
            # Hand the raw streams back untouched so that the base class can
            # flush and close them itself in finish().
            for stream in (reader, writer):
                try:
                    stream.detach()
                except (ValueError, OSError):
                    pass  # already closed, or the peer is gone
def main() -> None: def main() -> None:
args = sys.argv[1:] args = sys.argv[1:]
if len(args) == 0 or '-h' in args or '--help' in args: if len(args) == 0 or '-h' in args or '--help' in args:
usage() usage()
elif '-v' in args or '--version' in args: elif '-V' in args or '--version' in args:
version() version()
working_dir = _get_arg(args, '-d', '--directory') working_dir = _get_arg(args, '-d', '--directory')
if working_dir: if working_dir:
os.chdir(working_dir) os.chdir(working_dir)
if '-D' in args:
print('Running as daemon')
if len(args) != 1:
usage(True)
# a tcp server is used due to the lack of unix sockets on Windows
with socketserver.ThreadingTCPServer(SOCKET_ADDRESS, ConnectionHandler) as server:
def exit_gracefully(signum: int, frame) -> None:
raise KeyboardInterrupt()
signal.signal(signal.SIGINT, exit_gracefully)
signal.signal(signal.SIGTERM, exit_gracefully)
try:
server.serve_forever()
except KeyboardInterrupt:
print('', file=sys.stderr)
print('Shutting down')
return
encoding = _get_arg(args, '-e', '--encoding') encoding = _get_arg(args, '-e', '--encoding')
progress = _get_arg(args, '-p', '--progress', flag=True) progress = _get_arg(args, '-p', '--progress', flag=True)
double_sided = _get_arg(args, '-2', '--double-sided', flag=True) double_sided = _get_arg(args, '-2', '--double-sided', flag=True)