[#1] winziprint.py: Add server mode

This commit is contained in:
2024-01-14 12:13:53 +01:00
parent a76d540bae
commit cd6b30cf80

View File

@ -1,18 +1,22 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Union
from typing import TextIO
import os
import sys
import time
import traceback
import gc
import socketserver
import io
import signal
import weasyprint
import pypdf
# Program version; also exported under the conventional `__version__` name.
VERSION = __version__ = '0.1.0'
# (host, port) the TCP server binds to when running in daemon mode (-D).
SOCKET_ADDRESS = ('127.0.0.1', 30983)
# Batch size used by convert() for its progress accounting
# (presumably the number of documents rendered per intermediate part file -- confirm in convert()).
BATCH_SIZE = 10
@ -21,7 +25,8 @@ def convert(input_files: list[str],
output_files: str,
encoding: str = None,
padding: bool = False,
progress: bool = False) -> list[int]:
progress: bool = False,
out: TextIO = sys.stdout) -> list[int]:
# it takes roughly 100ms to generate one document
tmp_page_nums = []
tmp_file_names = []
@ -40,7 +45,7 @@ def convert(input_files: list[str],
documents.append(doc)
del html
if progress:
print(f'progress: {i + n + i // BATCH_SIZE + 1}/{steps}', flush=True)
print(f'progress: {i + n + i // BATCH_SIZE + 1}/{steps}', file=out, flush=True)
all_pages = [p for doc in documents for p in doc.pages]
tmp_file_name = f'{output_files}.{i:04}.part'
documents[0].copy(all_pages).write_pdf(tmp_file_name)
@ -49,8 +54,8 @@ def convert(input_files: list[str],
del documents
del all_pages
gc.collect()
if progress and i < BATCH_SIZE:
print(f'progress: {i + BATCH_SIZE + i // BATCH_SIZE + 1}/{steps}', flush=True)
if progress and i + BATCH_SIZE < len(html_files):
print(f'progress: {i + BATCH_SIZE + i // BATCH_SIZE + 1}/{steps}', file=out, flush=True)
merger = pypdf.PdfWriter()
i = 0
@ -76,51 +81,68 @@ def convert(input_files: list[str],
os.remove(pdf)
if progress:
print(f'progress: {steps}/{steps}', flush=True)
print(f'progress: {steps}/{steps}', file=out, flush=True)
return page_nums
def _wrapper_convert(args: list[str], encoding: str = None, padding: bool = False, progress: bool = False) -> None:
def _wrapper_convert(args: list[str],
encoding: str = None,
padding: bool = False,
progress: bool = False,
out: TextIO = sys.stdout) -> None:
try:
if len(args) < 2:
print(f'error: Too few arguments', flush=True)
print(f'error: Too few arguments', file=out, flush=True)
return
inputs = args[:-1]
output = args[-1]
if inputs[0] == '-2':
inputs.pop(0)
padding = True
while len(inputs) > 0:
if inputs[0] == '-2':
inputs.pop(0)
padding = True
elif inputs[0].startswith('-e'):
encoding = inputs.pop(0)[2:].strip()
elif inputs[0].startswith('-p'):
inputs.pop(0)
progress = True
else:
break
if len(inputs) == 0:
print(f'error: Too few arguments', file=out, flush=True)
return
t0 = time.process_time()
pages = convert(inputs, output, encoding=encoding, padding=padding, progress=progress)
pages = convert(inputs, output, encoding=encoding, padding=padding, progress=progress, out=out)
total = sum(p + 1 if padding and p % 2 != 0 else p for p in pages)
t1 = time.process_time()
print(f'success: '
f'{len(args) - 1} documents, '
f'{total} pages ({", ".join(str(p) for p in pages)}), '
f'{t1 - t0:.1f} sec',
flush=True)
file=out, flush=True)
except Exception as e:
msg = str(e).replace('\n', ' ')
print(f'error: {msg}', flush=True)
traceback.print_exception(e)
print(f'error: {msg}', file=out, flush=True)
traceback.print_exception(e, file=sys.stderr)
finally:
gc.collect()
def usage(error: bool = False) -> None:
    """Print the command line help and terminate the process.

    When *error* is true the text goes to stderr and the exit status is 1;
    otherwise it goes to stdout and the exit status is 0.
    """
    # The synopsis lists -V to match the option table below.
    print(f'usage: {sys.argv[0]} [-h] [-V] [-d DIR] [ -D | [-p] [-2] [-e ENCODING] [ - | INPUT [INPUT...] OUTPUT ] ]\n'
          '\n'
          'options:\n'
          ' -h, --help show this help message and exit\n'
          ' -V, --version show version and exit\n'
          ' -D, --daemon run as a daemon and expose a named socket\n'
          ' -d, --directory set the working directory\n'
          ' -e, --encoding encoding of the input files\n'
          ' -2, --double-sided pad documents to an even number of pages\n'
          ' -p, --progress show progress updates\n'
          '\n'
          ' - use stdin for retrieving input and output file names (semicolon-separated)\n'
          ' INPUT name of a html input file\n'
          ' OUTPUT name of a pdf output file',
          file=sys.stderr if error else sys.stdout)
    sys.exit(1 if error else 0)
@ -132,7 +154,7 @@ def version() -> None:
sys.exit(0)
def _get_arg(args: list[str], n1: str, n2: str = None, flag: bool = False) -> Union[None, str, bool]:
def _get_arg(args: list[str], n1: str, n2: str = None, flag: bool = False) -> None | str | bool:
v = None
for n in [n1] + (n2 and [n2] or []):
if flag:
@ -150,16 +172,45 @@ def _get_arg(args: list[str], n1: str, n2: str = None, flag: bool = False) -> Un
return v if not flag else v or False
class ConnectionHandler(socketserver.StreamRequestHandler):
    """Serve conversion requests arriving over one client connection.

    Each received line is one request: a semicolon-separated list of
    arguments that is passed to `_wrapper_convert`. The resulting status
    and progress lines are written back to the client as UTF-8 text.
    """

    def handle(self) -> None:
        """Process request lines until the client closes the connection."""
        # Both wrappers stay referenced for the whole connection so the
        # underlying socket files are not closed while requests are handled.
        reader = io.TextIOWrapper(self.rfile, encoding='utf-8')
        writer = io.TextIOWrapper(self.wfile, encoding='utf-8')
        try:
            # Iteration ends on EOF, i.e. when the client closes its side.
            for line in reader:
                _wrapper_convert(line.strip().split(';'), out=writer)
        except (ValueError, ConnectionError):
            pass  # socket closed by client
def main() -> None:
args = sys.argv[1:]
if len(args) == 0 or '-h' in args or '--help' in args:
usage()
elif '-v' in args or '--version' in args:
elif '-V' in args or '--version' in args:
version()
working_dir = _get_arg(args, '-d', '--directory')
if working_dir:
os.chdir(working_dir)
if '-D' in args:
print('Running as daemon')
if len(args) != 1:
usage(True)
# a tcp server is used due to the lack of unix sockets on Windows
with socketserver.ThreadingTCPServer(SOCKET_ADDRESS, ConnectionHandler) as server:
def exit_gracefully(signum: int, frame) -> None:
raise KeyboardInterrupt()
signal.signal(signal.SIGINT, exit_gracefully)
signal.signal(signal.SIGTERM, exit_gracefully)
try:
server.serve_forever()
except KeyboardInterrupt:
print('', file=sys.stderr)
print('Shutting down')
return
encoding = _get_arg(args, '-e', '--encoding')
progress = _get_arg(args, '-p', '--progress', flag=True)
double_sided = _get_arg(args, '-2', '--double-sided', flag=True)