Files
elwig-misc/winziprint/winziprint.py
2023-10-19 21:10:14 +02:00

112 lines
3.2 KiB
Python
Executable File

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import time
import traceback
import gc
import weasyprint
import pypdf
# Number of input documents that convert() renders and writes into one
# intermediate part PDF before releasing them and moving on to the next batch.
BATCH_SIZE = 10
def convert(input_file_names: list[str], output_file_name: str, encoding: str = None) -> list[int]:
    """Render HTML files to PDF and merge them into one output file.

    The inputs are processed in batches of ``BATCH_SIZE``. Every batch is
    rendered with WeasyPrint and written to an intermediate file named
    ``<output_file_name>.part.<index of first input in batch>``. Afterwards
    all intermediate files are concatenated with pypdf and written to
    ``output_file_name``.

    The intermediate files are removed and the pypdf writer is closed even
    when rendering, writing or merging fails, so a failed conversion does not
    leave ``*.part.*`` files behind.

    Args:
        input_file_names: HTML inputs, in the order they should appear in the
            output; each one is passed to ``weasyprint.HTML``.
        output_file_name: Path of the merged PDF to write.
        encoding: Passed through to ``weasyprint.HTML`` as ``encoding``.

    Returns:
        The number of pages of every input document, in input order.

    Raises:
        Exception: Whatever WeasyPrint, pypdf or the file system raise while
            rendering or writing is propagated to the caller.
    """
    # it takes roughly 100ms to generate one document
    page_nums = []
    tmp_file_names = []
    try:
        for i in range(0, len(input_file_names), BATCH_SIZE):
            batch = input_file_names[i:i + BATCH_SIZE]
            documents = []
            for file_name in batch:
                html = weasyprint.HTML(file_name, encoding=encoding)
                documents.append(html.render())
                del html
            all_pages = [p for doc in documents for p in doc.pages]
            tmp_file_name = f'{output_file_name}.part.{i:0000}'
            # Register the name before writing so that a partially written
            # file from a failed write_pdf() is cleaned up as well.
            tmp_file_names.append(tmp_file_name)
            # The first document of the batch is copied with the pages of the
            # whole batch, yielding one PDF per batch.
            documents[0].copy(all_pages).write_pdf(tmp_file_name)
            page_nums += [len(doc.pages) for doc in documents]
            # Drop the rendered documents before the next batch is rendered.
            del documents
            del all_pages
            gc.collect()

        merger = pypdf.PdfWriter()
        try:
            for pdf in tmp_file_names:
                merger.append(pdf)
            merger.write(output_file_name)
        finally:
            merger.close()
    finally:
        for tmp_file_name in tmp_file_names:
            try:
                os.remove(tmp_file_name)
            except OSError:
                # Best-effort cleanup: the file may never have been created,
                # and a cleanup failure must not mask the original error.
                pass
    return page_nums
def _wrapper_convert(args: list[str], encoding: str = None) -> None:
    """Run one conversion job and report its outcome on stdout.

    ``args`` holds the input file names followed by the output file name as
    its last element. On success a single summary line (document count, page
    counts and elapsed ``time.process_time()``) is printed. Any ``Exception``
    is caught: a one-line ``error: ...`` message is printed to stdout and the
    traceback is emitted through ``traceback.print_exception``.
    """
    try:
        started = time.process_time()
        page_counts = convert(args[:-1], args[-1], encoding=encoding)
        finished = time.process_time()
        per_document = ", ".join(str(count) for count in page_counts)
        summary = (
            f'{len(args) - 1} documents, '
            f'{sum(page_counts)} pages ({per_document}), '
            f'{finished - started:.1f} sec'
        )
        print(summary, flush=True)
    except Exception as exc:
        # Keep the error report on a single line; the traceback follows separately.
        single_line = str(exc).replace('\n', ' ')
        print(f'error: {single_line}', flush=True)
        traceback.print_exception(exc)
def usage() -> None:
    """Print the command-line help to stderr and exit with status 1."""
    help_text = (
        f'usage: {sys.argv[0]} [-h] [-d DIR] [-e ENCODING] [ - | INPUT [INPUT...] OUTPUT ]\n\n'
        'options:\n'
        ' -h, --help show this help message and exit\n'
        ' -d, --directory set the working directory\n'
        ' -e, --encoding encoding of the input files\n'
        '\n'
        ' - use stdin for retrieving input and output file names (semi-colon-seperated)\n'
        ' INPUT name of an html input file\n'
        ' OUTPUT name of an pdf output file'
    )
    print(help_text, file=sys.stderr)
    raise SystemExit(1)
def _get_arg(args: list[str], n1: str, n2: str = None) -> str:
    """Extract the value of an option from ``args`` and remove it in place.

    ``n1`` and, if given, ``n2`` are alternative spellings of the same option.
    For each spelling found, the option and the element following it are
    removed from ``args``. When both spellings are present, the value of
    ``n2`` is returned. If an option is the last element and therefore has no
    value, ``usage()`` is called.

    Returns the option's value, or ``None`` when neither spelling is present.
    """
    spellings = [n1, n2] if n2 else [n1]
    value = None
    for spelling in spellings:
        try:
            pos = args.index(spelling)
        except ValueError:
            # This spelling was not given on the command line.
            continue
        if pos + 1 >= len(args):
            usage()
        value = args[pos + 1]
        # Drop the option together with its value.
        del args[pos:pos + 2]
    return value
def main() -> None:
    """Parse the command line and run the requested conversion(s).

    Without arguments, or with ``-h``/``--help``, the usage text is shown.
    ``-d``/``--directory`` changes the working directory and
    ``-e``/``--encoding`` is forwarded to the conversion. If the only
    remaining argument is ``-``, every line read from stdin is split on ``;``
    and converted as one job; otherwise the remaining arguments
    (inputs followed by the output) form a single job.
    """
    args = sys.argv[1:]
    if not args or '-h' in args or '--help' in args:
        usage()

    working_dir = _get_arg(args, '-d', '--directory')
    if working_dir:
        os.chdir(working_dir)
    encoding = _get_arg(args, '-e', '--encoding')

    if args == ['-']:
        # Batch mode: one semicolon-separated job per stdin line.
        for line in sys.stdin:
            job = line.strip().split(';')
            _wrapper_convert(job, encoding=encoding)
        return

    if len(args) < 2:
        # At least one input and the output file name are required.
        usage()
        return

    _wrapper_convert(args, encoding=encoding)
# Script entry point: run the CLI and exit with status 0 once main() returns.
if __name__ == '__main__':
    main()
    raise SystemExit(0)