commit 362e071bb1e093f9b9262b67e06913bc1e962bce
Author: Lorenz Stechauner
Date:   Sat Nov 25 20:51:15 2023 +0100

    Initial commit

diff --git a/CreateExe.bat b/CreateExe.bat
new file mode 100644
index 0000000..83085a4
--- /dev/null
+++ b/CreateExe.bat
@@ -0,0 +1,6 @@
+pyinstaller --noconfirm ^
+    --onefile ^
+    --console ^
+    --icon "NONE" ^
+    --name "WinziPrint" ^
+    src/winziprint.py
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e9f2197
--- /dev/null
+++ b/README.md
@@ -0,0 +1,5 @@
+
+# WinziPrint
+
+A standalone [WeasyPrint](https://weasyprint.org/) wrapper for Windows.
+
diff --git a/src/winziprint.py b/src/winziprint.py
new file mode 100755
index 0000000..7d802c0
--- /dev/null
+++ b/src/winziprint.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""WinziPrint: render HTML files with WeasyPrint and merge them, together with PDFs, into one PDF."""
+
+from typing import Union
+import os
+import sys
+import time
+import traceback
+import gc
+
+import weasyprint
+import pypdf
+
+
+VERSION = __version__ = '0.1.0'
+
+# number of HTML documents that are rendered into one temporary PDF file
+BATCH_SIZE = 10
+
+
+def convert(input_files: list[str],
+            output_files: str,
+            encoding: str = None,
+            padding: bool = False,
+            progress: bool = False) -> list[int]:
+    """Render the HTML inputs and merge all inputs into a single PDF file.
+
+    Inputs whose name ends in ``.pdf`` are merged unchanged, all other inputs
+    are rendered with WeasyPrint first. A leading ``!`` is stripped from the
+    name before the file is opened and exempts that document from padding.
+
+    Args:
+        input_files: Names of the input files, in output order.
+        output_files: Name of the (single) PDF file to write. Temporary files
+            named ``<output_files>.<NNNN>.part`` are created and removed again.
+        encoding: Encoding handed to ``weasyprint.HTML``.
+        padding: Append a blank page after a document if the merged file has
+            an odd number of pages at that point.
+        progress: Print ``progress: <step>/<steps>`` lines to stdout.
+
+    Returns:
+        The number of pages of every input document (padding not included),
+        in input order.
+    """
+    # it takes roughly 100ms to generate one document
+    tmp_page_nums = []
+    tmp_file_names = []
+    page_nums = []
+
+    html_files = [file.lstrip('!') for file in input_files if not file.endswith('.pdf')]
+    # one step per rendered document, one per completely filled batch and one for the merge
+    steps = len(html_files) + len(html_files) // BATCH_SIZE + 1
+
+    try:
+        for i in range(0, len(html_files), BATCH_SIZE):
+            batch = html_files[i:i + BATCH_SIZE]
+            documents = []
+            for n, file_name in enumerate(batch):
+                html = weasyprint.HTML(filename=file_name, encoding=encoding)
+                documents.append(html.render())
+                del html
+                if progress:
+                    print(f'progress: {i + n + i // BATCH_SIZE + 1}/{steps}', flush=True)
+            all_pages = [p for doc in documents for p in doc.pages]
+            tmp_file_name = f'{output_files}.{i:04}.part'
+            # register the name before writing, so a partially written file gets removed too
+            tmp_file_names.append(tmp_file_name)
+            documents[0].copy(all_pages).write_pdf(tmp_file_name)
+            tmp_page_nums += [len(doc.pages) for doc in documents]
+            del documents
+            del all_pages
+            gc.collect()
+            # only completely filled batches are counted as a step of their own
+            if progress and len(batch) == BATCH_SIZE:
+                print(f'progress: {i + BATCH_SIZE + i // BATCH_SIZE + 1}/{steps}', flush=True)
+
+        merger = pypdf.PdfWriter()
+        try:
+            i = 0  # index of the current HTML document in html_files / tmp_page_nums
+            for file_name in input_files:
+                p0 = len(merger.pages)
+                if file_name.endswith('.pdf'):
+                    merger.append(file_name.lstrip('!'))
+                else:
+                    # select the temporary file by the HTML index, PDF inputs must not count
+                    b = i // BATCH_SIZE
+                    batch_page_nums = tmp_page_nums[b * BATCH_SIZE:(b + 1) * BATCH_SIZE]
+                    page_start = sum(batch_page_nums[:i % BATCH_SIZE])
+                    merger.append(tmp_file_names[b], pages=(page_start, page_start + tmp_page_nums[i]))
+                    i += 1
+                page_nums.append(len(merger.pages) - p0)
+                if padding and not file_name.startswith('!') and len(merger.pages) % 2 != 0:
+                    merger.add_blank_page()
+            merger.write(output_files)
+        finally:
+            merger.close()
+            del merger
+    finally:
+        for pdf in tmp_file_names:
+            if os.path.isfile(pdf):
+                os.remove(pdf)
+
+    if progress:
+        print(f'progress: {steps}/{steps}', flush=True)
+
+    return page_nums
+
+
+def _wrapper_convert(args: list[str], encoding: str = None, padding: bool = False, progress: bool = False) -> None:
+    """Run one conversion job and report its outcome on stdout.
+
+    ``args`` holds the input file names followed by the output file name; a
+    leading ``-2`` turns on padding for this job. On success a ``success: ...``
+    line is printed; an exception is reported as an ``error: ...`` line plus a
+    traceback instead of being propagated.
+    """
+    try:
+        inputs = args[:-1]
+        if inputs and inputs[0] == '-2':
+            inputs.pop(0)
+            padding = True
+        if not inputs:
+            # also covers a job that consists of nothing but '-2' and an output name
+            print('error: Too few arguments', flush=True)
+            return
+        output = args[-1]
+        t0 = time.process_time()
+        pages = convert(inputs, output, encoding=encoding, padding=padding, progress=progress)
+        # replay the padding rule of convert() to get the page count of the merged file
+        total = 0
+        for file_name, num in zip(inputs, pages):
+            total += num
+            if padding and not file_name.startswith('!') and total % 2 != 0:
+                total += 1
+        t1 = time.process_time()
+        print(f'success: '
+              f'{len(inputs)} documents, '
+              f'{total} pages ({", ".join(str(p) for p in pages)}), '
+              f'{t1 - t0:.1f} sec',
+              flush=True)
+    except Exception as e:
+        msg = str(e).replace('\n', ' ')
+        print(f'error: {msg}', flush=True)
+        traceback.print_exception(e)
+    finally:
+        gc.collect()
+
+
+def usage(error: bool = False) -> None:
+    """Print the usage message and exit; with ``error`` set, print to stderr and exit with status 1."""
+    print(f'usage: {sys.argv[0]} [-h] [-v] [-p] [-2] [-d DIR] [-e ENCODING] [ - | INPUT [INPUT...] OUTPUT ]\n\n'
+          'options:\n'
+          '  -h, --help          show this help message and exit\n'
+          '  -v, --version       show version and exit\n'
+          '  -d, --directory     set the working directory\n'
+          '  -e, --encoding      encoding of the input files\n'
+          '  -2, --double-sided  pad documents to an even number of pages\n'
+          '  -p, --progress      show progress updates\n'
+          '\n'
+          '  -                   use stdin for retrieving input and output file names (semicolon-separated)\n'
+          '  INPUT               name of an html input file\n'
+          '  OUTPUT              name of a pdf output file',
+          file=sys.stderr if error else sys.stdout)
+    sys.exit(1 if error else 0)
+
+
+def version() -> None:
+    """Print the versions of WinziPrint, WeasyPrint and pypdf and exit with status 0."""
+    print(f'WinziPrint: {__version__}\n'
+          f'WeasyPrint: {weasyprint.__version__}\n'
+          f'pypdf: {pypdf.__version__}')
+    sys.exit(0)
+
+
+def _get_arg(args: list[str], n1: str, n2: str = None, flag: bool = False) -> Union[None, str, bool]:
+    """Remove an option from ``args`` (in place) and return its value.
+
+    Args:
+        args: Command line arguments; every occurrence of the option is removed.
+        n1: Name of the option, e.g. ``-d``.
+        n2: Optional alternative name, e.g. ``--directory``.
+        flag: Whether the option is a switch without a value.
+
+    Returns:
+        For a switch ``True`` if it was given, else ``False``. Otherwise the
+        value that follows the option, or ``None`` if the option is absent.
+        A missing value ends the program via ``usage(True)``.
+    """
+    names = [n1, n2] if n2 else [n1]
+    v = None
+    for n in names:
+        # loop, so that a repeated option is not left behind as an input file name
+        while n in args:
+            i = args.index(n)
+            if flag:
+                v = True
+                args.pop(i)
+            else:
+                if i + 1 >= len(args):
+                    usage(True)
+                v = args[i + 1]
+                del args[i:i + 2]
+    return bool(v) if flag else v
+
+
+def main() -> None:
+    """Parse the command line and convert either the given files or the jobs read from stdin."""
+    args = sys.argv[1:]
+    if not args or '-h' in args or '--help' in args:
+        usage()
+    elif '-v' in args or '--version' in args:
+        version()
+
+    working_dir = _get_arg(args, '-d', '--directory')
+    if working_dir:
+        os.chdir(working_dir)
+    encoding = _get_arg(args, '-e', '--encoding')
+    progress = _get_arg(args, '-p', '--progress', flag=True)
+    double_sided = _get_arg(args, '-2', '--double-sided', flag=True)
+
+    if args == ['-']:
+        # one job per line: INPUT;[INPUT;...]OUTPUT
+        for line in sys.stdin:
+            _wrapper_convert(line.strip().split(';'), encoding=encoding, padding=double_sided, progress=progress)
+    elif len(args) < 2:
+        usage(True)
+    else:
+        _wrapper_convert(args, encoding=encoding, padding=double_sided, progress=progress)
+
+
+if __name__ == '__main__':
+    main()
+    sys.exit(0)