diff --git a/winziprint/winziprint.py b/winziprint/winziprint.py index 34e23ec..7430731 100755 --- a/winziprint/winziprint.py +++ b/winziprint/winziprint.py @@ -14,33 +14,37 @@ import pypdf BATCH_SIZE = 10 -def convert(input_file_names: list[str], output_file_name: str, encoding: str = None) -> list[int]: +def convert(input_files: list[str], output_files: str, encoding: str = None, progress: bool = False) -> list[int]: # it takes roughly 100ms to generate one document page_nums = [] tmp_file_names = [] + steps = len(input_files) // BATCH_SIZE + 1 + try: - for i in range(0, len(input_file_names), BATCH_SIZE): - batch = input_file_names[i:i + BATCH_SIZE] + for i in range(0, len(input_files), BATCH_SIZE): + batch = input_files[i:i + BATCH_SIZE] documents = [] - for file_name in batch: + for n, file_name in enumerate(batch): html = weasyprint.HTML(file_name, encoding=encoding) doc = html.render() documents.append(doc) del html all_pages = [p for doc in documents for p in doc.pages] - tmp_file_name = f'{output_file_name}.part.{i:0000}' + tmp_file_name = f'{output_files}.part.{i:0000}' documents[0].copy(all_pages).write_pdf(tmp_file_name) tmp_file_names.append(tmp_file_name) page_nums += [len(doc.pages) for doc in documents] del documents del all_pages gc.collect() + if progress: + print(f'progress: {i // BATCH_SIZE + 1}/{steps}', flush=True) merger = pypdf.PdfWriter() for pdf in tmp_file_names: merger.append(pdf) - merger.write(output_file_name) + merger.write(output_files) merger.close() del merger finally: @@ -48,15 +52,19 @@ def convert(input_file_names: list[str], output_file_name: str, encoding: str = if os.path.isfile(pdf): os.remove(pdf) + if progress: + print(f'progress: {steps}/{steps}', flush=True) + return page_nums -def _wrapper_convert(args: list[str], encoding: str = None) -> None: +def _wrapper_convert(args: list[str], encoding: str = None, progress: bool = False) -> None: try: t0 = time.process_time() - pages = convert(args[:-1], args[-1], encoding=encoding) + pages = convert(args[:-1], args[-1], encoding=encoding, progress=progress) t1 = time.process_time() - print(f'{len(args) - 1} documents, ' + print(f'success: ' + f'{len(args) - 1} documents, ' f'{sum(pages)} pages ({", ".join(str(p) for p in pages)}), ' f'{t1 - t0:.1f} sec', flush=True) @@ -69,11 +77,12 @@ def _wrapper_convert(args: list[str], encoding: str = None) -> None: def usage() -> None: - print(f'usage: {sys.argv[0]} [-h] [-d DIR] [-e ENCODING] [ - | INPUT [INPUT...] OUTPUT ]\n\n' + print(f'usage: {sys.argv[0]} [-h] [-p] [-d DIR] [-e ENCODING] [ - | INPUT [INPUT...] OUTPUT ]\n\n' 'options:\n' ' -h, --help show this help message and exit\n' ' -d, --directory set the working directory\n' ' -e, --encoding encoding of the input files\n' + ' -p, --progress show progress updates\n' '\n' ' - use stdin for retrieving input and output file names (semi-colon-seperated)\n' ' INPUT name of an html input file\n' @@ -104,13 +113,21 @@ def main() -> None: os.chdir(working_dir) encoding = _get_arg(args, '-e', '--encoding') + progress = False + if '-p' in args: + args.remove('-p') + progress = True + if '--progress' in args: + args.remove('--progress') + progress = True + if args == ['-']: for line in sys.stdin: - _wrapper_convert(line.strip().split(';'), encoding=encoding) + _wrapper_convert(line.strip().split(';'), encoding=encoding, progress=progress) elif len(args) < 2: usage() else: - _wrapper_convert(args, encoding=encoding) + _wrapper_convert(args, encoding=encoding, progress=progress) if __name__ == '__main__':