winziprint: add progress updates

2023-10-19 21:48:00 +02:00
parent 5c924c944c
commit 499cd2032e
1 changed file with 29 additions and 12 deletions
@@ -14,33 +14,37 @@ import pypdf
 BATCH_SIZE = 10


-def convert(input_file_names: list[str], output_file_name: str, encoding: str = None) -> list[int]:
+def convert(input_files: list[str], output_files: str, encoding: str = None, progress: bool = False) -> list[int]:
    # it takes roughly 100ms to generate one document
    page_nums = []
    tmp_file_names = []

+    steps = len(input_files) // BATCH_SIZE + 1
+
    try:
-        for i in range(0, len(input_file_names), BATCH_SIZE):
-            batch = input_file_names[i:i + BATCH_SIZE]
+        for i in range(0, len(input_files), BATCH_SIZE):
+            batch = input_files[i:i + BATCH_SIZE]
            documents = []
-            for file_name in batch:
+            for n, file_name in enumerate(batch):
                html = weasyprint.HTML(file_name, encoding=encoding)
                doc = html.render()
                documents.append(doc)
                del html
            all_pages = [p for doc in documents for p in doc.pages]
-            tmp_file_name = f'{output_file_name}.part.{i:0000}'
+            tmp_file_name = f'{output_files}.part.{i:0000}'
            documents[0].copy(all_pages).write_pdf(tmp_file_name)
            tmp_file_names.append(tmp_file_name)
            page_nums += [len(doc.pages) for doc in documents]
            del documents
            del all_pages
            gc.collect()
+            if progress:
+                print(f'progress: {i // BATCH_SIZE + 1}/{steps}', flush=True)

        merger = pypdf.PdfWriter()
        for pdf in tmp_file_names:
            merger.append(pdf)
-        merger.write(output_file_name)
+        merger.write(output_files)
        merger.close()
        del merger
    finally:
@@ -48,15 +52,19 @@ def convert(input_file_names: list[str], output_file_name: str, encoding: str =
            if os.path.isfile(pdf):
                os.remove(pdf)

+    if progress:
+        print(f'progress: {steps}/{steps}', flush=True)
+
    return page_nums


-def _wrapper_convert(args: list[str], encoding: str = None) -> None:
+def _wrapper_convert(args: list[str], encoding: str = None, progress: bool = False) -> None:
    try:
        t0 = time.process_time()
-        pages = convert(args[:-1], args[-1], encoding=encoding)
+        pages = convert(args[:-1], args[-1], encoding=encoding, progress=progress)
        t1 = time.process_time()
-        print(f'{len(args) - 1} documents, '
+        print(f'success: '
+              f'{len(args) - 1} documents, '
              f'{sum(pages)} pages ({", ".join(str(p) for p in pages)}), '
              f'{t1 - t0:.1f} sec',
              flush=True)
@@ -69,11 +77,12 @@ def _wrapper_convert(args: list[str], encoding: str = None) -> None:


 def usage() -> None:
-    print(f'usage: {sys.argv[0]} [-h] [-d DIR] [-e ENCODING] [ - | INPUT [INPUT...] OUTPUT ]\n\n'
+    print(f'usage: {sys.argv[0]} [-h] [-p] [-d DIR] [-e ENCODING] [ - | INPUT [INPUT...] OUTPUT ]\n\n'
          'options:\n'
          ' -h, --help       show this help message and exit\n'
          ' -d, --directory  set the working directory\n'
          ' -e, --encoding   encoding of the input files\n'
+          ' -p, --progress   show progress updates\n'
          '\n'
          ' -                use stdin for retrieving input and output file names (semi-colon-seperated)\n'
          ' INPUT            name of an html input file\n'
@@ -104,13 +113,21 @@ def main() -> None:
        os.chdir(working_dir)
    encoding = _get_arg(args, '-e', '--encoding')

+    progress = False
+    if '-p' in args:
+        args.remove('-p')
+        progress = True
+    if '--progress' in args:
+        args.remove('--progress')
+        progress = True
+
    if args == ['-']:
        for line in sys.stdin:
-            _wrapper_convert(line.strip().split(';'), encoding=encoding)
+            _wrapper_convert(line.strip().split(';'), encoding=encoding, progress=progress)
    elif len(args) < 2:
        usage()
    else:
-        _wrapper_convert(args, encoding=encoding)
+        _wrapper_convert(args, encoding=encoding, progress=progress)


 if __name__ == '__main__':