winziprint: add progress updates
This commit is contained in:
@ -14,33 +14,37 @@ import pypdf
|
|||||||
BATCH_SIZE = 10
|
BATCH_SIZE = 10
|
||||||
|
|
||||||
|
|
||||||
def convert(input_file_names: list[str], output_file_name: str, encoding: str = None) -> list[int]:
|
def convert(input_files: list[str], output_files: str, encoding: str = None, progress: bool = False) -> list[int]:
|
||||||
# it takes roughly 100ms to generate one document
|
# it takes roughly 100ms to generate one document
|
||||||
page_nums = []
|
page_nums = []
|
||||||
tmp_file_names = []
|
tmp_file_names = []
|
||||||
|
|
||||||
|
steps = len(input_files) // BATCH_SIZE + 1
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for i in range(0, len(input_file_names), BATCH_SIZE):
|
for i in range(0, len(input_files), BATCH_SIZE):
|
||||||
batch = input_file_names[i:i + BATCH_SIZE]
|
batch = input_files[i:i + BATCH_SIZE]
|
||||||
documents = []
|
documents = []
|
||||||
for file_name in batch:
|
for n, file_name in enumerate(batch):
|
||||||
html = weasyprint.HTML(file_name, encoding=encoding)
|
html = weasyprint.HTML(file_name, encoding=encoding)
|
||||||
doc = html.render()
|
doc = html.render()
|
||||||
documents.append(doc)
|
documents.append(doc)
|
||||||
del html
|
del html
|
||||||
all_pages = [p for doc in documents for p in doc.pages]
|
all_pages = [p for doc in documents for p in doc.pages]
|
||||||
tmp_file_name = f'{output_file_name}.part.{i:0000}'
|
tmp_file_name = f'{output_files}.part.{i:0000}'
|
||||||
documents[0].copy(all_pages).write_pdf(tmp_file_name)
|
documents[0].copy(all_pages).write_pdf(tmp_file_name)
|
||||||
tmp_file_names.append(tmp_file_name)
|
tmp_file_names.append(tmp_file_name)
|
||||||
page_nums += [len(doc.pages) for doc in documents]
|
page_nums += [len(doc.pages) for doc in documents]
|
||||||
del documents
|
del documents
|
||||||
del all_pages
|
del all_pages
|
||||||
gc.collect()
|
gc.collect()
|
||||||
|
if progress:
|
||||||
|
print(f'progress: {i // BATCH_SIZE + 1}/{steps}', flush=True)
|
||||||
|
|
||||||
merger = pypdf.PdfWriter()
|
merger = pypdf.PdfWriter()
|
||||||
for pdf in tmp_file_names:
|
for pdf in tmp_file_names:
|
||||||
merger.append(pdf)
|
merger.append(pdf)
|
||||||
merger.write(output_file_name)
|
merger.write(output_files)
|
||||||
merger.close()
|
merger.close()
|
||||||
del merger
|
del merger
|
||||||
finally:
|
finally:
|
||||||
@ -48,15 +52,19 @@ def convert(input_file_names: list[str], output_file_name: str, encoding: str =
|
|||||||
if os.path.isfile(pdf):
|
if os.path.isfile(pdf):
|
||||||
os.remove(pdf)
|
os.remove(pdf)
|
||||||
|
|
||||||
|
if progress:
|
||||||
|
print(f'progress: {steps}/{steps}', flush=True)
|
||||||
|
|
||||||
return page_nums
|
return page_nums
|
||||||
|
|
||||||
|
|
||||||
def _wrapper_convert(args: list[str], encoding: str = None) -> None:
|
def _wrapper_convert(args: list[str], encoding: str = None, progress: bool = False) -> None:
|
||||||
try:
|
try:
|
||||||
t0 = time.process_time()
|
t0 = time.process_time()
|
||||||
pages = convert(args[:-1], args[-1], encoding=encoding)
|
pages = convert(args[:-1], args[-1], encoding=encoding, progress=progress)
|
||||||
t1 = time.process_time()
|
t1 = time.process_time()
|
||||||
print(f'{len(args) - 1} documents, '
|
print(f'success: '
|
||||||
|
f'{len(args) - 1} documents, '
|
||||||
f'{sum(pages)} pages ({", ".join(str(p) for p in pages)}), '
|
f'{sum(pages)} pages ({", ".join(str(p) for p in pages)}), '
|
||||||
f'{t1 - t0:.1f} sec',
|
f'{t1 - t0:.1f} sec',
|
||||||
flush=True)
|
flush=True)
|
||||||
@ -69,11 +77,12 @@ def _wrapper_convert(args: list[str], encoding: str = None) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def usage() -> None:
|
def usage() -> None:
|
||||||
print(f'usage: {sys.argv[0]} [-h] [-d DIR] [-e ENCODING] [ - | INPUT [INPUT...] OUTPUT ]\n\n'
|
print(f'usage: {sys.argv[0]} [-h] [-p] [-d DIR] [-e ENCODING] [ - | INPUT [INPUT...] OUTPUT ]\n\n'
|
||||||
'options:\n'
|
'options:\n'
|
||||||
' -h, --help show this help message and exit\n'
|
' -h, --help show this help message and exit\n'
|
||||||
' -d, --directory set the working directory\n'
|
' -d, --directory set the working directory\n'
|
||||||
' -e, --encoding encoding of the input files\n'
|
' -e, --encoding encoding of the input files\n'
|
||||||
|
' -p, --progress show progress updates\n'
|
||||||
'\n'
|
'\n'
|
||||||
' - use stdin for retrieving input and output file names (semi-colon-seperated)\n'
|
' - use stdin for retrieving input and output file names (semi-colon-seperated)\n'
|
||||||
' INPUT name of an html input file\n'
|
' INPUT name of an html input file\n'
|
||||||
@ -104,13 +113,21 @@ def main() -> None:
|
|||||||
os.chdir(working_dir)
|
os.chdir(working_dir)
|
||||||
encoding = _get_arg(args, '-e', '--encoding')
|
encoding = _get_arg(args, '-e', '--encoding')
|
||||||
|
|
||||||
|
progress = False
|
||||||
|
if '-p' in args:
|
||||||
|
args.remove('-p')
|
||||||
|
progress = True
|
||||||
|
if '--progress' in args:
|
||||||
|
args.remove('--progress')
|
||||||
|
progress = True
|
||||||
|
|
||||||
if args == ['-']:
|
if args == ['-']:
|
||||||
for line in sys.stdin:
|
for line in sys.stdin:
|
||||||
_wrapper_convert(line.strip().split(';'), encoding=encoding)
|
_wrapper_convert(line.strip().split(';'), encoding=encoding, progress=progress)
|
||||||
elif len(args) < 2:
|
elif len(args) < 2:
|
||||||
usage()
|
usage()
|
||||||
else:
|
else:
|
||||||
_wrapper_convert(args, encoding=encoding)
|
_wrapper_convert(args, encoding=encoding, progress=progress)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Reference in New Issue
Block a user