From dffe4f530812ff4087622523a598085255abc00c Mon Sep 17 00:00:00 2001 From: Matthew Lemon Date: Sat, 22 Jun 2024 16:34:09 +0100 Subject: Fixes bug where multiple PDFs not snapshotted Includes test of new function which determines the length and composition of the snapshotted pages. --- pyblackbird_cc/resources/views.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'pyblackbird_cc/resources/views.py') diff --git a/pyblackbird_cc/resources/views.py b/pyblackbird_cc/resources/views.py index 47121b5..b566eee 100644 --- a/pyblackbird_cc/resources/views.py +++ b/pyblackbird_cc/resources/views.py @@ -26,6 +26,20 @@ from .models import Resource logger = logging.getLogger(__name__) +def _get_pdf_collection_type(coll) -> str: + if len(coll) == 1 and len(coll[0]) == 1: + return "SINGLE_PDF_SINGLE_PAGE" + if len(coll) == 1 and len(coll[0]) > 1: + return "SINGLE_PDF_MULTI_PAGE" + if len(coll) > 1: + for c in coll: + if len(c) > 1: + return "MULTI_PDF_MULTI_PAGE" + else: + return "MULTI_PDF_SINGLE_PAGE" + return "TODO" + + # I want to create a dataclass here to hold the resource information to pass to the view @dataclass class ResourceInfo: @@ -143,7 +157,7 @@ def get_presigned_obj_url(bucket_name, obj_name, expiration=3600) -> str | None: return response -def upload_to_s3(pdf_files, thumbnail_files, snappedshotted_pages) -> bool: +def upload_to_s3(pdf_files, thumbnail_files, snapshotted_pages) -> bool: session = boto3.Session() client = session.client( "s3", @@ -168,24 +182,27 @@ def upload_to_s3(pdf_files, thumbnail_files, snappedshotted_pages) -> bool: settings.AWS_STORAGE_BUCKET_NAME, f"thumbnails/{f.name}", ) - if len(snappedshotted_pages[0]) == 1: - for img in snappedshotted_pages[0]: + if _get_pdf_collection_type(snapshotted_pages) == "SINGLE_PDF_SINGLE_PAGE" \ + or _get_pdf_collection_type(snapshotted_pages) == "SINGLE_PDF_MULTI_PAGE": + for img in snapshotted_pages[0]: logger.info("Uploading {img} to S3") 
client.upload_file( img, settings.AWS_STORAGE_BUCKET_NAME, f"snapshotted_pages/{Path(img).name}", ) - else: - for lst in snappedshotted_pages: - for img in lst: + return True + if _get_pdf_collection_type(snapshotted_pages) == "MULTI_PDF_SINGLE_PAGE" \ + or _get_pdf_collection_type(snapshotted_pages) == "MULTI_PDF_MULTI_PAGE": + for pdf in snapshotted_pages: + for img in pdf: logger.info("Uploading {img} to S3") client.upload_file( img, settings.AWS_STORAGE_BUCKET_NAME, f"snapshotted_pages/{Path(img).name}", ) - return True + return True except ClientError: logging.exception("Error uploading files to S3") return False -- cgit v1.2.3