From dffe4f530812ff4087622523a598085255abc00c Mon Sep 17 00:00:00 2001 From: Matthew Lemon Date: Sat, 22 Jun 2024 16:34:09 +0100 Subject: Fixes bug where multiple PDFs were not snapshotted Includes test of new function which determines the length and composition of the snapshotted pages. --- .../resources/tests/test_file_processing.py | 14 ++++++++++ pyblackbird_cc/resources/views.py | 31 +++++++++++++++++----- 2 files changed, 38 insertions(+), 7 deletions(-) (limited to 'pyblackbird_cc/resources') diff --git a/pyblackbird_cc/resources/tests/test_file_processing.py b/pyblackbird_cc/resources/tests/test_file_processing.py index 40cc5eb..cbb4972 100644 --- a/pyblackbird_cc/resources/tests/test_file_processing.py +++ b/pyblackbird_cc/resources/tests/test_file_processing.py @@ -5,6 +5,8 @@ from django.core.files.uploadedfile import TemporaryUploadedFile from django.test import TestCase from django.urls import reverse +from pyblackbird_cc.resources.views import _get_pdf_collection_type + from .. 
def _get_pdf_collection_type(coll) -> str:
    """Classify a collection of snapshotted PDF pages by its shape.

    ``coll`` is a sequence with one inner sequence per PDF; each inner
    sequence holds the page snapshots taken from that PDF.

    Returns one of ``"SINGLE_PDF_SINGLE_PAGE"``, ``"SINGLE_PDF_MULTI_PAGE"``,
    ``"MULTI_PDF_SINGLE_PAGE"`` or ``"MULTI_PDF_MULTI_PAGE"``. The placeholder
    ``"TODO"`` is returned for shapes outside those four (an empty
    collection, or a single PDF with no pages).
    """
    pdf_count = len(coll)
    if pdf_count == 0:
        return "TODO"

    if pdf_count == 1:
        page_count = len(coll[0])
        if page_count == 1:
            return "SINGLE_PDF_SINGLE_PAGE"
        if page_count > 1:
            return "SINGLE_PDF_MULTI_PAGE"
        # A lone PDF with no snapshotted pages has no dedicated category.
        return "TODO"

    # Inspect every PDF, not only the first one: a single multi-page PDF
    # anywhere in the collection makes the whole collection multi-page.
    if any(len(pages) > 1 for pages in coll):
        return "MULTI_PDF_MULTI_PAGE"
    return "MULTI_PDF_SINGLE_PAGE"


def test_detect_snapshotted_pdf_collection():
    """Check the label returned for each supported collection shape."""
    single_pdf_single_page = [["toss"]]
    single_pdf_multi_page = [["toss2", "toss8"]]
    multi_pdf_single_page = [["toss"], ["toss2"]]
    multi_pdf_multi_page = [["toss", "toss2"], ["toss", "toss2"]]
    # Regression case: the multi-page PDF is not the first in the collection.
    multi_pdf_mixed_pages = [["toss"], ["toss2", "toss8"]]

    assert _get_pdf_collection_type(single_pdf_single_page) == "SINGLE_PDF_SINGLE_PAGE"
    assert _get_pdf_collection_type(single_pdf_multi_page) == "SINGLE_PDF_MULTI_PAGE"
    assert _get_pdf_collection_type(multi_pdf_single_page) == "MULTI_PDF_SINGLE_PAGE"
    assert _get_pdf_collection_type(multi_pdf_multi_page) == "MULTI_PDF_MULTI_PAGE"
    assert _get_pdf_collection_type(multi_pdf_mixed_pages) == "MULTI_PDF_MULTI_PAGE"
settings.AWS_STORAGE_BUCKET_NAME, f"thumbnails/{f.name}", ) - if len(snappedshotted_pages[0]) == 1: - for img in snappedshotted_pages[0]: + if _get_pdf_collection_type(snapshotted_pages) == "SINGLE_PDF_SINGLE_PAGE" \ + or _get_pdf_collection_type(snapshotted_pages) == "SINGLE_PDF_MULTI_PAGE": + for img in snapshotted_pages[0]: logger.info("Uploading {img} to S3") client.upload_file( img, settings.AWS_STORAGE_BUCKET_NAME, f"snapshotted_pages/{Path(img).name}", ) - else: - for lst in snappedshotted_pages: - for img in lst: + return True + if _get_pdf_collection_type(snapshotted_pages) == "MULTI_PDF_SINGLE_PAGE" \ + or _get_pdf_collection_type(snapshotted_pages) == "MULTI_PDF_MULTI_PAGE": + for pdf in snapshotted_pages: + for img in pdf: logger.info("Uploading {img} to S3") client.upload_file( img, settings.AWS_STORAGE_BUCKET_NAME, f"snapshotted_pages/{Path(img).name}", ) - return True + return True except ClientError: logging.exception("Error uploading files to S3") return False -- cgit v1.2.3