about | summary | refs | log | tree | commit | diff | stats
path: root/pyblackbird_cc
diff options
context:
space:
mode:
author: Matthew Lemon <y@yulqen.org> 2024-06-22 16:34:09 +0100
committer: Matthew Lemon <y@yulqen.org> 2024-06-22 16:36:17 +0100
commit: dffe4f530812ff4087622523a598085255abc00c (patch)
tree: b54d1a9f558bd13b4e030348364a682efd343b51 /pyblackbird_cc
parent: 3e55a1d2843805de40714a3e920b97cf546966f1 (diff)
Fixes bug where multiple PDFs were not snapshotted
Includes a test of the new function which determines the length and composition of the snapshotted pages.
Diffstat (limited to 'pyblackbird_cc')
-rw-r--r-- pyblackbird_cc/resources/tests/test_file_processing.py | 14
-rw-r--r-- pyblackbird_cc/resources/views.py | 31
2 files changed, 38 insertions, 7 deletions
diff --git a/pyblackbird_cc/resources/tests/test_file_processing.py b/pyblackbird_cc/resources/tests/test_file_processing.py
index 40cc5eb..cbb4972 100644
--- a/pyblackbird_cc/resources/tests/test_file_processing.py
+++ b/pyblackbird_cc/resources/tests/test_file_processing.py
@@ -5,6 +5,8 @@ from django.core.files.uploadedfile import TemporaryUploadedFile
from django.test import TestCase
from django.urls import reverse
+from pyblackbird_cc.resources.views import _get_pdf_collection_type
+
from .. import services
""" Explanation:
@@ -36,6 +38,18 @@ We also test the integrity of the uploaded PDF file here by checking the number
"""
+def test_detect_snapshotted_pdf_collection():
+ single_pdf_single_page = [["toss"]]
+ single_pdf_multi_page = [["toss2", "toss8"]]
+ multi_pdf_single_page = [["toss"], ["toss2"]]
+ multi_pdf_multi_page = [["toss", "toss2"], ["toss", "toss2"]]
+
+ assert _get_pdf_collection_type(single_pdf_single_page) == "SINGLE_PDF_SINGLE_PAGE"
+ assert _get_pdf_collection_type(single_pdf_multi_page) == "SINGLE_PDF_MULTI_PAGE"
+ assert _get_pdf_collection_type(multi_pdf_single_page) == "MULTI_PDF_SINGLE_PAGE"
+ assert _get_pdf_collection_type(multi_pdf_multi_page) == "MULTI_PDF_MULTI_PAGE"
+
+
class PDFFileUploadTestCase(TestCase):
def setUp(self):
self.url = reverse("resources:create_resource")
diff --git a/pyblackbird_cc/resources/views.py b/pyblackbird_cc/resources/views.py
index 47121b5..b566eee 100644
--- a/pyblackbird_cc/resources/views.py
+++ b/pyblackbird_cc/resources/views.py
@@ -26,6 +26,20 @@ from .models import Resource
logger = logging.getLogger(__name__)
+def _get_pdf_collection_type(coll) -> str:
+ if len(coll) == 1 and len(coll[0]) == 1:
+ return "SINGLE_PDF_SINGLE_PAGE"
+ if len(coll) == 1 and len(coll[0]) > 1:
+ return "SINGLE_PDF_MULTI_PAGE"
+ if len(coll) > 1:
+ for c in coll:
+ if len(c) > 1:
+ return "MULTI_PDF_MULTI_PAGE"
+ else:
+ return "MULTI_PDF_SINGLE_PAGE"
+ return "TODO"
+
+
# I want to create a dataclass here to hold the resource information to pass to the view
@dataclass
class ResourceInfo:
@@ -143,7 +157,7 @@ def get_presigned_obj_url(bucket_name, obj_name, expiration=3600) -> str | None:
return response
-def upload_to_s3(pdf_files, thumbnail_files, snappedshotted_pages) -> bool:
+def upload_to_s3(pdf_files, thumbnail_files, snapshotted_pages) -> bool:
session = boto3.Session()
client = session.client(
"s3",
@@ -168,24 +182,27 @@ def upload_to_s3(pdf_files, thumbnail_files, snappedshotted_pages) -> bool:
settings.AWS_STORAGE_BUCKET_NAME,
f"thumbnails/{f.name}",
)
- if len(snappedshotted_pages[0]) == 1:
- for img in snappedshotted_pages[0]:
+ if _get_pdf_collection_type(snapshotted_pages) == "SINGLE_PDF_SINGLE_PAGE" \
+ or _get_pdf_collection_type(snapshotted_pages) == "SINGLE_PDF_MULTI_PAGE":
+ for img in snapshotted_pages[0]:
logger.info("Uploading {img} to S3")
client.upload_file(
img,
settings.AWS_STORAGE_BUCKET_NAME,
f"snapshotted_pages/{Path(img).name}",
)
- else:
- for lst in snappedshotted_pages:
- for img in lst:
+ return True
+ if _get_pdf_collection_type(snapshotted_pages) == "MULTI_PDF_SINGLE_PAGE" \
+ or _get_pdf_collection_type(snapshotted_pages) == "MULTI_PDF_MULTI_PAGE":
+ for pdf in snapshotted_pages:
+ for img in pdf:
logger.info("Uploading {img} to S3")
client.upload_file(
img,
settings.AWS_STORAGE_BUCKET_NAME,
f"snapshotted_pages/{Path(img).name}",
)
- return True
+ return True
except ClientError:
logging.exception("Error uploading files to S3")
return False