about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- pyblackbird_cc/resources/tests/test_file_processing.py 14
-rw-r--r-- pyblackbird_cc/resources/views.py 31
2 files changed, 38 insertions, 7 deletions
diff --git a/pyblackbird_cc/resources/tests/test_file_processing.py b/pyblackbird_cc/resources/tests/test_file_processing.py
index 40cc5eb..cbb4972 100644
--- a/pyblackbird_cc/resources/tests/test_file_processing.py
+++ b/pyblackbird_cc/resources/tests/test_file_processing.py
@@ -5,6 +5,8 @@ from django.core.files.uploadedfile import TemporaryUploadedFile
from django.test import TestCase
from django.urls import reverse
+from pyblackbird_cc.resources.views import _get_pdf_collection_type
+
from .. import services
""" Explanation:
@@ -36,6 +38,18 @@ We also test the integrity of the uploaded PDF file here by checking the number
"""
def test_detect_snapshotted_pdf_collection():
    """Check that each PDF/page layout maps to its collection-type label.

    Every layout is a list with one inner list per PDF; each inner list
    holds one placeholder string per page.
    """
    # (layout, expected label) pairs, checked in order.
    expected_by_layout = (
        ([["toss"]], "SINGLE_PDF_SINGLE_PAGE"),
        ([["toss2", "toss8"]], "SINGLE_PDF_MULTI_PAGE"),
        ([["toss"], ["toss2"]], "MULTI_PDF_SINGLE_PAGE"),
        ([["toss", "toss2"], ["toss", "toss2"]], "MULTI_PDF_MULTI_PAGE"),
    )

    for layout, expected_label in expected_by_layout:
        assert _get_pdf_collection_type(layout) == expected_label
+
+
class PDFFileUploadTestCase(TestCase):
def setUp(self):
self.url = reverse("resources:create_resource")
diff --git a/pyblackbird_cc/resources/views.py b/pyblackbird_cc/resources/views.py
index 47121b5..b566eee 100644
--- a/pyblackbird_cc/resources/views.py
+++ b/pyblackbird_cc/resources/views.py
@@ -26,6 +26,20 @@ from .models import Resource
logger = logging.getLogger(__name__)
+def _get_pdf_collection_type(coll) -> str:
+ if len(coll) == 1 and len(coll[0]) == 1:
+ return "SINGLE_PDF_SINGLE_PAGE"
+ if len(coll) == 1 and len(coll[0]) > 1:
+ return "SINGLE_PDF_MULTI_PAGE"
+ if len(coll) > 1:
+ for c in coll:
+ if len(c) > 1:
+ return "MULTI_PDF_MULTI_PAGE"
+ else:
+ return "MULTI_PDF_SINGLE_PAGE"
+ return "TODO"
+
+
# I want to create a dataclass here to hold the resource information to pass to the view
@dataclass
class ResourceInfo:
@@ -143,7 +157,7 @@ def get_presigned_obj_url(bucket_name, obj_name, expiration=3600) -> str | None:
return response
-def upload_to_s3(pdf_files, thumbnail_files, snappedshotted_pages) -> bool:
+def upload_to_s3(pdf_files, thumbnail_files, snapshotted_pages) -> bool:
session = boto3.Session()
client = session.client(
"s3",
@@ -168,24 +182,27 @@ def upload_to_s3(pdf_files, thumbnail_files, snappedshotted_pages) -> bool:
settings.AWS_STORAGE_BUCKET_NAME,
f"thumbnails/{f.name}",
)
- if len(snappedshotted_pages[0]) == 1:
- for img in snappedshotted_pages[0]:
+ if _get_pdf_collection_type(snapshotted_pages) == "SINGLE_PDF_SINGLE_PAGE" \
+ or _get_pdf_collection_type(snapshotted_pages) == "SINGLE_PDF_MULTI_PAGE":
+ for img in snapshotted_pages[0]:
logger.info("Uploading {img} to S3")
client.upload_file(
img,
settings.AWS_STORAGE_BUCKET_NAME,
f"snapshotted_pages/{Path(img).name}",
)
- else:
- for lst in snappedshotted_pages:
- for img in lst:
+ return True
+ if _get_pdf_collection_type(snapshotted_pages) == "MULTI_PDF_SINGLE_PAGE" \
+ or _get_pdf_collection_type(snapshotted_pages) == "MULTI_PDF_MULTI_PAGE":
+ for pdf in snapshotted_pages:
+ for img in pdf:
logger.info("Uploading {img} to S3")
client.upload_file(
img,
settings.AWS_STORAGE_BUCKET_NAME,
f"snapshotted_pages/{Path(img).name}",
)
- return True
+ return True
except ClientError:
logging.exception("Error uploading files to S3")
return False