aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthew Lemon <y@yulqen.org>2024-08-03 20:30:31 +0100
committerMatthew Lemon <y@yulqen.org>2024-08-03 20:30:31 +0100
commita4de4737f0e97aa005281f9ac79482149a1d5bb7 (patch)
tree74f186cee81c811f765646eaba03c3b7f9a2e07d
parenta53e1c79714d05807d42a50010b44d13721934c3 (diff)
Add S3 utility module and refactor S3 functions into it
Created a new `s3.py` utility module for handling S3 interactions including file uploads and generating presigned URLs. Refactored views to utilize these new utility functions and moved the PDF collection type function to a new `utils.py` module to improve code organization and readability.
-rw-r--r--pyblackbird_cc/resources/s3.py103
-rw-r--r--pyblackbird_cc/resources/tests/test_file_processing.py2
-rw-r--r--pyblackbird_cc/resources/utils.py10
-rw-r--r--pyblackbird_cc/resources/views.py114
4 files changed, 116 insertions, 113 deletions
diff --git a/pyblackbird_cc/resources/s3.py b/pyblackbird_cc/resources/s3.py
new file mode 100644
index 0000000..d00f19b
--- /dev/null
+++ b/pyblackbird_cc/resources/s3.py
@@ -0,0 +1,103 @@
+import logging
+from pathlib import Path
+from typing import Sequence
+
+import boto3
+from boto3 import Session
+from botocore.exceptions import ClientError
+from django.conf import settings
+
+from pyblackbird_cc.resources.utils import _get_pdf_collection_type
+
+logger = logging.getLogger(__name__)
+
+
+def get_presigned_obj_url(bucket_name, obj_name, expiration=3600) -> str | None:
+ client = boto3.client(
+ "s3",
+ endpoint_url=settings.AWS_S3_ENDPOINT_URL,
+ aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
+ aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
+ region_name=settings.AWS_S3_REGION_NAME,
+ )
+ logger.info("Client created", extra={"client": client})
+ try:
+ response = client.generate_presigned_url(
+ "get_object",
+ Params={"Bucket": bucket_name, "Key": obj_name},
+ ExpiresIn=expiration,
+ )
+ except ClientError as e:
+ logger.exception("Error generating presigned URL", extra={"error": e})
+ return None
+ return response
+
+
+def get_s3_client() -> Session.client:
+ return boto3.Session().client(
+ "s3",
+ endpoint_url=settings.AWS_S3_ENDPOINT_URL,
+ aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
+ aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
+ region_name=settings.AWS_S3_REGION_NAME
+ )
+
+
+def upload_files_to_s3(files: Sequence, dir_name: str) -> None:
+ """
+ Generic upload function. Pass "thumbnails" or "pdfuploads" as dir_name to
+ dictate the type of file to upload.
+ :param files:
+ :param dir_name:
+ :return:
+ """
+ s3_client = get_s3_client()
+ for file in files:
+ logging.info(f"Uploading {file.name} to S3")
+ s3_client.upload_fileobj(
+ file,
+ settings.AWS_STORAGE_BUCKET_NAME,
+ f"{dir_name}/{file.name}"
+ )
+
+
+def upload_snapshotted_pages_to_s3(snapshotted_pages):
+ s3_client = get_s3_client()
+ collection_type = _get_pdf_collection_type(snapshotted_pages)
+ if collection_type in ["SINGLE_PDF_SINGLE_PAGE", "SINGLE_PDF_MULTI_PAGE"]:
+ for img in snapshotted_pages[0]:
+ logging.info(f"Uploading {img} to S3")
+ s3_client.upload_file(
+ img,
+ settings.AWS_STORAGE_BUCKET_NAME,
+ f"snapshotted_pages/{Path(img).name}"
+ )
+ return True
+ if collection_type in ["MULTI_PDF_SINGLE_PAGE", "MULTI_PDF_MULTI_PAGE"]:
+ for pdf in snapshotted_pages:
+ for img in pdf:
+ logging.info(f"Uploading {img} to S3")
+ s3_client.upload_file(
+ img,
+ settings.AWS_STORAGE_BUCKET_NAME,
+ f"snapshotted_pages/{Path(img).name}"
+ )
+ return True
+ return False
+
+
+def upload_to_s3(pdf_files, thumbnail_files, snapshotted_pages) -> bool:
+ """
+
+ :param pdf_files: a list of PDF files
+ :param thumbnail_files: a list of thumbnail files
+ :param snapshotted_pages: a list of snapshotted pages
+    :return: True if the files were uploaded, False otherwise
+ """
+ try:
+ upload_files_to_s3(pdf_files, dir_name='pdfuploads')
+ upload_files_to_s3(thumbnail_files, dir_name='thumbnails')
+ return upload_snapshotted_pages_to_s3(snapshotted_pages)
+ except ClientError:
+ logging.exception("Error uploading files to S3")
+ return False
diff --git a/pyblackbird_cc/resources/tests/test_file_processing.py b/pyblackbird_cc/resources/tests/test_file_processing.py
index e3514c9..0a0f1a3 100644
--- a/pyblackbird_cc/resources/tests/test_file_processing.py
+++ b/pyblackbird_cc/resources/tests/test_file_processing.py
@@ -5,7 +5,7 @@ from django.core.files.uploadedfile import TemporaryUploadedFile
from django.test import TestCase
from django.urls import reverse
-from pyblackbird_cc.resources.views import _get_pdf_collection_type
+from ..utils import _get_pdf_collection_type
from .. import services
diff --git a/pyblackbird_cc/resources/utils.py b/pyblackbird_cc/resources/utils.py
new file mode 100644
index 0000000..d17aa69
--- /dev/null
+++ b/pyblackbird_cc/resources/utils.py
@@ -0,0 +1,10 @@
+def _get_pdf_collection_type(coll) -> str:
+ if len(coll) == 1 and len(coll[0]) == 1:
+ return "SINGLE_PDF_SINGLE_PAGE"
+ if len(coll) == 1 and len(coll[0]) > 1:
+ return "SINGLE_PDF_MULTI_PAGE"
+ if len(coll) > 1:
+ for c in coll:
+ if len(c) > 1:
+ return "MULTI_PDF_MULTI_PAGE"
+ return "MULTI_PDF_SINGLE_PAGE"
diff --git a/pyblackbird_cc/resources/views.py b/pyblackbird_cc/resources/views.py
index 4ce2472..09bdb9c 100644
--- a/pyblackbird_cc/resources/views.py
+++ b/pyblackbird_cc/resources/views.py
@@ -4,9 +4,6 @@ import tempfile
from collections.abc import Generator
from dataclasses import dataclass
-import boto3
-from botocore.exceptions import ClientError
-from botocore.utils import Path
from django.conf import settings
from django.contrib import messages
from django.contrib.auth.decorators import login_required
@@ -22,22 +19,11 @@ from .forms import ResourceUpdateMetadataForm
from .models import PDFPageSnapshot, ResourceSubcategory
from .models import PDFResource
from .models import Resource
+from .s3 import get_presigned_obj_url, upload_files_to_s3, upload_to_s3
logger = logging.getLogger(__name__)
-def _get_pdf_collection_type(coll) -> str:
- if len(coll) == 1 and len(coll[0]) == 1:
- return "SINGLE_PDF_SINGLE_PAGE"
- if len(coll) == 1 and len(coll[0]) > 1:
- return "SINGLE_PDF_MULTI_PAGE"
- if len(coll) > 1:
- for c in coll:
- if len(c) > 1:
- return "MULTI_PDF_MULTI_PAGE"
- return "MULTI_PDF_SINGLE_PAGE"
-
-
# I want to create a dataclass here to hold the resource information to pass to the view
@dataclass
class ResourceInfo:
@@ -138,101 +124,6 @@ def index(request):
return render(request, "resources/resource_list.html", context)
-def get_presigned_obj_url(bucket_name, obj_name, expiration=3600) -> str | None:
- client = boto3.client(
- "s3",
- endpoint_url=settings.AWS_S3_ENDPOINT_URL,
- aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
- aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
- region_name=settings.AWS_S3_REGION_NAME,
- )
- logger.info("Client created", extra={"client": client})
- try:
- response = client.generate_presigned_url(
- "get_object",
- Params={"Bucket": bucket_name, "Key": obj_name},
- ExpiresIn=expiration,
- )
- except ClientError as e:
- logger.exception("Error generating presigned URL", extra={"error": e})
- return None
- return response
-
-
-def upload_to_s3(pdf_files, thumbnail_files, snapshotted_pages) -> bool:
- session = boto3.Session()
- client = session.client(
- "s3",
- endpoint_url=settings.AWS_S3_ENDPOINT_URL,
- aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
- aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
- region_name=settings.AWS_S3_REGION_NAME,
- )
-
- try:
- for pdf_file in pdf_files:
- logger.info("Uploading {pdf_file.name} to S3")
- client.upload_fileobj(
- pdf_file,
- settings.AWS_STORAGE_BUCKET_NAME,
- f"pdfuploads/{pdf_file.name}",
- )
- for f in thumbnail_files:
- logger.info("Uploading {f.name} to S3")
- client.upload_fileobj(
- f,
- settings.AWS_STORAGE_BUCKET_NAME,
- f"thumbnails/{f.name}",
- )
- if _get_pdf_collection_type(snapshotted_pages) == "SINGLE_PDF_SINGLE_PAGE" \
- or _get_pdf_collection_type(snapshotted_pages) == "SINGLE_PDF_MULTI_PAGE":
- for img in snapshotted_pages[0]:
- logger.info("Uploading {img} to S3")
- client.upload_file(
- img,
- settings.AWS_STORAGE_BUCKET_NAME,
- f"snapshotted_pages/{Path(img).name}",
- )
- return True
- if _get_pdf_collection_type(snapshotted_pages) == "MULTI_PDF_SINGLE_PAGE" \
- or _get_pdf_collection_type(snapshotted_pages) == "MULTI_PDF_MULTI_PAGE":
- for pdf in snapshotted_pages:
- for img in pdf:
- logger.info("Uploading {img} to S3")
- client.upload_file(
- img,
- settings.AWS_STORAGE_BUCKET_NAME,
- f"snapshotted_pages/{Path(img).name}",
- )
- return True
- except ClientError:
- logging.exception("Error uploading files to S3")
- return False
-
-
-def upload_thumbnails_to_s3(thumbnail_files) -> bool:
- session = boto3.Session()
- client = session.client(
- "s3",
- endpoint_url=settings.AWS_S3_ENDPOINT_URL,
- aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
- aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
- region_name=settings.AWS_S3_REGION_NAME,
- )
- try:
- for f in thumbnail_files:
- logger.info(f"Uploading {f.name} to S3")
- client.upload_fileobj(
- f,
- settings.AWS_STORAGE_BUCKET_NAME,
- f"thumbnails/{f.name}",
- )
- return True
- except Exception as e: # Any exceptions generated by boto3 client will be caught here
- logger.error(f"Error uploading thumbnail files to S3: {e}")
- return False
-
-
def _write_pdf_to_tempdir(f) -> str:
temp_dir = tempfile.mkdtemp()
file_path = os.path.join(temp_dir, f.name)
@@ -402,8 +293,7 @@ def update_resource_thumbnails(request, pk):
if form.is_valid():
thumbnail_files = form.cleaned_data["thumbnail_files"]
resource.thumbnail_filenames = [f.name for f in thumbnail_files]
- if not upload_thumbnails_to_s3(thumbnail_files):
- raise Exception("Error uploading files to S3")
+ upload_files_to_s3(thumbnail_files, "thumbnails")
resource.save()
return redirect("resources:resource_detail", resource_id=resource.id)