diff options
author | Matthew Lemon <y@yulqen.org> | 2024-08-03 20:30:31 +0100 |
---|---|---|
committer | Matthew Lemon <y@yulqen.org> | 2024-08-03 20:30:31 +0100 |
commit | a4de4737f0e97aa005281f9ac79482149a1d5bb7 (patch) | |
tree | 74f186cee81c811f765646eaba03c3b7f9a2e07d | |
parent | a53e1c79714d05807d42a50010b44d13721934c3 (diff) |
Add S3 utility module and refactor S3 functions into it
Created a new `s3.py` utility module for handling S3 interactions, including uploading files and generating presigned URLs. Refactored the views to use these new utility functions, and moved the PDF collection type function to a new `utils.py` module to improve code organization and readability.
-rw-r--r-- | pyblackbird_cc/resources/s3.py | 103 | ||||
-rw-r--r-- | pyblackbird_cc/resources/tests/test_file_processing.py | 2 | ||||
-rw-r--r-- | pyblackbird_cc/resources/utils.py | 10 | ||||
-rw-r--r-- | pyblackbird_cc/resources/views.py | 114 |
4 files changed, 116 insertions, 113 deletions
diff --git a/pyblackbird_cc/resources/s3.py b/pyblackbird_cc/resources/s3.py new file mode 100644 index 0000000..d00f19b --- /dev/null +++ b/pyblackbird_cc/resources/s3.py @@ -0,0 +1,103 @@ +import logging +from pathlib import Path +from typing import Sequence + +import boto3 +from boto3 import Session +from botocore.exceptions import ClientError +from django.conf import settings + +from pyblackbird_cc.resources.utils import _get_pdf_collection_type + +logger = logging.getLogger(__name__) + + +def get_presigned_obj_url(bucket_name, obj_name, expiration=3600) -> str | None: + client = boto3.client( + "s3", + endpoint_url=settings.AWS_S3_ENDPOINT_URL, + aws_access_key_id=settings.AWS_ACCESS_KEY_ID, + aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY, + region_name=settings.AWS_S3_REGION_NAME, + ) + logger.info("Client created", extra={"client": client}) + try: + response = client.generate_presigned_url( + "get_object", + Params={"Bucket": bucket_name, "Key": obj_name}, + ExpiresIn=expiration, + ) + except ClientError as e: + logger.exception("Error generating presigned URL", extra={"error": e}) + return None + return response + + +def get_s3_client() -> Session.client: + return boto3.Session().client( + "s3", + endpoint_url=settings.AWS_S3_ENDPOINT_URL, + aws_access_key_id=settings.AWS_ACCESS_KEY_ID, + aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY, + region_name=settings.AWS_S3_REGION_NAME + ) + + +def upload_files_to_s3(files: Sequence, dir_name: str) -> None: + """ + Generic upload function. Pass "thumbnails" or "pdfuploads" as dir_name to + dictate the type of file to upload. 
+ :param files: + :param dir_name: + :return: + """ + s3_client = get_s3_client() + for file in files: + logging.info(f"Uploading {file.name} to S3") + s3_client.upload_fileobj( + file, + settings.AWS_STORAGE_BUCKET_NAME, + f"{dir_name}/{file.name}" + ) + + +def upload_snapshotted_pages_to_s3(snapshotted_pages): + s3_client = get_s3_client() + collection_type = _get_pdf_collection_type(snapshotted_pages) + if collection_type in ["SINGLE_PDF_SINGLE_PAGE", "SINGLE_PDF_MULTI_PAGE"]: + for img in snapshotted_pages[0]: + logging.info(f"Uploading {img} to S3") + s3_client.upload_file( + img, + settings.AWS_STORAGE_BUCKET_NAME, + f"snapshotted_pages/{Path(img).name}" + ) + return True + if collection_type in ["MULTI_PDF_SINGLE_PAGE", "MULTI_PDF_MULTI_PAGE"]: + for pdf in snapshotted_pages: + for img in pdf: + logging.info(f"Uploading {img} to S3") + s3_client.upload_file( + img, + settings.AWS_STORAGE_BUCKET_NAME, + f"snapshotted_pages/{Path(img).name}" + ) + return True + return False + + +def upload_to_s3(pdf_files, thumbnail_files, snapshotted_pages) -> bool: + """ + + :param pdf_files: a list of PDF files + :param thumbnail_files: a list of thumbnail files + :param snapshotted_pages: a list of snapshotted pages + :return: True if the files was uploaded, False otherwise + """ + try: + upload_files_to_s3(pdf_files, dir_name='pdfuploads') + upload_files_to_s3(thumbnail_files, dir_name='thumbnails') + return upload_snapshotted_pages_to_s3(snapshotted_pages) + except ClientError: + logging.exception("Error uploading files to S3") + return False diff --git a/pyblackbird_cc/resources/tests/test_file_processing.py b/pyblackbird_cc/resources/tests/test_file_processing.py index e3514c9..0a0f1a3 100644 --- a/pyblackbird_cc/resources/tests/test_file_processing.py +++ b/pyblackbird_cc/resources/tests/test_file_processing.py @@ -5,7 +5,7 @@ from django.core.files.uploadedfile import TemporaryUploadedFile from django.test import TestCase from django.urls import reverse -from 
pyblackbird_cc.resources.views import _get_pdf_collection_type +from ..utils import _get_pdf_collection_type from .. import services diff --git a/pyblackbird_cc/resources/utils.py b/pyblackbird_cc/resources/utils.py new file mode 100644 index 0000000..d17aa69 --- /dev/null +++ b/pyblackbird_cc/resources/utils.py @@ -0,0 +1,10 @@ +def _get_pdf_collection_type(coll) -> str: + if len(coll) == 1 and len(coll[0]) == 1: + return "SINGLE_PDF_SINGLE_PAGE" + if len(coll) == 1 and len(coll[0]) > 1: + return "SINGLE_PDF_MULTI_PAGE" + if len(coll) > 1: + for c in coll: + if len(c) > 1: + return "MULTI_PDF_MULTI_PAGE" + return "MULTI_PDF_SINGLE_PAGE" diff --git a/pyblackbird_cc/resources/views.py b/pyblackbird_cc/resources/views.py index 4ce2472..09bdb9c 100644 --- a/pyblackbird_cc/resources/views.py +++ b/pyblackbird_cc/resources/views.py @@ -4,9 +4,6 @@ import tempfile from collections.abc import Generator from dataclasses import dataclass -import boto3 -from botocore.exceptions import ClientError -from botocore.utils import Path from django.conf import settings from django.contrib import messages from django.contrib.auth.decorators import login_required @@ -22,22 +19,11 @@ from .forms import ResourceUpdateMetadataForm from .models import PDFPageSnapshot, ResourceSubcategory from .models import PDFResource from .models import Resource +from .s3 import get_presigned_obj_url, upload_files_to_s3, upload_to_s3 logger = logging.getLogger(__name__) -def _get_pdf_collection_type(coll) -> str: - if len(coll) == 1 and len(coll[0]) == 1: - return "SINGLE_PDF_SINGLE_PAGE" - if len(coll) == 1 and len(coll[0]) > 1: - return "SINGLE_PDF_MULTI_PAGE" - if len(coll) > 1: - for c in coll: - if len(c) > 1: - return "MULTI_PDF_MULTI_PAGE" - return "MULTI_PDF_SINGLE_PAGE" - - # I want to create a dataclass here to hold the resource information to pass to the view @dataclass class ResourceInfo: @@ -138,101 +124,6 @@ def index(request): return render(request, "resources/resource_list.html", 
context) -def get_presigned_obj_url(bucket_name, obj_name, expiration=3600) -> str | None: - client = boto3.client( - "s3", - endpoint_url=settings.AWS_S3_ENDPOINT_URL, - aws_access_key_id=settings.AWS_ACCESS_KEY_ID, - aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY, - region_name=settings.AWS_S3_REGION_NAME, - ) - logger.info("Client created", extra={"client": client}) - try: - response = client.generate_presigned_url( - "get_object", - Params={"Bucket": bucket_name, "Key": obj_name}, - ExpiresIn=expiration, - ) - except ClientError as e: - logger.exception("Error generating presigned URL", extra={"error": e}) - return None - return response - - -def upload_to_s3(pdf_files, thumbnail_files, snapshotted_pages) -> bool: - session = boto3.Session() - client = session.client( - "s3", - endpoint_url=settings.AWS_S3_ENDPOINT_URL, - aws_access_key_id=settings.AWS_ACCESS_KEY_ID, - aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY, - region_name=settings.AWS_S3_REGION_NAME, - ) - - try: - for pdf_file in pdf_files: - logger.info("Uploading {pdf_file.name} to S3") - client.upload_fileobj( - pdf_file, - settings.AWS_STORAGE_BUCKET_NAME, - f"pdfuploads/{pdf_file.name}", - ) - for f in thumbnail_files: - logger.info("Uploading {f.name} to S3") - client.upload_fileobj( - f, - settings.AWS_STORAGE_BUCKET_NAME, - f"thumbnails/{f.name}", - ) - if _get_pdf_collection_type(snapshotted_pages) == "SINGLE_PDF_SINGLE_PAGE" \ - or _get_pdf_collection_type(snapshotted_pages) == "SINGLE_PDF_MULTI_PAGE": - for img in snapshotted_pages[0]: - logger.info("Uploading {img} to S3") - client.upload_file( - img, - settings.AWS_STORAGE_BUCKET_NAME, - f"snapshotted_pages/{Path(img).name}", - ) - return True - if _get_pdf_collection_type(snapshotted_pages) == "MULTI_PDF_SINGLE_PAGE" \ - or _get_pdf_collection_type(snapshotted_pages) == "MULTI_PDF_MULTI_PAGE": - for pdf in snapshotted_pages: - for img in pdf: - logger.info("Uploading {img} to S3") - client.upload_file( - img, - 
settings.AWS_STORAGE_BUCKET_NAME, - f"snapshotted_pages/{Path(img).name}", - ) - return True - except ClientError: - logging.exception("Error uploading files to S3") - return False - - -def upload_thumbnails_to_s3(thumbnail_files) -> bool: - session = boto3.Session() - client = session.client( - "s3", - endpoint_url=settings.AWS_S3_ENDPOINT_URL, - aws_access_key_id=settings.AWS_ACCESS_KEY_ID, - aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY, - region_name=settings.AWS_S3_REGION_NAME, - ) - try: - for f in thumbnail_files: - logger.info(f"Uploading {f.name} to S3") - client.upload_fileobj( - f, - settings.AWS_STORAGE_BUCKET_NAME, - f"thumbnails/{f.name}", - ) - return True - except Exception as e: # Any exceptions generated by boto3 client will be caught here - logger.error(f"Error uploading thumbnail files to S3: {e}") - return False - - def _write_pdf_to_tempdir(f) -> str: temp_dir = tempfile.mkdtemp() file_path = os.path.join(temp_dir, f.name) @@ -402,8 +293,7 @@ def update_resource_thumbnails(request, pk): if form.is_valid(): thumbnail_files = form.cleaned_data["thumbnail_files"] resource.thumbnail_filenames = [f.name for f in thumbnail_files] - if not upload_thumbnails_to_s3(thumbnail_files): - raise Exception("Error uploading files to S3") + upload_files_to_s3(thumbnail_files, "thumbnails") resource.save() return redirect("resources:resource_detail", resource_id=resource.id) |