aboutsummaryrefslogtreecommitdiffstats
path: root/pyblackbird_cc/resources/tests/test_file_processing.py
blob: 7b95a9db3fd7bb07cb359dbee98d7eed2cb347f0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from pathlib import Path

from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.files.uploadedfile import TemporaryUploadedFile
from django.test import TestCase
from django.urls import reverse

from .. import services
from ..utils import _get_pdf_collection_type


def test_detect_snapshotted_pdf_collection():
    """Check the label `_get_pdf_collection_type` returns for each collection shape.

    Each case is a list of lists of strings; judging by the expected labels,
    the outer list is presumably one entry per PDF and the inner list one
    entry per page (confirm against `_get_pdf_collection_type`).
    """
    cases = (
        ([["toss"]], "SINGLE_PDF_SINGLE_PAGE"),
        ([["toss2", "toss8"]], "SINGLE_PDF_MULTI_PAGE"),
        ([["toss"], ["toss2"]], "MULTI_PDF_SINGLE_PAGE"),
        ([["toss", "toss2"], ["toss", "toss2"]], "MULTI_PDF_MULTI_PAGE"),
    )

    for collection, expected_label in cases:
        assert _get_pdf_collection_type(collection) == expected_label


class PDFFileUploadTestCase(TestCase):
    """Tests that POST a small PDF fixture to the ``resources:create_resource`` view."""

    def setUp(self):
        """Resolve the view URL and fixture path, and create the user the tests log in as."""
        self.url = reverse("resources:create_resource")
        self.test_file_path = Path(
            settings.BASE_DIR / "pyblackbird_cc" / "resources/tests/testdata/test_small_file.pdf"
        )

        # Create a test user
        self.email = "testuser@example.com"
        self.password = "testpassword"
        self.user = get_user_model().objects.create_user(
            email=self.email,
            password=self.password,
        )

    def _post_test_pdf(self):
        """Log in the test user and POST the fixture PDF under the ``pdf_files`` key.

        Returns:
            The response object returned by ``self.client.post``.
        """
        logged_in = self.client.login(email=self.email, password=self.password)
        # Surface a failed login here instead of as a confusing status-code
        # mismatch further down in the calling test.
        self.assertTrue(logged_in, "Could not log in the test user")

        uploaded_file = SimpleUploadedFile(
            "test_file.pdf",
            self.test_file_path.read_bytes(),
            content_type="application/pdf",
        )
        return self.client.post(self.url, {"pdf_files": [uploaded_file]})

    def test_file_upload(self):
        """
        Test that a file can be uploaded successfully using our create_resource view.
        """
        response = self._post_test_pdf()

        # Check if the response is OK
        self.assertEqual(response.status_code, 200)

    def test_file_upload_with_upload_handlers(self):
        """
        This test does not test my code but the behavior of the Django file upload handlers.
        """
        response = self._post_test_pdf()

        self.assertEqual(response.status_code, 200)

        # Inspect the files as they arrived on the request handled by the view
        uploaded_files = response.wsgi_request.FILES.getlist("pdf_files")
        self.assertEqual(len(uploaded_files), 1)

        # The fixture is expected to be small enough (under 2.5 MB) to come back as an
        # InMemoryUploadedFile, but a TemporaryUploadedFile is accepted as well.
        # SimpleUploadedFile is not listed separately: it subclasses InMemoryUploadedFile.
        self.assertIsInstance(
            uploaded_files[0],
            (TemporaryUploadedFile, InMemoryUploadedFile),
        )

    def test_uploaded_pdf_file_metadata(self):
        """
        Upload the fixture PDF, then check the metadata and page export that
        ``services`` produces for the same file read from its path on disk.
        """
        response = self._post_test_pdf()

        self.assertEqual(response.status_code, 200)

        # Extract metadata from the fixture file on disk (not from the upload)
        pdf_metadata_from_path = services.get_pdf_metadata_from_path(self.test_file_path)

        # The fixture PDF has 4 pages
        self.assertEqual(pdf_metadata_from_path.n_pages, 4)

        # File size in bytes: non-empty and below an assumed 5 MB maximum
        self.assertGreater(pdf_metadata_from_path.file_size, 0)
        self.assertLess(pdf_metadata_from_path.file_size, 5 * 1024 * 1024)

        # Exporting the pages should yield one item per page
        files = list(services.export_pages_as_images(self.test_file_path))
        self.assertEqual(len(files), 4)