aboutsummaryrefslogtreecommitdiffstats
path: root/pyblackbird_cc/resources/tests/test_file_processing.py
blob: cbb49722c0a308b9fbb7fd0bc2e7726d2a7b12ff (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from django.contrib.auth import get_user_model
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.files.uploadedfile import TemporaryUploadedFile
from django.test import TestCase
from django.urls import reverse

from pyblackbird_cc.resources.views import _get_pdf_collection_type

from .. import services

""" Explanation:
The type of uploaded file object a view receives (InMemoryUploadedFile or TemporaryUploadedFile) is determined by the size of
the upload and the value of the FILE_UPLOAD_MAX_MEMORY_SIZE setting in Django.

By default, FILE_UPLOAD_MAX_MEMORY_SIZE is set to 2.5 MB (2621440 bytes). This means that if the size of the uploaded file
is less than or equal to 2.5 MB, Django will use the MemoryFileUploadHandler to handle the file upload, and the resulting uploaded
file object will be an instance of InMemoryUploadedFile.

If the size of the uploaded file exceeds the FILE_UPLOAD_MAX_MEMORY_SIZE threshold, Django will use the TemporaryFileUploadHandler to
handle the file upload, and the resulting uploaded file object will be an instance of TemporaryUploadedFile. In this case, the uploaded
file is temporarily stored on disk instead of being kept in memory.

The SimpleUploadedFile is used when you manually create an uploaded file object in your code, such as in tests or when handling file
uploads programmatically.

To test the behavior with different file sizes, you can try the following:

- Create a test file that is smaller than 2.5 MB and run the test. The uploaded file should be an instance of InMemoryUploadedFile.
- Create a test file that is larger than 2.5 MB and run the test. The uploaded file should be an instance of TemporaryUploadedFile.

If you want to change the threshold size for using InMemoryUploadedFile, you can modify the FILE_UPLOAD_MAX_MEMORY_SIZE setting in your Django settings file. For example:

    FILE_UPLOAD_MAX_MEMORY_SIZE = 5 * 1024 * 1024  # 5 MB

With this setting, files up to 5 MB will be handled as InMemoryUploadedFile, and files larger than 5 MB will be handled as TemporaryUploadedFile.
Remember to test with files of different sizes to ensure that your application handles file uploads correctly based on your specific requirements and settings.

We also test the integrity of the uploaded PDF file here by checking the number of pages in the file and whether it is a valid PDF file.
"""


def test_detect_snapshotted_pdf_collection():
    """Check the type label returned for each shape of nested PDF collection.

    The inputs are lists of lists: judging by the expected labels, the outer
    list stands for the PDFs and each inner list for that PDF's pages.
    """
    expected_label_by_collection = (
        # one inner list holding one entry
        ([["toss"]], "SINGLE_PDF_SINGLE_PAGE"),
        # one inner list holding several entries
        ([["toss2", "toss8"]], "SINGLE_PDF_MULTI_PAGE"),
        # several inner lists holding one entry each
        ([["toss"], ["toss2"]], "MULTI_PDF_SINGLE_PAGE"),
        # several inner lists holding several entries each
        ([["toss", "toss2"], ["toss", "toss2"]], "MULTI_PDF_MULTI_PAGE"),
    )

    for collection, expected_label in expected_label_by_collection:
        assert _get_pdf_collection_type(collection) == expected_label


class PDFFileUploadTestCase(TestCase):
    """Exercise PDF uploads through the ``resources:create_resource`` view.

    Each test logs in as a user created in ``setUp`` and posts the small PDF
    fixture under the ``pdf_files`` form field.
    """

    def setUp(self):
        """Resolve the upload URL, record the fixture path and create a user."""
        self.url = reverse("resources:create_resource")
        # Relative path: it is resolved against the current working directory,
        # so the tests are expected to be run from the repository root.
        self.test_file_path = "pyblackbird_cc/resources/tests/testdata/test_small_file.pdf"

        # Create a test user
        self.email = "testuser@example.com"
        self.password = "testpassword"
        self.user = get_user_model().objects.create_user(
            email=self.email,
            password=self.password,
        )

    def _post_test_pdf(self):
        """Log in the test user and POST the fixture PDF to the upload view.

        Returns:
            The test client's response to the POST request.
        """
        self.client.login(
            email=self.email,
            password=self.password,
        )  # Log in the test user

        # Read the fixture fully into memory; the file handle can then be
        # closed before the request is sent.
        with open(self.test_file_path, "rb") as file:
            uploaded_file = SimpleUploadedFile(
                "test_file.pdf",
                file.read(),
                content_type="application/pdf",
            )

        return self.client.post(self.url, {"pdf_files": [uploaded_file]})

    def test_file_upload(self):
        """
        Test that a file can be uploaded successfully using our create_resource view.
        """
        response = self._post_test_pdf()

        # Check if the response is OK
        self.assertEqual(response.status_code, 200)

    def test_file_upload_with_upload_handlers(self):
        """
        This test does not test my code but the behavior of the Django file upload handlers.
        """
        response = self._post_test_pdf()

        self.assertEqual(response.status_code, 200)

        # Inspect the files as the view saw them, i.e. after Django's upload
        # handlers parsed the multipart request body.
        uploaded_files = response.wsgi_request.FILES.getlist("pdf_files")
        self.assertEqual(len(uploaded_files), 1)

        # test_small_file.pdf is expected to be below FILE_UPLOAD_MAX_MEMORY_SIZE
        # (2.5 MB by default), so with the default upload handlers it must be
        # kept in memory rather than spooled to a temporary file on disk.
        # NOTE(review): assumes the project does not override
        # FILE_UPLOAD_HANDLERS or lower the threshold — confirm in settings.
        self.assertIsInstance(uploaded_files[0], InMemoryUploadedFile)
        self.assertNotIsInstance(uploaded_files[0], TemporaryUploadedFile)

    def test_uploaded_pdf_file_metadata(self):
        """
        Upload the PDF fixture, then check the metadata and page export that
        the ``services`` helpers produce for that same fixture file.
        """
        response = self._post_test_pdf()

        self.assertEqual(
            response.status_code,
            200,
        )

        # The metadata is read from the fixture on disk (by path), not from
        # the copy that was sent in the request.
        pdf_metadata_from_path = services.get_pdf_metadata_from_path(self.test_file_path)

        # Get the number of pages in the PDF - is 4
        self.assertEqual(pdf_metadata_from_path.n_pages, 4)

        # Get the file size in bytes
        self.assertGreater(pdf_metadata_from_path.file_size, 0)

        self.assertLess(
            pdf_metadata_from_path.file_size,
            5 * 1024 * 1024,
        )  # Assuming a maximum file size of 5 MB

        # Materialise the export so its items can be counted; one item is
        # expected per page of the 4-page fixture.
        files = list(services.export_pages_as_images(self.test_file_path))
        self.assertEqual(len(files), 4)