Renamed from pyblackbird_cc to alphabetlearning - everywhere

author: Matthew Lemon <y@yulqen.org> 2024-10-15 21:01:31 +0100
committer: Matthew Lemon <y@yulqen.org> 2024-10-15 21:01:31 +0100
commit: eeaddb27560d723ca7d61359744ceb2709fccd2d (patch)
tree: 04ddbc49ae7b73d5f5a9e1716d7227aecd3b9f85 /alphabetlearning/resources/services.py
parent: 7a3044c859043837e6c7c95bb4894d04e9b2cbc2 (diff)
1 files changed, 55 insertions, 0 deletions
diff --git a/alphabetlearning/resources/services.py b/alphabetlearning/resources/services.py
new file mode 100644
index 0000000..03c53af
--- /dev/null
+++ b/alphabetlearning/resources/services.py
@@ -0,0 +1,55 @@
+import dataclasses
+import os.path
+import tempfile
+
+from pdf2image import convert_from_path
+from PyPDF2 import PdfReader
+
+
+@dataclasses.dataclass
+class PDFMetadata:  # summary of one PDF file, as populated by get_pdf_metadata_from_path
+    file_name: str  # the path the metadata was read from, exactly as it was supplied
+    file_size: int  # size on disk in bytes, as reported by os.path.getsize
+    n_pages: int  # number of pages, taken from len(PdfReader(...).pages)
+
+
+def get_pdf_metadata_from_path(file_path: str) -> PDFMetadata:
+    """
+    Return the path, on-disk size in bytes and page count of the PDF at file_path.
+    :param file_path: path to an existing PDF file
+    :return: PDFMetadata (file_name holds file_path exactly as given)
+    :raises ValueError: if file_path is not an existing regular file
+    """
+    if not os.path.isfile(file_path):
+        raise ValueError(f"file_path must be a file. {file_path} is not a file.")
+    reader = PdfReader(file_path)
+    n_pages = len(reader.pages)
+    file_size = os.path.getsize(file_path)
+    return PDFMetadata(file_name=file_path, file_size=file_size, n_pages=n_pages)
+
+
+def export_pages_as_images(file_path: str) -> list[str]:
+    """
+    Render every page of the PDF at file_path to a JPEG in a new temp directory.
+    The caller owns that directory and should delete it once done with the images.
+    :param file_path: path to the PDF file
+    :return: List of paths to the JPEG images, in the order they were rendered
+    """
+    import shutil  # local import: only needed on the failure path below
+    PdfReader(file_path)  # parse first: a bad PDF fails before any directory is made
+    file_name = os.path.basename(file_path)
+    output_dir = tempfile.mkdtemp()  # deliberately outlives this call; holds the results
+    try:
+        # Scratch directory handed to pdf2image as output_folder; removed on exit.
+        with tempfile.TemporaryDirectory() as path:
+            images = convert_from_path(file_path, 56, size=300, output_folder=path)
+            image_paths = []
+            # Save inside the with-block: the images may be backed by files in `path`.
+            for i, image in enumerate(images):
+                image_path = os.path.join(output_dir, f"{file_name}_{i:03d}.jpg")
+                image.save(image_path)
+                image_paths.append(image_path)
+            return image_paths
+    except BaseException:
+        shutil.rmtree(output_dir, ignore_errors=True)  # don't leak a half-filled directory
+        raise
author	Matthew Lemon <y@yulqen.org>	2024-10-15 21:01:31 +0100
committer	Matthew Lemon <y@yulqen.org>	2024-10-15 21:01:31 +0100
commit	eeaddb27560d723ca7d61359744ceb2709fccd2d (patch)
tree	04ddbc49ae7b73d5f5a9e1716d7227aecd3b9f85 /alphabetlearning/resources/services.py
parent	7a3044c859043837e6c7c95bb4894d04e9b2cbc2 (diff)