aboutsummaryrefslogtreecommitdiffstats
path: root/pyblackbird_cc/resources/services.py
diff options
context:
space:
mode:
authorMatthew Lemon <y@yulqen.org>2024-10-15 21:01:31 +0100
committerMatthew Lemon <y@yulqen.org>2024-10-15 21:01:31 +0100
commiteeaddb27560d723ca7d61359744ceb2709fccd2d (patch)
tree04ddbc49ae7b73d5f5a9e1716d7227aecd3b9f85 /pyblackbird_cc/resources/services.py
parent7a3044c859043837e6c7c95bb4894d04e9b2cbc2 (diff)
Renamed from pyblackbird_cc to alphabetlearning - everywhere
Diffstat (limited to 'pyblackbird_cc/resources/services.py')
-rw-r--r--pyblackbird_cc/resources/services.py55
1 files changed, 0 insertions, 55 deletions
diff --git a/pyblackbird_cc/resources/services.py b/pyblackbird_cc/resources/services.py
deleted file mode 100644
index 03c53af..0000000
--- a/pyblackbird_cc/resources/services.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import dataclasses
-import os.path
-import tempfile
-
-from pdf2image import convert_from_path
-from PyPDF2 import PdfReader
-
-
@dataclasses.dataclass
class PDFMetadata:
    """Basic facts about a PDF file on disk."""

    # Path of the PDF. get_pdf_metadata_from_path stores the path exactly as
    # it was given, not just the base name.
    file_name: str
    # Size of the file in bytes (os.path.getsize).
    file_size: int
    # Number of pages in the document.
    n_pages: int
-
-
def get_pdf_metadata_from_path(file_path: str) -> PDFMetadata:
    """
    Return the metadata of the PDF file at ``file_path``.

    :param file_path: path to an existing PDF file
    :return: PDFMetadata holding the path as given, the file size in bytes
        and the number of pages
    :raises ValueError: if ``file_path`` does not point to an existing file
    """
    if not os.path.isfile(file_path):
        # Must be an f-string, otherwise the literal text "{file_path}" is
        # shown instead of the offending path.
        raise ValueError(f"file_path must be a file. {file_path} is not a file.")
    reader = PdfReader(file_path)
    return PDFMetadata(
        file_name=file_path,
        file_size=os.path.getsize(file_path),
        n_pages=len(reader.pages),
    )
-
-
def export_pages_as_images(file_path: str) -> list[str]:
    """
    Export the pages of a PDF file as JPEG images.

    The images are written into a fresh directory created with
    ``tempfile.mkdtemp``. On success that directory is NOT removed: the caller
    owns it and should delete it (for example with ``shutil.rmtree``) once the
    images are no longer needed. If rendering or saving fails, the directory
    is removed before the exception propagates, because the caller never
    learns its path.

    :param file_path: path to the PDF file to render
    :return: list of paths to the JPEG images, in the order the pages were
        rendered, named ``<pdf base name>_<zero-padded index>.jpg``
    """
    import shutil  # local import: only needed for cleanup on failure

    file_name = os.path.basename(file_path)
    output_dir = tempfile.mkdtemp()
    try:
        # pdf2image writes its intermediate page files into scratch_dir; the
        # images are re-saved as JPEG while that directory still exists, in
        # case the returned images are backed by those files.
        with tempfile.TemporaryDirectory() as scratch_dir:
            pages = convert_from_path(
                file_path, dpi=56, size=300, output_folder=scratch_dir
            )
            image_paths = []
            # Iterate over what was actually rendered rather than a separately
            # computed page count, so a mismatch cannot raise IndexError.
            for index, page in enumerate(pages):
                image_path = os.path.join(output_dir, f"{file_name}_{index:03d}.jpg")
                page.save(image_path)
                image_paths.append(image_path)
    except BaseException:
        shutil.rmtree(output_dir, ignore_errors=True)
        raise
    return image_paths