aboutsummaryrefslogtreecommitdiffstats
path: root/pyblackbird_cc/resources/services.py
diff options
context:
space:
mode:
authorMatthew Lemon <y@yulqen.org>2024-05-14 12:53:28 +0100
committerMatthew Lemon <y@yulqen.org>2024-05-14 12:53:28 +0100
commit46f11648d902b22a177b878e35d6049a7a127ce7 (patch)
treef59f6630717bc9097c988a6d8d3eebe4ad548f1d /pyblackbird_cc/resources/services.py
parentb5e2c4b9a7aab20db6dd6072a01abd114e8e55de (diff)
Can now upload to Spaces
Diffstat (limited to '')
-rw-r--r--pyblackbird_cc/resources/services.py57
1 files changed, 57 insertions, 0 deletions
diff --git a/pyblackbird_cc/resources/services.py b/pyblackbird_cc/resources/services.py
new file mode 100644
index 0000000..b33afcf
--- /dev/null
+++ b/pyblackbird_cc/resources/services.py
@@ -0,0 +1,57 @@
+import dataclasses
+import os.path
+import tempfile
+
+import pypdfium2 as pdfium
+
+
@dataclasses.dataclass
class PDFMetadata:
    """Basic facts recorded about a single PDF file."""

    # Name identifying the file; stored exactly as supplied by the creator
    # of the record.
    file_name: str
    # Size of the file as an integer (bytes when taken from os.path.getsize).
    file_size: int
    # Number of pages in the document.
    n_pages: int
+
+
+def get_pdf_metadata_from_path(file_path: str) -> PDFMetadata:
+ """
+ This function returns the metadata of a PDF file
+ :param file_path:
+ :return: PDFMetadata
+ """
+ if file_path is None:
+ raise ValueError("file_path cannot be None")
+ if not os.path.isfile(file_path):
+ raise ValueError(f"file_path must be a file. {file_path} is not a file.")
+ pdf = pdfium.PdfDocument(file_path)
+ n_pages = len(pdf)
+ file_size = os.path.getsize(file_path)
+ pdf.close()
+ return PDFMetadata(file_name=file_path, file_size=file_size, n_pages=n_pages)
+
+
def export_pages_as_images(file_path: str) -> list[str]:
    """
    Export every page of a PDF file as a JPEG image.

    The images are written into a freshly created temporary directory and are
    named ``<pdf base name>_<page index as 3 digits>.jpg`` (the base name keeps
    its original extension). Pages are rendered at scale 0.6.

    On success the temporary directory is left in place because the returned
    paths point into it; the caller is responsible for removing it (for
    example with ``shutil.rmtree``) once the images are no longer needed.
    If anything fails, the directory is removed before the error propagates,
    since the caller would otherwise have no way to locate it.

    :param file_path: Path to the PDF file to export.
    :return: List of paths to the JPEG images, in page order
    :raises ValueError: If ``file_path`` is None.
    """
    import shutil  # only needed for cleanup on the failure path

    if file_path is None:
        raise ValueError("file_path cannot be None")
    output_dir = tempfile.mkdtemp()  # Create a temporary directory
    succeeded = False
    try:
        pdf = pdfium.PdfDocument(file_path)
        try:
            # Images are named after the PDF's base name, not its full path.
            file_name = os.path.basename(file_path)
            image_paths = []
            for i in range(len(pdf)):
                page = pdf[i]
                image = page.render(scale=0.6).to_pil()
                image_path = os.path.join(output_dir, f"{file_name}_{i:03d}.jpg")
                image.save(image_path)
                image_paths.append(image_path)
        finally:
            # Release the document whether or not rendering succeeded.
            pdf.close()
        succeeded = True
        return image_paths
    finally:
        if not succeeded:
            # Nobody else knows about output_dir on failure; do not leak it.
            shutil.rmtree(output_dir, ignore_errors=True)