diff options
Diffstat (limited to 'pyblackbird_cc/resources/services.py')
-rw-r--r-- | pyblackbird_cc/resources/services.py | 55 |
1 files changed, 0 insertions, 55 deletions
import dataclasses
import os.path
import shutil
import tempfile

from pdf2image import convert_from_path
from PyPDF2 import PdfReader


@dataclasses.dataclass
class PDFMetadata:
    """Basic facts about a PDF file on disk."""

    # Path the metadata was read from (the value passed in, not just the base name).
    file_name: str
    # Size of the file in bytes, as reported by os.path.getsize.
    file_size: int
    # Number of pages reported by PyPDF2's PdfReader.
    n_pages: int


def get_pdf_metadata_from_path(file_path: str) -> PDFMetadata:
    """
    Return the metadata of the PDF file at ``file_path``.

    :param file_path: Path to an existing PDF file.
    :return: PDFMetadata holding the path, the size in bytes and the page count.
    :raises ValueError: If ``file_path`` does not point to a regular file.
    """
    if not os.path.isfile(file_path):
        # The f-prefix matters: without it the literal text "{file_path}"
        # ends up in the message instead of the offending path.
        raise ValueError(f"file_path must be a file. {file_path} is not a file.")
    reader = PdfReader(file_path)
    return PDFMetadata(
        file_name=file_path,
        file_size=os.path.getsize(file_path),
        n_pages=len(reader.pages),
    )


def export_pages_as_images(file_path: str) -> list[str]:
    """
    Export the pages of a PDF file as JPEG images.

    The images are written to a freshly created temporary directory that is
    NOT removed on success: the caller owns it and should delete it (for
    example with ``shutil.rmtree``) once the images are no longer needed.
    If the export fails part-way, the directory is removed before the
    exception propagates.

    :param file_path: Path to the PDF file to render.
    :return: List of paths to the JPEG images, one per page, in page order.
    """
    # Parse the PDF before creating any directory, so an unreadable file
    # fails without leaving an empty temp directory behind.
    n_pages = len(PdfReader(file_path).pages)
    file_name = os.path.basename(file_path)

    output_dir = tempfile.mkdtemp()
    try:
        # pdf2image writes its intermediate page files into ``scratch_dir``;
        # the images must be saved while that directory still exists, hence
        # the loop lives inside the ``with`` block.
        with tempfile.TemporaryDirectory() as scratch_dir:
            pages = convert_from_path(
                file_path,
                dpi=56,
                size=300,
                output_folder=scratch_dir,
            )
            image_paths = []
            for index, page in enumerate(pages[:n_pages]):
                image_path = os.path.join(output_dir, f"{file_name}_{index:03d}.jpg")
                page.save(image_path)
                image_paths.append(image_path)
    except Exception:
        # Do not leak a half-filled output directory when the export fails.
        shutil.rmtree(output_dir, ignore_errors=True)
        raise
    return image_paths