Spaces:
Runtime error
Runtime error
| from pdf2image import convert_from_path | |
| import os | |
| import shutil | |
class PdfManager:
    """Render PDF pages to PNG files under a per-document output directory.

    The output root is ``/tmp/pages`` when ``/tmp`` exists and is writable
    (the Hugging Face Spaces case); otherwise it is a ``pages`` directory
    located next to this source file.
    """

    def __init__(self):
        """Pick the output root directory and make sure it exists."""
        # Directory containing this source file; only used for the fallback.
        app_dir = os.path.dirname(os.path.abspath(__file__))

        # Use /tmp for Hugging Face Spaces, fall back to a path beside the app.
        if os.path.exists("/tmp") and os.access("/tmp", os.W_OK):
            self.base_output_dir = "/tmp/pages"
            print(f"β Using /tmp directory for Hugging Face Spaces: {self.base_output_dir}")
        else:
            self.base_output_dir = os.path.join(app_dir, "pages")
            print(f"β Using relative path: {self.base_output_dir}")

        # Ensure the base directory exists.
        os.makedirs(self.base_output_dir, exist_ok=True)

    def clear_and_recreate_dir(self, output_folder):
        """Leave ``output_folder`` as an existing, empty directory.

        Any existing directory tree at that path is deleted first.

        Args:
            output_folder: Path of the directory to reset.
        """
        print(f"Clearing output folder {output_folder}")
        if os.path.exists(output_folder):
            shutil.rmtree(output_folder)
        # Always recreate: previously an existing folder was removed but not
        # recreated, so the caller's subsequent writes had no directory to
        # land in.
        os.makedirs(output_folder, exist_ok=True)

    def save_images(self, id, pdf_path, max_pages, pages: list[int] = None) -> list[str]:
        """Convert a PDF to PNG files and return the paths that were written.

        Files are written to ``<base_output_dir>/<id>/page_<n>.png`` where
        ``n`` is the 1-based page number. The target folder is cleared before
        saving.

        Args:
            id: Sub-directory name (under the output root) for this document.
            pdf_path: Path of the PDF file to convert.
            max_pages: Maximum number of pages to save; a falsy value
                (``None`` or ``0``) means no limit.
            pages: Optional 0-based page indices to keep. When falsy, every
                page is eligible.

        Returns:
            Paths of the PNG files actually saved, in page order.
        """
        # NOTE(review): ``id`` is joined into a path that is later removed
        # with rmtree; confirm callers never pass untrusted values here.
        output_folder = os.path.join(self.base_output_dir, id)

        # Convert before clearing so a failed conversion does not wipe
        # previously saved output.
        images = convert_from_path(pdf_path)
        print(f"Saving images from {pdf_path} to {output_folder}. Max pages: {max_pages}")
        self.clear_and_recreate_dir(output_folder)

        # Build the lookup once instead of scanning the list for every page.
        wanted = set(pages) if pages else None

        saved_paths = []
        for i, image in enumerate(images):
            if max_pages and len(saved_paths) >= max_pages:
                break
            if wanted is not None and i not in wanted:
                continue
            full_save_path = os.path.join(output_folder, f"page_{i + 1}.png")
            image.save(full_save_path, "PNG")
            saved_paths.append(full_save_path)

        # Return the real file names: with a ``pages`` filter the saved page
        # numbers are not necessarily 1..N.
        return saved_paths