Spaces:
Build error
Build error
| import base64 | |
| from io import BytesIO | |
| import pymupdf | |
| from PIL import Image | |
| import streamlit as st | |
| import os | |
| from datetime import datetime | |
def generate_metadata(file_path):
    """Generate a metadata dictionary from a file's path and properties.

    Every file gets its name, parent directory name, size in KB, two
    timestamps, extension and the path exactly as passed in. Files ending in
    .png/.jpg/.jpeg/.gif additionally get pixel size, mode and format; files
    ending in .pdf get page count, PDF version and document info. A failure
    while reading the image- or PDF-specific details is reported through
    ``st.error`` and the basic metadata is still returned.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        dict mapping human-readable labels to values.

    Raises:
        OSError: If ``os.stat`` cannot access ``file_path``.
    """
    file_stat = os.stat(file_path)
    file_name = os.path.basename(file_path)
    parent_dir = os.path.basename(os.path.dirname(file_path))
    # Extension checks are case-insensitive; lower-case the name only once.
    lowered_name = file_name.lower()

    metadata = {
        "File Name": file_name,
        "Directory": parent_dir,
        "File Size": f"{file_stat.st_size / 1024:.2f} KB",
        "Last Modified": datetime.fromtimestamp(file_stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
        # NOTE(review): st_ctime is the creation time on Windows but the last
        # metadata-change time on Unix, so this label can be misleading there.
        "Created": datetime.fromtimestamp(file_stat.st_ctime).strftime('%Y-%m-%d %H:%M:%S'),
        "File Extension": os.path.splitext(file_name)[1],
        "Full Path": file_path
    }

    # Add image-specific metadata if it's an image
    if lowered_name.endswith(('.png', '.jpg', '.jpeg', '.gif')):
        try:
            with Image.open(file_path) as img:
                metadata.update({
                    "Image Size": f"{img.size[0]}x{img.size[1]}",
                    "Image Mode": img.mode,
                    "Image Format": img.format
                })
        except Exception as e:
            # Best effort: surface the problem in the UI, keep the basic fields.
            st.error(f"Error reading image metadata: {str(e)}")
    # Add PDF-specific metadata if it's a PDF
    elif lowered_name.endswith('.pdf'):
        try:
            # The context manager closes the document (and its file handle)
            # even when one of the lookups below raises.
            with pymupdf.Document(file_path) as doc:
                metadata.update({
                    "Page Count": len(doc),
                    # NOTE(review): confirm `pdf_version` exists on Document in
                    # the installed PyMuPDF; if it raises, the handler below
                    # fires and none of the PDF fields are added.
                    "PDF Version": doc.pdf_version,
                    "Document Info": doc.metadata if doc.metadata else "No PDF metadata available"
                })
        except Exception as e:
            # Best effort: surface the problem in the UI, keep the basic fields.
            st.error(f"Error reading PDF metadata: {str(e)}")

    return metadata
def load_pdf_as_image(file_path):
    """Render the first page of a PDF as a PIL image.

    Args:
        file_path: Path of the PDF file to open.

    Returns:
        A PIL image built in "RGB" mode from the rendered first page.
    """
    # Open the PDF in a context manager so the document is closed once the
    # page has been rendered, including when rendering raises.
    with pymupdf.Document(file_path) as doc:
        # Only the first page is rendered.
        page = doc[0]
        # Relies on get_pixmap()'s default arguments producing 3-byte RGB
        # samples without alpha, which is what the "RGB" mode below expects.
        pix = page.get_pixmap()
        # frombytes copies the pixel data, so the returned image does not
        # depend on the document staying open.
        return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
def im_2_b64(image):
    """Encode a PIL image as a base64 string of its JPEG bytes.

    JPEG cannot store an alpha channel or a palette, so an image whose mode
    the JPEG writer rejects (for example RGBA, LA or P) is converted to RGB
    first instead of failing inside ``image.save``. The conversion works on a
    copy; the caller's image object is not modified.

    Args:
        image: PIL image to encode.

    Returns:
        str: Base64 text (UTF-8 decoded) of the JPEG-encoded image.
    """
    # Modes the JPEG writer accepts as-is. These are saved untouched so the
    # output for inputs that already worked does not change.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buff = BytesIO()
    image.save(buff, format="JPEG")
    return base64.b64encode(buff.getvalue()).decode("utf-8")
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode in one go; the base64 bytes are decoded
    as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')