Spaces:
Sleeping
Sleeping
import base64
import io
import os

from langchain_core.documents import Document
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from pdf2image import convert_from_path
class VisionProcessor:
    """Turn the pages of a visual PDF into text descriptions via GPT-4o.

    Pages are rendered to images with pdf2image (which needs the Poppler
    binaries), each analysed page is sent to the vision model together with
    a fixed instruction prompt, and every reply is wrapped in a LangChain
    ``Document`` tagged with the source file name and page number.
    """

    # Instruction sent alongside every page image.
    _PROMPT = (
        "Describe this image in detail. If it is a graph, extract the data "
        "points. If it is a table, transcribe it."
    )

    def __init__(self, poppler_path=None):
        """Create the vision model client and record where Poppler lives.

        Args:
            poppler_path: Directory containing the Poppler binaries. When
                omitted, defaults to ``<cwd>/poppler/Library/bin``. Note the
                default is resolved against the *current working directory*
                at construction time, so it only matches a bundled copy when
                the process is started from the folder that holds ``poppler``.
        """
        self.vision_model = ChatOpenAI(model="gpt-4o", max_tokens=1024)
        if poppler_path is None:
            poppler_path = os.path.join(os.getcwd(), "poppler", "Library", "bin")
        self.poppler_path = poppler_path

    def _resolve_poppler_path(self):
        """Return the configured Poppler directory, or None if it is absent.

        ``None`` makes pdf2image look the Poppler executables up on the
        system PATH instead of in an explicit directory.
        """
        candidate = self.poppler_path
        if candidate and os.path.isdir(candidate):
            return candidate
        return None

    @staticmethod
    def _encode_jpeg(image):
        """Return *image* (anything with a PIL-style ``save``) as base64 JPEG text."""
        with io.BytesIO() as buffer:
            image.save(buffer, format="JPEG")
            return base64.b64encode(buffer.getvalue()).decode("utf-8")

    def process_visual_pdf(self, pdf_path, max_pages=3):
        """Describe the first pages of a PDF with the vision model.

        Args:
            pdf_path: Path of the PDF file to analyse.
            max_pages: Maximum number of leading pages sent to the model
                (cost cap). Values below zero are treated as zero.

        Returns:
            A list of ``Document`` objects, one per analysed page. This is
            best-effort: any error is printed, not raised, and whatever pages
            were completed before the failure are still returned (possibly
            an empty list).
        """
        source_name = os.path.basename(pdf_path)
        print(f" 👁️ Processing Visual PDF: {source_name}...")
        documents = []
        # Clamp so a negative limit cannot become an "all but the last N" slice.
        page_limit = max(0, max_pages)
        try:
            poppler_dir = self._resolve_poppler_path()
            if poppler_dir is None:
                # A missing bundled copy is not fatal: Poppler may be
                # installed system-wide, so let pdf2image search PATH.
                print(
                    f" ⚠️ Poppler not found at {self.poppler_path}; "
                    "falling back to system PATH."
                )

            # 1. Convert PDF pages to images.
            images = convert_from_path(pdf_path, fmt="jpeg", poppler_path=poppler_dir)
            print(f" -> Extracted {len(images)} images (pages) from PDF.")

            # 2. Analyse only the leading pages (cost saving mode).
            for page_number, image in enumerate(images[:page_limit], start=1):
                print(f" -> Analyzing Page {page_number} with GPT-4o Vision...")
                encoded = self._encode_jpeg(image)

                # 3. Send the prompt plus the inline base64 image to GPT-4o.
                message = HumanMessage(
                    content=[
                        {"type": "text", "text": self._PROMPT},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
                        },
                    ]
                )
                response = self.vision_model.invoke([message])
                description = response.content

                documents.append(
                    Document(
                        page_content=f"IMAGE DESCRIPTION (Page {page_number}): {description}",
                        metadata={
                            "source": source_name,
                            "page": page_number,
                            "category": "visual_data",
                        },
                    )
                )

            if len(images) > page_limit:
                print(f" ℹ️ Limited to first {page_limit} pages for POC cost safety.")
        except Exception as e:
            # Deliberate catch-all boundary: callers expect a list, never an
            # exception, so report the problem and return what we have.
            print(f" ❌ Vision Error: {e}")
        return documents