Spaces:
Sleeping
Sleeping
| import base64 | |
| import os | |
| from typing import Optional | |
| from openai import OpenAI | |
class ImageSummarizer:
    """Summarizes images using OpenAI's vision API.

    The model name, token limit and instruction prompt default to the
    class-level values below and can be overridden per instance through
    keyword-only constructor arguments.
    """

    # Defaults used when the constructor does not override them.
    model = "gpt-4o-mini"  # or "gpt-4-vision-preview"
    max_tokens = 500
    # Instruction sent with every image (asks for a detailed description
    # in Russian of all visible objects, text, diagrams and charts).
    prompt = (
        "Пожалуйста, опишите детально содержание этого изображения на русском языке. "
        "Укажите все видимые объекты, текст, диаграммы, графики и их взаимосвязь."
    )

    # File-extension style names that differ from the media subtype.
    _FORMAT_ALIASES = {"jpg": "jpeg"}

    def __init__(
        self,
        api_key: Optional[str] = None,
        *,
        model: Optional[str] = None,
        max_tokens: Optional[int] = None,
        prompt: Optional[str] = None,
    ):
        """Initialize the OpenAI client.

        Args:
            api_key: API key; falls back to the ``OPENAI_API_KEY``
                environment variable when omitted or empty.
            model: Optional override for the model name.
            max_tokens: Optional override for the completion token limit.
            prompt: Optional override for the instruction text.
        """
        self.client = OpenAI(api_key=api_key or os.getenv("OPENAI_API_KEY"))
        if model is not None:
            self.model = model
        if max_tokens is not None:
            self.max_tokens = max_tokens
        if prompt is not None:
            self.prompt = prompt

    @classmethod
    def _media_subtype(cls, image_format) -> str:
        """Turn a format/extension name into an ``image/<subtype>`` subtype."""
        normalized = str(image_format or "").strip().lstrip(".").lower()
        if not normalized:
            return "png"
        return cls._FORMAT_ALIASES.get(normalized, normalized)

    def summarize_image_base64(self,
                               image_base64: str,
                               image_format: str = "png") -> str:
        """Summarize an image using OpenAI vision.

        Args:
            image_base64: Base64 encoded image.
            image_format: Image format (png, jpg, etc.). Case and a leading
                dot are ignored, ``jpg`` is sent as ``jpeg``, and an empty
                value falls back to ``png``.

        Returns:
            Image description/summary. If the response carries no text an
            empty string is returned. If anything raises, the error is
            printed and a placeholder string containing the error message
            is returned instead of propagating the exception.
        """
        try:
            data_url = (
                f"data:image/{self._media_subtype(image_format)}"
                f";base64,{image_base64}"
            )
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {"url": data_url},
                            },
                            {
                                "type": "text",
                                "text": self.prompt,
                            },
                        ],
                    }
                ],
                max_tokens=self.max_tokens,
            )
            # content may be None; keep the declared ``str`` return type.
            return response.choices[0].message.content or ""
        except Exception as e:
            # Deliberate best-effort: one failing image must not abort the
            # caller's batch, so report and return a placeholder.
            print(f"Error summarizing image: {e}")
            return f"Изображение на странице (ошибка обработки: {str(e)})"
def process_images_in_documents(documents_data: list,
                                image_summarizer: "ImageSummarizer") -> list:
    """Process images in extracted PDF documents and add summaries.

    Walks every document's ``"pages"`` and every page's ``"images"``; each
    image dict with a non-empty ``"base64"`` value gets a ``"summary"`` key
    holding the result of ``image_summarizer.summarize_image_base64``.
    Images without base64 data are left untouched. Missing or ``None``
    ``"pages"`` / ``"images"`` entries are treated as empty, and a missing
    or empty ``"format"`` falls back to ``"png"``.

    Args:
        documents_data: List of document content dictionaries.
        image_summarizer: ImageSummarizer instance.

    Returns:
        The same ``documents_data`` list, updated in place.
    """
    for doc in documents_data:
        # ``get(key, [])`` still yields None when the key is present with a
        # None value, so fall back with ``or`` instead.
        for page in doc.get("pages") or []:
            for image in page.get("images") or []:
                encoded = image.get("base64")
                if not encoded:
                    continue
                print(f"Summarizing image from page {page.get('page_number')}")
                image["summary"] = image_summarizer.summarize_image_base64(
                    encoded,
                    image.get("format") or "png",
                )
    return documents_data