# File size: 3,107 Bytes
# 835ecb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import base64
import os
from typing import Optional
from openai import OpenAI

class ImageSummarizer:
    """Summarizes images using OpenAI's vision API.

    The request parameters (``model``, ``max_tokens``, ``prompt``) are
    class-level defaults; ``model`` and ``max_tokens`` can be overridden per
    instance through the constructor.
    """

    # Defaults sent with every chat-completions request.
    model = "gpt-4o-mini"
    max_tokens = 500
    # Runtime prompt (Russian): asks for a detailed description of the image.
    prompt = "Пожалуйста, опишите детально содержание этого изображения на русском языке. Укажите все видимые объекты, текст, диаграммы, графики и их взаимосвязь."

    # File-extension style names that differ from the MIME subtype used in
    # a ``data:image/<subtype>`` URL.
    _FORMAT_ALIASES = {"jpg": "jpeg"}

    def __init__(self,
                 api_key: Optional[str] = None,
                 model: Optional[str] = None,
                 max_tokens: Optional[int] = None):
        """Initialize the OpenAI client.

        Args:
            api_key: API key; falls back to the ``OPENAI_API_KEY``
                environment variable when omitted or empty.
            model: Optional override for the class-level default model.
            max_tokens: Optional override for the class-level default
                completion token limit.
        """
        self.client = OpenAI(api_key=api_key or os.getenv("OPENAI_API_KEY"))
        if model is not None:
            self.model = model
        if max_tokens is not None:
            self.max_tokens = max_tokens

    @classmethod
    def _mime_subtype(cls, image_format: Optional[str]) -> str:
        """Return a normalized MIME subtype for *image_format*.

        Lower-cases the value, drops surrounding whitespace and a leading
        dot, maps known aliases (``jpg`` -> ``jpeg``) and falls back to
        ``png`` when nothing usable is left.
        """
        cleaned = (image_format or "").strip().lstrip(".").lower()
        return cls._FORMAT_ALIASES.get(cleaned, cleaned) or "png"

    def summarize_image_base64(self,
                               image_base64: str,
                               image_format: str = "png") -> str:
        """Summarize an image using OpenAI vision.

        Args:
            image_base64: Base64 encoded image.
            image_format: Image format (png, jpg, etc.). Normalized before
                being placed in the data URL; ``None`` or an empty value is
                treated as ``png``.

        Returns:
            The description produced by the model, an empty string when the
            response carries no text content, or a fallback message
            containing the error text when the request fails.
        """
        data_url = f"data:image/{self._mime_subtype(image_format)};base64,{image_base64}"
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {"url": data_url},
                            },
                            {
                                "type": "text",
                                "text": self.prompt,
                            },
                        ],
                    }
                ],
                max_tokens=self.max_tokens,
            )
            # ``content`` may be None; keep the declared ``str`` return type.
            return response.choices[0].message.content or ""

        except Exception as e:
            # Deliberate best-effort: one failing image must not abort the
            # caller, so the error is reported and folded into the summary.
            print(f"Error summarizing image: {e}")
            return f"Изображение на странице (ошибка обработки: {str(e)})"


def process_images_in_documents(documents_data: list,
                                image_summarizer: "ImageSummarizer") -> list:
    """Process images in extracted PDF documents and add summaries.

    Walks every document's ``pages`` and every page's ``images``. Each image
    dictionary with a non-empty ``base64`` value gets a ``summary`` key
    holding the text returned by ``image_summarizer``. Images without
    base64 data are left untouched.

    Args:
        documents_data: List of document content dictionaries. The
            dictionaries are modified in place.
        image_summarizer: Object exposing
            ``summarize_image_base64(image_base64, image_format)``.

    Returns:
        The same ``documents_data`` list, now carrying image summaries.
    """
    for doc in documents_data:
        # ``or []`` also covers keys that are present but explicitly None,
        # which a ``.get(key, [])`` default would not.
        for page in doc.get("pages") or []:
            for image in page.get("images") or []:
                encoded = image.get("base64")
                if not encoded:
                    continue
                print(f"Summarizing image from page {page.get('page_number')}")
                # A missing or null format falls back to PNG.
                image["summary"] = image_summarizer.summarize_image_base64(
                    encoded,
                    image.get("format") or "png",
                )

    return documents_data