File size: 13,869 Bytes
f1b4581
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
from typing import Generator, Dict, Any
import json
import requests
from .base import BaseModel

class MathpixModel(BaseModel):
    """
    Mathpix OCR model for processing images containing mathematical formulas,
    text, and tables.
    """
    
    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None):
        """
        Initialize the Mathpix model.

        Splits the combined credential string into ``app_id`` / ``app_key``,
        builds the request headers and registers the request presets used by
        the OCR methods.

        Args:
            api_key: Mathpix API key in format "app_id:app_key"
            temperature: Not used for Mathpix but kept for BaseModel compatibility
            system_prompt: Not used for Mathpix but kept for BaseModel compatibility

        Raises:
            ValueError: If the API key is not a string of the form
                "app_id:app_key" with both parts non-empty
        """
        # Pass only the required arguments; the language parameter is not passed.
        super().__init__(api_key, temperature, system_prompt)

        key_format_error = "Mathpix API key must be in format 'app_id:app_key'"
        try:
            app_id, app_key = api_key.split(':')
        except (ValueError, AttributeError):
            # ValueError: wrong number of ':' separated parts.
            # AttributeError: api_key is not a string (e.g. None).
            raise ValueError(key_format_error) from None

        # Surrounding whitespace (e.g. from a pasted "id: key") is never part of
        # a credential and would produce invalid header values.
        app_id, app_key = app_id.strip(), app_key.strip()
        if not app_id or not app_key:
            raise ValueError(key_format_error)

        self.app_id = app_id
        self.app_key = app_key

        self.api_url = "https://api.mathpix.com/v3/text"
        self.headers = {
            "app_id": self.app_id,
            "app_key": self.app_key,
            "Content-Type": "application/json"
        }

        # Content type presets: each entry supplies the "formats",
        # "data_options" and "ocr_options" fields of the request payload.
        # NOTE(review): confirm these option names against the Mathpix v3/text
        # request schema.
        self.presets = {
            "math": {
                "formats": ["latex_normal", "latex_styled", "asciimath"],
                "data_options": {
                    "include_asciimath": True,
                    "include_latex": True,
                    "include_mathml": True
                },
                "ocr_options": {
                    "detect_formulas": True,
                    "enable_math_ocr": True,
                    "enable_handwritten": True,
                    "rm_spaces": True
                }
            },
            "text": {
                "formats": ["text"],
                "data_options": {
                    "include_latex": False,
                    "include_asciimath": False
                },
                "ocr_options": {
                    "enable_spell_check": True,
                    "enable_handwritten": True,
                    "rm_spaces": False
                }
            },
            "table": {
                "formats": ["text", "data"],
                "data_options": {
                    "include_latex": True
                },
                "ocr_options": {
                    "detect_tables": True,
                    "enable_spell_check": True,
                    "rm_spaces": True
                }
            },
            "full_text": {
                "formats": ["text"],
                "data_options": {
                    "include_latex": False,
                    "include_asciimath": False
                },
                "ocr_options": {
                    "enable_spell_check": True,
                    "enable_handwritten": True,
                    "rm_spaces": False,
                    "detect_paragraphs": True,
                    "enable_tables": False,
                    "enable_math_ocr": False
                }
            }
        }

        # Default to math preset
        self.current_preset = "math"

    def analyze_image(self, image_data: str, proxies: dict = None, content_type: str = None,
                     confidence_threshold: float = 0.8, max_retries: int = 3) -> Generator[dict, None, None]:
        """
        Analyze an image using Mathpix OCR API.

        Args:
            image_data: Base64 encoded image data. A complete ``data:`` URI is
                also accepted and sent unchanged; bare base64 is wrapped as
                ``data:image/jpeg;base64,...``.
            proxies: Optional proxy configuration passed to ``requests.post``
            content_type: Name of a preset in ``self.presets`` ('math', 'text',
                'table' or 'full_text'). A recognised value is stored in
                ``self.current_preset``, so it also becomes the default for
                later calls; unknown or empty values leave the current preset
                in place.
            confidence_threshold: Minimum confidence score to accept (0.0 to 1.0)
            max_retries: Maximum number of request attempts. Values below 1
                are treated as 1 so that a request is always made.

        Yields:
            dict: Response chunks. On success: an optional
            ``{"status": "warning", ...}`` when the reported confidence is
            below the threshold, then ``{"status": "started", ...}`` and
            ``{"status": "completed", "content": ..., "model": ...}``.
            On failure: a single ``{"status": "error", "error": ...}``.
        """
        if content_type and content_type in self.presets:
            self.current_preset = content_type

        preset = self.presets[self.current_preset]

        try:
            # Avoid producing "data:image/jpeg;base64,data:image/png;base64,..."
            # when the caller already supplies a full data URI.
            if isinstance(image_data, str) and image_data.startswith("data:"):
                src = image_data
            else:
                src = f"data:image/jpeg;base64,{image_data}"

            # Prepare request payload
            payload = {
                "src": src,
                "formats": preset["formats"],
                "data_options": preset["data_options"],
                "ocr_options": preset["ocr_options"]
            }

            # Always make at least one attempt; otherwise `result` would never
            # be assigned and the caller would get a misleading error.
            attempts = max(1, max_retries)
            result = None

            for attempt in range(attempts):
                is_last_attempt = attempt == attempts - 1

                try:
                    # Send request to Mathpix API with timeout
                    response = requests.post(
                        self.api_url,
                        headers=self.headers,
                        json=payload,
                        proxies=proxies,
                        timeout=25  # 25 second timeout
                    )
                except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
                    # Transient network failure: retry immediately unless this
                    # was the final attempt.
                    if is_last_attempt:
                        raise
                    continue

                # Rate limit exceeded: retry, or give up on the final attempt
                if response.status_code == 429:
                    if is_last_attempt:
                        raise requests.exceptions.RequestException("Rate limit exceeded")
                    continue

                # Any other HTTP error is not retried
                response.raise_for_status()
                result = response.json()
                break  # Success, exit retry loop

            # Check confidence threshold
            if 'confidence' in result and result['confidence'] < confidence_threshold:
                yield {
                    "status": "warning",
                    "content": f"Low confidence score: {result['confidence']:.2%}"
                }

            # Format the response
            formatted_response = self._format_response(result)

            # Yield initial status
            yield {
                "status": "started",
                "content": ""
            }

            # Yield the formatted response
            yield {
                "status": "completed",
                "content": formatted_response,
                "model": self.get_model_identifier()
            }

        except requests.exceptions.RequestException as e:
            yield {
                "status": "error",
                "error": f"Mathpix API error: {str(e)}"
            }
        except Exception as e:
            yield {
                "status": "error",
                "error": f"Error processing image: {str(e)}"
            }

    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """
        Report that text analysis is unavailable.

        Mathpix only processes images, so both arguments are ignored and the
        generator produces exactly one error chunk.
        """
        unsupported = dict(
            status="error",
            error="Text analysis is not supported by Mathpix model",
        )
        yield unsupported

    def get_default_system_prompt(self) -> str:
        """
        Return the default system prompt, which is empty because the Mathpix
        model does not use one.
        """
        no_prompt = ""
        return no_prompt

    def get_model_identifier(self) -> str:
        """
        Return the fixed identifier string for this model.
        """
        identifier = "mathpix"
        return identifier

    def _format_response(self, result: Dict[str, Any]) -> str:
        """
        Format the Mathpix API response into a readable string.
        
        Args:
            result: Raw API response from Mathpix
            
        Returns:
            str: Formatted response string with all available formats
        """
        formatted_parts = []
        
        # Add confidence score if available
        if 'confidence' in result:
            formatted_parts.append(f"Confidence: {result['confidence']:.2%}\n")
        
        # Add text content
        if 'text' in result:
            formatted_parts.append("Text Content:")
            formatted_parts.append(result['text'])
            formatted_parts.append("")
        
        # Add LaTeX content
        if 'latex_normal' in result:
            formatted_parts.append("LaTeX (Normal):")
            formatted_parts.append(result['latex_normal'])
            formatted_parts.append("")
            
        if 'latex_styled' in result:
            formatted_parts.append("LaTeX (Styled):")
            formatted_parts.append(result['latex_styled'])
            formatted_parts.append("")
        
        # Add data formats (ASCII math, MathML)
        if 'data' in result and isinstance(result['data'], list):
            for item in result['data']:
                item_type = item.get('type', '')
                if item_type and 'value' in item:
                    formatted_parts.append(f"{item_type.upper()}:")
                    formatted_parts.append(item['value'])
                    formatted_parts.append("")
        
        # Add table data if present
        if 'tables' in result and result['tables']:
            formatted_parts.append("Tables Detected:")
            for i, table in enumerate(result['tables'], 1):
                formatted_parts.append(f"Table {i}:")
                if 'cells' in table:
                    # Format table as a grid
                    cells = table['cells']
                    if cells:
                        max_col = max(cell.get('col', 0) for cell in cells) + 1
                        max_row = max(cell.get('row', 0) for cell in cells) + 1
                        grid = [['' for _ in range(max_col)] for _ in range(max_row)]
                        
                        for cell in cells:
                            row = cell.get('row', 0)
                            col = cell.get('col', 0)
                            text = cell.get('text', '')
                            grid[row][col] = text
                        
                        # Format grid as table
                        col_widths = [max(len(str(grid[r][c])) for r in range(max_row)) for c in range(max_col)]
                        for row in grid:
                            row_str = ' | '.join(f"{str(cell):<{width}}" for cell, width in zip(row, col_widths))
                            formatted_parts.append(f"| {row_str} |")
                formatted_parts.append("")
        
        # Add error message if present
        if 'error' in result:
            error_msg = result['error']
            if isinstance(error_msg, dict):
                error_msg = error_msg.get('message', str(error_msg))
            formatted_parts.append(f"Error: {error_msg}")
        
        return "\n".join(formatted_parts).strip()

    def extract_full_text(self, image_data: str, proxies: dict = None, max_retries: int = 3) -> str:
        """
        Extract the plain text content of an image, ignoring mathematical
        formulas, tables and other elements.

        The request uses the "full_text" preset from ``self.presets``. Errors
        are not raised: they are reported through the returned string.

        Args:
            image_data: Base64 encoded image data. A complete ``data:`` URI is
                also accepted and sent unchanged; bare base64 is wrapped as
                ``data:image/jpeg;base64,...``.
            proxies: Optional proxy configuration passed to ``requests.post``
            max_retries: Maximum number of request attempts. Values below 1
                are treated as 1 so that a request is always made and a
                string is always returned.

        Returns:
            str: The ``text`` field of the API response; a fixed placeholder
            message when the response has no ``text`` field; or an error
            message when the request or processing fails.
        """
        try:
            # Avoid a doubled "data:...;base64," prefix when the caller already
            # supplies a full data URI.
            if isinstance(image_data, str) and image_data.startswith("data:"):
                src = image_data
            else:
                src = f"data:image/jpeg;base64,{image_data}"

            # Build the payload from the preset configured for full-text
            # extraction instead of repeating the same options here.
            preset = self.presets["full_text"]
            payload = {
                "src": src,
                "formats": preset["formats"],
                "data_options": preset["data_options"],
                "ocr_options": preset["ocr_options"]
            }

            # Always make at least one attempt so the method cannot fall off
            # the end and return None.
            attempts = max(1, max_retries)

            for attempt in range(attempts):
                is_last_attempt = attempt == attempts - 1

                try:
                    # Send the request to the Mathpix API
                    response = requests.post(
                        self.api_url,
                        headers=self.headers,
                        json=payload,
                        proxies=proxies,
                        timeout=30  # 30 second timeout
                    )
                except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
                    # Transient network failure: retry immediately unless this
                    # was the final attempt.
                    if is_last_attempt:
                        raise
                    continue

                # Rate limit exceeded: retry, or give up on the final attempt
                if response.status_code == 429:
                    if is_last_attempt:
                        raise requests.exceptions.RequestException("超出API速率限制")
                    continue

                # Any other HTTP error is not retried
                response.raise_for_status()
                result = response.json()

                # Return the text content directly
                if 'text' in result:
                    return result['text']
                return "未能提取到文本内容"

        except requests.exceptions.RequestException as e:
            return f"Mathpix API错误: {str(e)}"
        except Exception as e:
            return f"处理图像时出错: {str(e)}"