Spaces:

coco1990
/

Snap-Solver

Sleeping

renxsh

init

f1b4581 about 2 months ago

13.9 kB

	from typing import Generator, Dict, Any
	import json
	import requests
	from .base import BaseModel

	class MathpixModel(BaseModel):
	"""
	Mathpix OCR model for processing images containing mathematical formulas,
	text, and tables.
	"""

	def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None):
	"""
	Initialize the Mathpix model.

	Args:
	api_key: Mathpix API key in format "app_id:app_key"
	temperature: Not used for Mathpix but kept for BaseModel compatibility
	system_prompt: Not used for Mathpix but kept for BaseModel compatibility

	Raises:
	ValueError: If the API key format is invalid
	"""
	# 只传递必需的参数，不传递language参数
	super().__init__(api_key, temperature, system_prompt)
	try:
	self.app_id, self.app_key = api_key.split(':')
	except ValueError:
	raise ValueError("Mathpix API key must be in format 'app_id:app_key'")

	self.api_url = "https://api.mathpix.com/v3/text"
	self.headers = {
	"app_id": self.app_id,
	"app_key": self.app_key,
	"Content-Type": "application/json"
	}

	# Content type presets
	self.presets = {
	"math": {
	"formats": ["latex_normal", "latex_styled", "asciimath"],
	"data_options": {
	"include_asciimath": True,
	"include_latex": True,
	"include_mathml": True
	},
	"ocr_options": {
	"detect_formulas": True,
	"enable_math_ocr": True,
	"enable_handwritten": True,
	"rm_spaces": True
	}
	},
	"text": {
	"formats": ["text"],
	"data_options": {
	"include_latex": False,
	"include_asciimath": False
	},
	"ocr_options": {
	"enable_spell_check": True,
	"enable_handwritten": True,
	"rm_spaces": False
	}
	},
	"table": {
	"formats": ["text", "data"],
	"data_options": {
	"include_latex": True
	},
	"ocr_options": {
	"detect_tables": True,
	"enable_spell_check": True,
	"rm_spaces": True
	}
	},
	"full_text": {
	"formats": ["text"],
	"data_options": {
	"include_latex": False,
	"include_asciimath": False
	},
	"ocr_options": {
	"enable_spell_check": True,
	"enable_handwritten": True,
	"rm_spaces": False,
	"detect_paragraphs": True,
	"enable_tables": False,
	"enable_math_ocr": False
	}
	}
	}

	# Default to math preset
	self.current_preset = "math"

	def analyze_image(self, image_data: str, proxies: dict = None, content_type: str = None,
	confidence_threshold: float = 0.8, max_retries: int = 3) -> Generator[dict, None, None]:
	"""
	Analyze an image using Mathpix OCR API.

	Args:
	image_data: Base64 encoded image data
	proxies: Optional proxy configuration
	content_type: Type of content to analyze ('math', 'text', or 'table')
	confidence_threshold: Minimum confidence score to accept (0.0 to 1.0)
	max_retries: Maximum number of retry attempts for failed requests

	Yields:
	dict: Response chunks with status and content
	"""
	if content_type and content_type in self.presets:
	self.current_preset = content_type

	preset = self.presets[self.current_preset]

	try:
	# Prepare request payload
	payload = {
	"src": f"data:image/jpeg;base64,{image_data}",
	"formats": preset["formats"],
	"data_options": preset["data_options"],
	"ocr_options": preset["ocr_options"]
	}

	# Initialize retry counter
	retry_count = 0

	while retry_count < max_retries:
	try:
	# Send request to Mathpix API with timeout
	response = requests.post(
	self.api_url,
	headers=self.headers,
	json=payload,
	proxies=proxies,
	timeout=25 # 25 second timeout
	)

	# Handle specific API error codes
	if response.status_code == 429: # Rate limit exceeded
	if retry_count < max_retries - 1:
	retry_count += 1
	continue
	else:
	raise requests.exceptions.RequestException("Rate limit exceeded")

	response.raise_for_status()
	result = response.json()

	# Check confidence threshold
	if 'confidence' in result and result['confidence'] < confidence_threshold:
	yield {
	"status": "warning",
	"content": f"Low confidence score: {result['confidence']:.2%}"
	}

	break # Success, exit retry loop

	except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
	if retry_count < max_retries - 1:
	retry_count += 1
	continue
	raise

	# Format the response
	formatted_response = self._format_response(result)

	# Yield initial status
	yield {
	"status": "started",
	"content": ""
	}

	# Yield the formatted response
	yield {
	"status": "completed",
	"content": formatted_response,
	"model": self.get_model_identifier()
	}

	except requests.exceptions.RequestException as e:
	yield {
	"status": "error",
	"error": f"Mathpix API error: {str(e)}"
	}
	except Exception as e:
	yield {
	"status": "error",
	"error": f"Error processing image: {str(e)}"
	}

	def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
	"""
	Not implemented for Mathpix model as it only processes images.
	"""
	yield {
	"status": "error",
	"error": "Text analysis is not supported by Mathpix model"
	}

	def get_default_system_prompt(self) -> str:
	"""
	Not used for Mathpix model.
	"""
	return ""

	def get_model_identifier(self) -> str:
	"""
	Return the model identifier.
	"""
	return "mathpix"

	def _format_response(self, result: Dict[str, Any]) -> str:
	"""
	Format the Mathpix API response into a readable string.

	Args:
	result: Raw API response from Mathpix

	Returns:
	str: Formatted response string with all available formats
	"""
	formatted_parts = []

	# Add confidence score if available
	if 'confidence' in result:
	formatted_parts.append(f"Confidence: {result['confidence']:.2%}\n")

	# Add text content
	if 'text' in result:
	formatted_parts.append("Text Content:")
	formatted_parts.append(result['text'])
	formatted_parts.append("")

	# Add LaTeX content
	if 'latex_normal' in result:
	formatted_parts.append("LaTeX (Normal):")
	formatted_parts.append(result['latex_normal'])
	formatted_parts.append("")

	if 'latex_styled' in result:
	formatted_parts.append("LaTeX (Styled):")
	formatted_parts.append(result['latex_styled'])
	formatted_parts.append("")

	# Add data formats (ASCII math, MathML)
	if 'data' in result and isinstance(result['data'], list):
	for item in result['data']:
	item_type = item.get('type', '')
	if item_type and 'value' in item:
	formatted_parts.append(f"{item_type.upper()}:")
	formatted_parts.append(item['value'])
	formatted_parts.append("")

	# Add table data if present
	if 'tables' in result and result['tables']:
	formatted_parts.append("Tables Detected:")
	for i, table in enumerate(result['tables'], 1):
	formatted_parts.append(f"Table {i}:")
	if 'cells' in table:
	# Format table as a grid
	cells = table['cells']
	if cells:
	max_col = max(cell.get('col', 0) for cell in cells) + 1
	max_row = max(cell.get('row', 0) for cell in cells) + 1
	grid = [['' for _ in range(max_col)] for _ in range(max_row)]

	for cell in cells:
	row = cell.get('row', 0)
	col = cell.get('col', 0)
	text = cell.get('text', '')
	grid[row][col] = text

	# Format grid as table
	col_widths = [max(len(str(grid[r][c])) for r in range(max_row)) for c in range(max_col)]
	for row in grid:
	row_str = ' \| '.join(f"{str(cell):<{width}}" for cell, width in zip(row, col_widths))
	formatted_parts.append(f"\| {row_str} \|")
	formatted_parts.append("")

	# Add error message if present
	if 'error' in result:
	error_msg = result['error']
	if isinstance(error_msg, dict):
	error_msg = error_msg.get('message', str(error_msg))
	formatted_parts.append(f"Error: {error_msg}")

	return "\n".join(formatted_parts).strip()

	def extract_full_text(self, image_data: str, proxies: dict = None, max_retries: int = 3) -> str:
	"""
	专门用于提取图像中的全部文本内容，忽略数学公式和表格等其他元素。

	Args:
	image_data: Base64编码的图像数据
	proxies: 可选的代理配置
	max_retries: 请求失败时的最大重试次数

	Returns:
	str: 图像中提取的完整文本内容
	"""
	try:
	# 准备请求负载，使用专为全文提取配置的参数
	payload = {
	"src": f"data:image/jpeg;base64,{image_data}",
	"formats": ["text"],
	"data_options": {
	"include_latex": False,
	"include_asciimath": False
	},
	"ocr_options": {
	"enable_spell_check": True,
	"enable_handwritten": True,
	"rm_spaces": False,
	"detect_paragraphs": True,
	"enable_tables": False,
	"enable_math_ocr": False
	}
	}

	# 初始化重试计数器
	retry_count = 0

	while retry_count < max_retries:
	try:
	# 发送请求到Mathpix API
	response = requests.post(
	self.api_url,
	headers=self.headers,
	json=payload,
	proxies=proxies,
	timeout=30 # 30秒超时
	)

	# 处理特定API错误代码
	if response.status_code == 429: # 超出速率限制
	if retry_count < max_retries - 1:
	retry_count += 1
	continue
	else:
	raise requests.exceptions.RequestException("超出API速率限制")

	response.raise_for_status()
	result = response.json()

	# 直接返回文本内容
	if 'text' in result:
	return result['text']
	else:
	return "未能提取到文本内容"

	except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
	if retry_count < max_retries - 1:
	retry_count += 1
	continue
	raise

	except requests.exceptions.RequestException as e:
	return f"Mathpix API错误: {str(e)}"
	except Exception as e:
	return f"处理图像时出错: {str(e)}"