Spaces:

MCP-1st-Birthday
/

manim-mcp

Running

App Files Files Community

manim-mcp / manim_mcp /tools /vision.py

bhaveshgoel07

Complete NeuroAnim HF Spaces deployment - all source files

0805c5b 11 days ago

raw

history blame

2.62 kB

	"""
	Vision Tools for Manim MCP Server

	This module provides tools for analyzing animation frames using vision models.
	"""

	import logging
	from typing import Any, Dict, Optional

	from mcp.types import CallToolResult, TextContent

	from utils.hf_wrapper import HFInferenceWrapper, ModelConfig

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)


	async def analyze_frame(
	    hf_wrapper: HFInferenceWrapper, arguments: Dict[str, Any]
	) -> CallToolResult:
	    """
	    Analyze an animation frame using a vision-language model.

	    Reads the image at ``image_path``, builds a feedback prompt around the
	    requested analysis type, and awaits ``hf_wrapper.vision_analysis``. The
	    prompt asks the model for feedback on:
	    - The requested analysis type
	    - Educational effectiveness
	    - Visual clarity
	    - Suggestions for improvement

	    Args:
	        hf_wrapper: HuggingFace inference wrapper instance.
	        arguments: Dictionary containing:
	            - image_path (str): Path to the image file to analyze.
	            - analysis_type (str): Type of analysis
	              (e.g., "quality", "educational_value", "clarity").
	            - context (str, optional): Additional context about the
	              animation. Defaults to an empty string.
	            - model (str, optional): Hugging Face vision model to use.
	              When absent or falsy, the first entry of
	              ``ModelConfig().vision_models`` is used.

	    Returns:
	        CallToolResult holding a single text item with the analysis. Any
	        failure (missing required argument, unreadable file, model error)
	        is reported as a CallToolResult with ``isError=True`` instead of
	        being raised.
	    """
	    # Check required keys up front so a malformed request yields the same
	    # error-result shape as every other failure, not an uncaught KeyError.
	    missing = [key for key in ("image_path", "analysis_type") if key not in arguments]
	    if missing:
	        message = (
	            "Frame analysis failed: missing required argument(s): "
	            + ", ".join(missing)
	        )
	        logger.error(message)
	        return CallToolResult(
	            content=[TextContent(type="text", text=message)],
	            isError=True,
	        )

	    image_path = arguments["image_path"]
	    analysis_type = arguments["analysis_type"]
	    context = arguments.get("context", "")
	    model = arguments.get("model")

	    try:
	        # Only build the default configuration when the caller did not
	        # choose a model, so a config problem cannot break an explicit choice.
	        selected_model = model or ModelConfig().vision_models[0]

	        # Read the image file
	        with open(image_path, "rb") as image_file:
	            image_bytes = image_file.read()

	        # Build the prompt line by line so that source-code indentation
	        # does not leak into the text sent to the model.
	        focus = analysis_type.replace("_", " ").title()
	        prompt = "\n".join(
	            [
	                f"Analyze this {analysis_type} for an educational animation frame.",
	                f"Context: {context}",
	                "",
	                "Provide specific feedback on:",
	                f"- {focus} assessment",
	                "- Educational effectiveness",
	                "- Visual clarity",
	                "- Suggestions for improvement",
	            ]
	        )

	        # Call vision model
	        response = await hf_wrapper.vision_analysis(
	            model=selected_model,
	            image=image_bytes,
	            text=prompt,
	        )

	        logger.info("Successfully analyzed frame: %s (%s)", image_path, analysis_type)

	        return CallToolResult(
	            content=[
	                TextContent(
	                    type="text",
	                    text=f"Frame Analysis ({analysis_type}):\n\n{response}",
	                )
	            ]
	        )

	    except Exception as e:
	        # Tool boundary: report the failure to the caller as an error result
	        # and keep the traceback in the log for diagnosis.
	        logger.exception("Frame analysis failed: %s", e)
	        return CallToolResult(
	            content=[TextContent(type="text", text=f"Frame analysis failed: {str(e)}")],
	            isError=True,
	        )