Spaces:

MCP-1st-Birthday
/

manim-mcp

Running

App Files Files Community

manim-mcp / mcp_servers /creative.py

bhaveshgoel07

Deploy code fixes (clean history)

fff13d1 12 days ago

raw

history blame

28.7 kB

	"""
	Creative MCP Server

	This MCP server provides tools for creative tasks using Hugging Face models:
	- Concept Planning (Text LLM)
	- Code Generation (Coder LLM)
	- Vision Analysis (Vision-Language LLM)
	- Text-to-Speech (Audio model)
	"""

	import asyncio
	import base64
	import json
	import logging
	import os
	import sys
	import tempfile
	from pathlib import Path
	from typing import Any, Dict, List, Optional

	# The `utils` package lives in the directory above `mcp_servers/`; put that
	# directory at the front of sys.path (once) so the `utils.*` import below
	# resolves when this file is run as a script.
	PROJECT_ROOT = Path(__file__).resolve().parent.parent
	if all(entry != str(PROJECT_ROOT) for entry in sys.path):
	    sys.path.insert(0, str(PROJECT_ROOT))

	from mcp.server import NotificationOptions, Server
	from mcp.server.models import InitializationOptions
	from mcp.server.stdio import stdio_server
	from mcp.types import (
	CallToolResult,
	ListToolsResult,
	TextContent,
	Tool,
	)

	from utils.hf_wrapper import HFInferenceWrapper, ModelConfig, get_hf_wrapper

	# Module-level logger; handlers/format are configured in main().
	logger = logging.getLogger(__name__)

	# Low-level MCP server instance; the @server.list_tools() / @server.call_tool()
	# decorators below register this module's handlers on it.
	server = Server("neuroanim-creative")

	# Shared HF wrapper instance. Starts as None and is populated on first use by
	# CreativeTool.get_hf_wrapper().
	hf_wrapper: Optional[HFInferenceWrapper] = None


	class CreativeTool:
	    """Namespace for helpers shared by the creative tool handlers."""

	    @staticmethod
	    def get_hf_wrapper() -> HFInferenceWrapper:
	        """Return the shared HF wrapper, creating it on first use.

	        The instance is cached in the module-level ``hf_wrapper`` global. The
	        ``HUGGINGFACE_API_KEY`` environment variable is read only when the
	        wrapper actually has to be built.
	        """
	        global hf_wrapper
	        if hf_wrapper is not None:
	            return hf_wrapper
	        # Inside a method body this name resolves to the module-level factory
	        # imported from utils.hf_wrapper, not to this static method.
	        hf_wrapper = get_hf_wrapper(api_key=os.getenv("HUGGINGFACE_API_KEY"))
	        return hf_wrapper


	@server.list_tools()
	async def list_tools() -> ListToolsResult:
	    """List available creative tools.

	    Returns:
	        A ``ListToolsResult`` describing the seven creative tools and the
	        JSON Schema of the arguments each one accepts.

	    NOTE(review): depending on the installed MCP SDK release, the
	    ``list_tools`` decorator may expect a bare ``list[Tool]`` rather than a
	    ``ListToolsResult`` -- confirm against the pinned SDK version.
	    """
	    audience_levels = [
	        "elementary",
	        "middle_school",
	        "high_school",
	        "college",
	        "general",
	    ]

	    def text_prop(description: str) -> Dict[str, Any]:
	        # Schema fragment for a free-form string argument.
	        return {"type": "string", "description": description}

	    def number_prop(description: str) -> Dict[str, Any]:
	        # Schema fragment for a numeric argument.
	        return {"type": "number", "description": description}

	    def choice_prop(options: List[str], description: str) -> Dict[str, Any]:
	        # Schema fragment for a string restricted to a fixed set of values.
	        # The list is copied so no two schemas share one mutable object.
	        return {"type": "string", "enum": list(options), "description": description}

	    def string_list_prop(description: str) -> Dict[str, Any]:
	        # Schema fragment for an array of free-form strings.
	        return {
	            "type": "array",
	            "items": {"type": "string"},
	            "description": description,
	        }

	    def model_prop(kind: str) -> Dict[str, Any]:
	        # Every tool accepts an optional model override worded the same way.
	        return text_prop(
	            f"{kind} to use (optional, will use default if not provided)"
	        )

	    def make_tool(
	        name: str,
	        description: str,
	        properties: Dict[str, Any],
	        required: List[str],
	    ) -> Tool:
	        # Wrap a property map in the object schema shape shared by all tools.
	        return Tool(
	            name=name,
	            description=description,
	            inputSchema={
	                "type": "object",
	                "properties": properties,
	                "required": required,
	            },
	        )

	    tools = [
	        make_tool(
	            "plan_concept",
	            "Plan a STEM concept for animation using text LLM",
	            {
	                "topic": text_prop("The STEM topic to create an animation for"),
	                "target_audience": choice_prop(
	                    audience_levels, "Target audience level"
	                ),
	                "animation_length_minutes": number_prop(
	                    "Desired animation length in minutes"
	                ),
	                "model": model_prop("Hugging Face model"),
	            },
	            ["topic", "target_audience"],
	        ),
	        make_tool(
	            "generate_manim_code",
	            "Generate Manim Python code for an animation concept",
	            {
	                "concept": text_prop("The animation concept description"),
	                "scene_description": text_prop(
	                    "Detailed description of what should happen in the scene"
	                ),
	                "visual_elements": string_list_prop(
	                    "List of visual elements to include"
	                ),
	                "model": model_prop("Code model"),
	                # The handler reads these two keys to switch into its
	                # "fix the previous attempt" prompt; advertise them so
	                # clients can discover the retry path.
	                "previous_code": text_prop(
	                    "Code from a previous failed attempt (optional; supply "
	                    "together with error_message to request a fix)"
	                ),
	                "error_message": text_prop(
	                    "Error produced by previous_code (optional; supply "
	                    "together with previous_code)"
	                ),
	            },
	            ["concept", "scene_description"],
	        ),
	        make_tool(
	            "analyze_frame",
	            "Analyze an animation frame using vision model for quality assessment",
	            {
	                "image_path": text_prop("Path to the image file to analyze"),
	                "analysis_type": choice_prop(
	                    ["quality", "content", "educational_value", "clarity"],
	                    "Type of analysis to perform",
	                ),
	                "context": text_prop("Context about what should be in the image"),
	                "model": model_prop("Vision model"),
	            },
	            ["image_path", "analysis_type"],
	        ),
	        make_tool(
	            "generate_narration",
	            "Generate narration script for an animation",
	            {
	                "concept": text_prop("The animation concept"),
	                "scene_description": text_prop(
	                    "Description of the scene to narrate"
	                ),
	                "target_audience": choice_prop(audience_levels, "Target audience"),
	                "duration_seconds": number_prop(
	                    "Desired narration duration in seconds"
	                ),
	                "model": model_prop("Text model"),
	            },
	            ["concept", "scene_description", "target_audience"],
	        ),
	        make_tool(
	            "generate_speech",
	            "Convert text narration to speech audio",
	            {
	                "text": text_prop("Text to convert to speech"),
	                "voice": text_prop("Voice preference (optional)"),
	                "output_path": text_prop("Path to save the audio file"),
	                "model": model_prop("TTS model"),
	            },
	            ["text", "output_path"],
	        ),
	        make_tool(
	            "refine_animation",
	            "Refine and improve animation based on feedback",
	            {
	                "original_code": text_prop("Original Manim code"),
	                "feedback": text_prop("Feedback or issues to address"),
	                "improvement_goals": string_list_prop("List of improvement goals"),
	                "model": model_prop("Code model"),
	            },
	            ["original_code", "feedback"],
	        ),
	        make_tool(
	            "generate_quiz",
	            "Generate quiz questions based on animation content",
	            {
	                "concept": text_prop("The STEM concept covered in the animation"),
	                "difficulty": choice_prop(
	                    ["easy", "medium", "hard"], "Quiz difficulty level"
	                ),
	                "num_questions": number_prop("Number of questions to generate"),
	                "question_types": {
	                    "type": "array",
	                    "items": {
	                        "type": "string",
	                        "enum": ["multiple_choice", "true_false", "short_answer"],
	                    },
	                    "description": "Types of questions to include",
	                },
	                "model": model_prop("Text model"),
	            },
	            ["concept", "difficulty", "num_questions"],
	        ),
	    ]

	    return ListToolsResult(tools=tools)


	@server.call_tool()
	async def call_tool(tool_name: str, arguments: Dict[str, Any]) -> CallToolResult:
	    """Dispatch creative tool calls.

	    The low-level MCP server passes `(tool_name, arguments)` into this
	    handler, so we accept two positional arguments rather than a
	    `CallToolRequest` instance.

	    Args:
	        tool_name: Name of the tool to run, as advertised by ``list_tools``.
	        arguments: Argument mapping forwarded unchanged to the tool handler.

	    Returns:
	        The handler's ``CallToolResult``. An error result (``isError=True``)
	        is returned for an unknown tool name, a missing required argument,
	        or any other exception escaping the handler.
	    """
	    # Built per call because the handlers are defined further down the
	    # module and do not exist yet when this function is defined.
	    handlers = {
	        "plan_concept": plan_concept,
	        "generate_manim_code": generate_manim_code,
	        "analyze_frame": analyze_frame,
	        "generate_narration": generate_narration,
	        "generate_speech": generate_speech,
	        "refine_animation": refine_animation,
	        "generate_quiz": generate_quiz,
	    }

	    handler = handlers.get(tool_name)
	    if handler is None:
	        return CallToolResult(
	            content=[TextContent(type="text", text=f"Unknown tool: {tool_name}")],
	            isError=True,
	        )

	    try:
	        return await handler(arguments)
	    except KeyError as e:
	        # The handlers read their required arguments before entering their
	        # own try blocks, so a missing key surfaces here. str(KeyError) is
	        # only the quoted key, which is not a usable message on its own.
	        logger.error("Tool %s called without required argument %s", tool_name, e)
	        return CallToolResult(
	            content=[
	                TextContent(
	                    type="text",
	                    text=f"Error: missing required argument {e}",
	                )
	            ],
	            isError=True,
	        )
	    except Exception as e:
	        # logger.exception records the traceback as well as the message.
	        logger.exception("Error in tool %s: %s", tool_name, e)
	        return CallToolResult(
	            content=[TextContent(type="text", text=f"Error: {str(e)}")],
	            isError=True,
	        )


	async def plan_concept(arguments: Dict[str, Any]) -> CallToolResult:
	    """Ask a text LLM for a structured animation plan for a STEM topic.

	    Args:
	        arguments: Tool arguments. ``topic`` and ``target_audience`` are
	            required; ``animation_length_minutes`` defaults to 2.0 and
	            ``model`` falls back to the first configured text model.

	    Returns:
	        A ``CallToolResult`` carrying the model's reply prefixed with
	        "Animation Concept Plan:", or an error result if anything in the
	        generation path raises.

	    Raises:
	        KeyError: If a required argument is missing (the lookups happen
	            before the ``try`` block).
	    """
	    topic = arguments["topic"]
	    target_audience = arguments["target_audience"]
	    animation_length = arguments.get("animation_length_minutes", 2.0)
	    requested_model = arguments.get("model")

	    try:
	        hf = CreativeTool.get_hf_wrapper()
	        config = ModelConfig()
	        model_name = requested_model
	        if not model_name:
	            model_name = config.text_models[0]

	        # Doubled braces are literal JSON braces inside the f-string.
	        prompt = f"""
	You are a STEM Curriculum Designer. Create a structured animation plan.

	Topic: {topic}
	Audience: {target_audience}
	Length: {animation_length} min

	Return a valid JSON object with exactly these keys:
	{{
	"learning_objectives": ["string", "string"],
	"visual_metaphors": ["string", "string"],
	"scene_flow": [
	{{
	"timestamp": "0:00-0:30",
	"action": "description of visual action",
	"voiceover": "key narration points"
	}}
	],
	"estimated_educational_value": "string"
	}}

	Do not include markdown formatting like ```json. Return raw JSON only.
	"""

	        plan_text = await hf.text_generation(
	            model=model_name,
	            prompt=prompt,
	            max_new_tokens=1024,
	            temperature=0.7,
	        )

	        success_item = TextContent(
	            type="text",
	            text=f"Animation Concept Plan:\n\n{plan_text}",
	        )
	        return CallToolResult(content=[success_item])

	    except Exception as e:
	        failure_item = TextContent(
	            type="text",
	            text=f"Concept planning failed: {str(e)}",
	        )
	        return CallToolResult(content=[failure_item], isError=True)


	async def generate_manim_code(arguments: Dict[str, Any]) -> CallToolResult:
	    """Generate Manim Python code.

	    Two prompts are used. When both ``previous_code`` and ``error_message``
	    are supplied the model is asked to repair that code; otherwise it is
	    asked to write a fresh script.

	    Args:
	        arguments: Tool arguments. ``concept`` and ``scene_description`` are
	            required. ``visual_elements`` (list of strings), ``model``,
	            ``previous_code`` and ``error_message`` are optional.

	    Returns:
	        A ``CallToolResult`` whose text is the model output wrapped in a
	        ```python fence, or an error result if generation raises.

	    Raises:
	        KeyError: If a required argument is missing (the lookups happen
	            before the ``try`` block).
	    """
	    concept = arguments["concept"]
	    scene_description = arguments["scene_description"]
	    # `or []` also covers an explicit null, which would otherwise make the
	    # join below raise TypeError.
	    visual_elements = arguments.get("visual_elements") or []
	    model = arguments.get("model")
	    previous_code = arguments.get("previous_code")
	    error_message = arguments.get("error_message")

	    try:
	        wrapper = CreativeTool.get_hf_wrapper()
	        model_config = ModelConfig()
	        selected_model = model or model_config.code_models[0]

	        # Both prompts embed the same comma-separated element list.
	        elements_text = ", ".join(visual_elements)

	        # Build base prompt
	        if previous_code and error_message:
	            # This is a retry - include error feedback
	            prompt = f"""
	You are an expert animation engineer using Manim Community Edition (v0.18.0+).

	The previous code attempt had an error. Your task is to FIX the code.

	PREVIOUS CODE:
	```python
	{previous_code}
	```

	ERROR ENCOUNTERED:
	{error_message}

	TASK: Fix the error in the code above. Pay special attention to:
	- Closing all parentheses, brackets, and braces
	- Completing all function calls
	- Proper indentation
	- Valid Python syntax

	Concept: {concept}
	Scene Description: {scene_description}
	Visual Elements: {elements_text}

	STRICT CODE REQUIREMENTS:
	1. Header: MUST start with `from manim import *`
	2. Class Structure: Define a class inheriting from `MovingCameraScene` (use this instead of `Scene` to enable camera zoom/pan with `self.camera.frame`)
	3. Method: All logic must be inside the `def construct(self):` method
	4. SYNTAX: Ensure ALL parentheses, brackets, and function calls are properly closed
	5. Colors: Use ONLY valid Manim colors (WHITE, BLACK, RED, GREEN, BLUE, YELLOW, ORANGE, PINK, PURPLE, TEAL, GOLD, etc.)
	6. Text: Use `Text()` objects for strings
	7. Positioning: Use `.next_to()`, `.move_to()`, or `.shift()`
	8. Animations: Use Write(), Create(), FadeIn(), FadeOut(), Transform(), Flash(), Indicate() - capitalize properly!
	9. Pacing: Include `self.wait(1)` between animations

	OUTPUT FORMAT:
	Provide ONLY the complete, corrected Python code. No markdown blocks. No explanations.
	"""
	        else:
	            # First attempt - generate fresh code.
	            # ShowCreation() is deliberately NOT offered as a valid animation:
	            # it is not available in current Manim Community releases (Create()
	            # replaces it), so generated code using it fails with NameError.
	            prompt = f"""
	You are an expert animation engineer using Manim Community Edition (v0.18.0+).
	Generate a complete, runnable Python script for the following request.

	Concept: {concept}
	Scene Description: {scene_description}
	Visual Elements: {elements_text}

	STRICT CODE REQUIREMENTS:
	1. Header: MUST start with `from manim import *`
	2. Class Structure: Define a class inheriting from `MovingCameraScene` (e.g., `class GenScene(MovingCameraScene):`) - this enables camera operations like zoom/pan via `self.camera.frame`
	3. Method: All logic must be inside the `def construct(self):` method
	4. SYNTAX: Ensure ALL parentheses, brackets, and function calls are properly closed
	5. Colors: Use ONLY these valid Manim color constants:
	- Basic: WHITE, BLACK, GRAY, GREY, LIGHT_GRAY, DARK_GRAY
	- Primary: RED, GREEN, BLUE, YELLOW, ORANGE, PINK, PURPLE, TEAL, GOLD, MAROON
	- Variants: RED_A, RED_B, RED_C, RED_D, RED_E, GREEN_A, GREEN_B, GREEN_C, GREEN_D, GREEN_E,
	BLUE_A, BLUE_B, BLUE_C, BLUE_D, BLUE_E, YELLOW_A, YELLOW_B, YELLOW_C, YELLOW_D, YELLOW_E
	- NEVER use: DARK_GREEN, LIGHT_GREEN, DARK_BLUE, LIGHT_BLUE, DARK_RED, LIGHT_RED (these don't exist!)
	6. Text: Use `Text()` objects for strings. Avoid `Tex()` or `MathTex()` unless necessary
	7. Positioning: Use `.next_to()`, `.move_to()`, or `.shift()` to arrange elements
	8. Animations: Use ONLY these valid animations:
	- Write(), Create(), FadeIn(), FadeOut(), GrowFromCenter(), ShrinkToCenter()
	- Transform(), ReplacementTransform(), MoveToTarget(), ApplyMethod()
	- Rotate(), Indicate(), Flash() - DO NOT use lowercase like 'flash'
	- NEVER use ShowCreation() (removed from Manim Community; use Create() instead)
	- For custom effects use .animate.method() (e.g., obj.animate.scale(2), obj.animate.shift(UP))
	9. Pacing: Include `self.wait(1)` between major animation groups

	OUTPUT FORMAT:
	Provide ONLY the raw Python code. Do not wrap in markdown blocks (no ```python). Do not include conversational text.
	"""

	        response = await wrapper.text_generation(
	            model=selected_model,
	            prompt=prompt,
	            max_new_tokens=2048,
	            temperature=0.3,
	        )

	        return CallToolResult(
	            content=[
	                TextContent(
	                    type="text",
	                    text=f"Generated Manim Code:\n\n```python\n{response}\n```",
	                )
	            ]
	        )

	    except Exception as e:
	        return CallToolResult(
	            content=[
	                TextContent(type="text", text=f"Code generation failed: {str(e)}")
	            ],
	            isError=True,
	        )


	async def analyze_frame(arguments: Dict[str, Any]) -> CallToolResult:
	    """Send one rendered frame to a vision model and return its critique.

	    Args:
	        arguments: Tool arguments. ``image_path`` and ``analysis_type`` are
	            required; ``context`` defaults to an empty string and ``model``
	            falls back to the first configured vision model.

	    Returns:
	        A ``CallToolResult`` with the model's feedback prefixed by
	        "Frame Analysis (<analysis_type>):", or an error result if reading
	        the file or calling the model raises.

	    Raises:
	        KeyError: If a required argument is missing (the lookups happen
	            before the ``try`` block).
	    """
	    image_path = arguments["image_path"]
	    analysis_type = arguments["analysis_type"]
	    context = arguments.get("context", "")
	    requested_model = arguments.get("model")

	    try:
	        hf = CreativeTool.get_hf_wrapper()
	        config = ModelConfig()
	        model_name = requested_model
	        if not model_name:
	            model_name = config.vision_models[0]

	        # The raw file bytes are handed to the wrapper unchanged.
	        with open(image_path, "rb") as image_file:
	            image_bytes = image_file.read()

	        # e.g. "educational_value" -> "Educational Value"
	        focus_label = analysis_type.replace("_", " ").title()

	        prompt = f"""
	Analyze this {analysis_type} for an educational animation frame.
	Context: {context}

	Provide specific feedback on:
	{focus_label} assessment
	Educational effectiveness
	Visual clarity
	Suggestions for improvement
	"""

	        feedback_text = await hf.vision_analysis(
	            model=model_name,
	            image=image_bytes,
	            text=prompt,
	        )

	        success_item = TextContent(
	            type="text",
	            text=f"Frame Analysis ({analysis_type}):\n\n{feedback_text}",
	        )
	        return CallToolResult(content=[success_item])

	    except Exception as e:
	        failure_item = TextContent(
	            type="text", text=f"Frame analysis failed: {str(e)}"
	        )
	        return CallToolResult(content=[failure_item], isError=True)


	async def generate_narration(arguments: Dict[str, Any]) -> CallToolResult:
	    """Generate narration script.

	    Args:
	        arguments: Tool arguments. ``concept``, ``scene_description`` and
	            ``target_audience`` are required; ``duration_seconds`` defaults
	            to 30 and ``model`` falls back to the first configured text
	            model.

	    Returns:
	        A ``CallToolResult`` with the script prefixed by "Narration Script:",
	        or an error result if generation raises.

	    Raises:
	        KeyError: If a required argument is missing (the lookups happen
	            before the ``try`` block).
	    """
	    concept = arguments["concept"]
	    scene_description = arguments["scene_description"]
	    target_audience = arguments["target_audience"]
	    duration = arguments.get("duration_seconds", 30)
	    model = arguments.get("model")

	    try:
	        wrapper = CreativeTool.get_hf_wrapper()
	        model_config = ModelConfig()
	        selected_model = model or model_config.text_models[0]

	        # Word budget at a speaking pace of 150 words per minute. The
	        # previous `duration / 150` asked for 0.2 words for a 30-second
	        # clip; the per-minute rate has to be scaled to seconds.
	        # Computed inside the try so a non-numeric duration is still
	        # reported as a narration failure.
	        words_per_minute = 150
	        target_words = round(duration * words_per_minute / 60)

	        prompt = f"""
	Generate a narration script for an educational animation:

	Concept: {concept}
	Scene: {scene_description}
	Target Audience: {target_audience}
	Duration: {duration} seconds

	Requirements:
	1. Clear, engaging, and age-appropriate language
	2. Educational value aligned with learning objectives
	3. Natural speaking pace (approximately {target_words} words for {duration} seconds)
	4. Include pauses and emphasis markers where appropriate
	5. Make it interesting and memorable

	Format as a clean script ready for text-to-speech.
	"""

	        response = await wrapper.text_generation(
	            model=selected_model,
	            prompt=prompt,
	            max_new_tokens=512,
	            temperature=0.6,
	        )

	        return CallToolResult(
	            content=[
	                TextContent(
	                    type="text",
	                    text=f"Narration Script:\n\n{response}",
	                )
	            ]
	        )

	    except Exception as e:
	        return CallToolResult(
	            content=[
	                TextContent(
	                    type="text",
	                    text=f"Narration generation failed: {str(e)}",
	                )
	            ],
	            isError=True,
	        )


	async def generate_speech(arguments: Dict[str, Any]) -> CallToolResult:
	    """Convert text to speech.

	    Args:
	        arguments: Tool arguments. ``text`` and ``output_path`` are required;
	            ``voice`` is optional and ``model`` falls back to the first
	            configured TTS model.

	    Returns:
	        A ``CallToolResult`` whose text contains a JSON summary
	        (``output_path``, ``text_length``, ``estimated_duration`` in seconds,
	        ``model_used``), or an error result if synthesis or saving fails.

	    Raises:
	        KeyError: If a required argument is missing (the lookups happen
	            before the ``try`` block).
	    """
	    text = arguments["text"]
	    voice = arguments.get("voice")
	    output_path = arguments["output_path"]
	    model = arguments.get("model")

	    try:
	        wrapper = CreativeTool.get_hf_wrapper()
	        model_config = ModelConfig()
	        selected_model = model or model_config.tts_models[0]

	        # Generate audio
	        audio_bytes = await wrapper.text_to_speech(
	            model=selected_model,
	            text=text,
	            voice=voice,
	        )

	        # Save to file
	        success = await wrapper.save_audio_to_file(audio_bytes, output_path)

	        if not success:
	            raise RuntimeError("Failed to save audio file")

	        # Rough estimate in seconds, from the word count at 150 words per
	        # minute. The earlier `len(text) / 150` divided a character count by
	        # a words-per-minute rate, which is not a duration in any unit.
	        # NOTE(review): confirm no consumer relied on the old number.
	        words_per_minute = 150
	        estimated_seconds = round(len(text.split()) / words_per_minute * 60, 1)

	        # Return audio info
	        audio_info = {
	            "output_path": output_path,
	            "text_length": len(text),
	            "estimated_duration": estimated_seconds,
	            "model_used": selected_model,
	        }

	        return CallToolResult(
	            content=[
	                TextContent(
	                    type="text",
	                    text=f"Speech generated successfully!\n\n{json.dumps(audio_info, indent=2)}",
	                )
	            ]
	        )

	    except Exception as e:
	        return CallToolResult(
	            content=[
	                TextContent(type="text", text=f"Speech generation failed: {str(e)}")
	            ],
	            isError=True,
	        )


	async def refine_animation(arguments: Dict[str, Any]) -> CallToolResult:
	    """Ask a code model to rewrite a Manim script in light of feedback.

	    Args:
	        arguments: Tool arguments. ``original_code`` and ``feedback`` are
	            required; ``improvement_goals`` defaults to an empty list and
	            ``model`` falls back to the first configured code model.

	    Returns:
	        A ``CallToolResult`` whose text is the model output wrapped in a
	        ```python fence under "Refined Manim Code:", or an error result if
	        generation raises.

	    Raises:
	        KeyError: If a required argument is missing (the lookups happen
	            before the ``try`` block).
	    """
	    original_code = arguments["original_code"]
	    feedback = arguments["feedback"]
	    improvement_goals = arguments.get("improvement_goals", [])
	    requested_model = arguments.get("model")

	    try:
	        hf = CreativeTool.get_hf_wrapper()
	        config = ModelConfig()
	        model_name = requested_model
	        if not model_name:
	            model_name = config.code_models[0]

	        # Goals are embedded in the prompt as one comma-separated line.
	        goals_text = ", ".join(improvement_goals)

	        prompt = f"""
	You are a Manim Code Repair Agent. Your task is to rewrite the FULL Python script to fix issues or apply improvements.

	Previous Code:
	{original_code}

	User Feedback/Error:
	{feedback}

	Improvement Goals:
	{goals_text}

	INSTRUCTIONS:
	1. Output the COMPLETE corrected script, including `from manim import *`.
	2. Do not output diffs or partial snippets.
	3. Ensure the class inherits from `MovingCameraScene` and uses `def construct(self):`.
	4. Fix logic errors based on the feedback.
	5. Animations: Use ONLY valid animations like Write(), FadeIn(), FadeOut(), Create(), Flash(), Transform() - NEVER lowercase!
	6. Colors: Use ONLY these valid Manim color constants:
	- Basic: WHITE, BLACK, GRAY, GREY, LIGHT_GRAY, DARK_GRAY
	- Primary: RED, GREEN, BLUE, YELLOW, ORANGE, PINK, PURPLE, TEAL, GOLD, MAROON
	- Variants: RED_A, RED_B, RED_C, RED_D, RED_E, GREEN_A, GREEN_B, GREEN_C, GREEN_D, GREEN_E,
	BLUE_A, BLUE_B, BLUE_C, BLUE_D, BLUE_E, YELLOW_A, YELLOW_B, YELLOW_C, YELLOW_D, YELLOW_E
	- NEVER use: DARK_GREEN, LIGHT_GREEN, DARK_BLUE, LIGHT_BLUE, DARK_RED, LIGHT_RED (these don't exist!)
	- For darker/lighter variants, use the letter suffixes (e.g., GREEN_E for dark green, GREEN_A for light green).

	OUTPUT:
	Return ONLY the raw Python code. No markdown backticks. No explanation.
	"""

	        refined_code = await hf.text_generation(
	            model=model_name,
	            prompt=prompt,
	            max_new_tokens=2048,
	            temperature=0.3,
	        )

	        success_item = TextContent(
	            type="text",
	            text=f"Refined Manim Code:\n\n```python\n{refined_code}\n```",
	        )
	        return CallToolResult(content=[success_item])

	    except Exception as e:
	        failure_item = TextContent(
	            type="text", text=f"Code refinement failed: {str(e)}"
	        )
	        return CallToolResult(content=[failure_item], isError=True)


	async def generate_quiz(arguments: Dict[str, Any]) -> CallToolResult:
	    """Ask a text LLM for quiz questions about a STEM concept.

	    Args:
	        arguments: Tool arguments. ``concept``, ``difficulty`` and
	            ``num_questions`` are required; ``question_types`` defaults to
	            ``["multiple_choice"]`` and ``model`` falls back to the first
	            configured text model.

	    Returns:
	        A ``CallToolResult`` with the model's reply prefixed by
	        "Generated Quiz Questions:", or an error result if generation
	        raises.

	    Raises:
	        KeyError: If a required argument is missing (the lookups happen
	            before the ``try`` block).
	    """
	    concept = arguments["concept"]
	    difficulty = arguments["difficulty"]
	    num_questions = arguments["num_questions"]
	    question_types = arguments.get("question_types", ["multiple_choice"])
	    requested_model = arguments.get("model")

	    try:
	        hf = CreativeTool.get_hf_wrapper()
	        config = ModelConfig()
	        model_name = requested_model
	        if not model_name:
	            model_name = config.text_models[0]

	        # Question types are embedded in the prompt as one comma-separated line.
	        types_text = ", ".join(question_types)

	        prompt = f"""
	Generate {num_questions} quiz questions for the following STEM concept:

	Concept: {concept}
	Difficulty: {difficulty}
	Question Types: {types_text}

	For each question provide:
	1. The question
	2. Possible answers (for multiple choice)
	3. Correct answer
	4. Brief explanation

	Format as JSON array of question objects.
	"""

	        quiz_text = await hf.text_generation(
	            model=model_name,
	            prompt=prompt,
	            max_new_tokens=1024,
	            temperature=0.5,
	        )

	        success_item = TextContent(
	            type="text",
	            text=f"Generated Quiz Questions:\n\n{quiz_text}",
	        )
	        return CallToolResult(content=[success_item])

	    except Exception as e:
	        failure_item = TextContent(
	            type="text", text=f"Quiz generation failed: {str(e)}"
	        )
	        return CallToolResult(content=[failure_item], isError=True)


	async def main():
	    """Configure logging and serve the creative MCP server over stdio."""
	    logging.basicConfig(
	        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
	        level=logging.INFO,
	    )

	    async with stdio_server() as streams:
	        read_stream, write_stream = streams

	        # Advertise the capabilities derived from the handlers registered on
	        # `server`, with default notification options and no experimental
	        # extensions.
	        capabilities = server.get_capabilities(
	            notification_options=NotificationOptions(),
	            experimental_capabilities={},
	        )
	        init_options = InitializationOptions(
	            server_name="neuroanim-creative",
	            server_version="0.1.0",
	            capabilities=capabilities,
	        )

	        await server.run(read_stream, write_stream, init_options)


	# Start the server only when this file is executed as a script.
	if __name__ == "__main__":
	    asyncio.run(main())