""" Creative MCP Server This MCP server provides tools for creative tasks using Hugging Face models: - Concept Planning (Text LLM) - Code Generation (Coder LLM) - Vision Analysis (Vision-Language LLM) - Text-to-Speech (Audio model) """ import asyncio import base64 import json import logging import os import sys import tempfile from pathlib import Path from typing import Any, Dict, List, Optional # Ensure project root (which contains the `utils` package) is on sys.path PROJECT_ROOT = Path(__file__).resolve().parent.parent if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT)) from mcp.server import NotificationOptions, Server from mcp.server.models import InitializationOptions from mcp.server.stdio import stdio_server from mcp.types import ( CallToolResult, ListToolsResult, TextContent, Tool, ) from utils.hf_wrapper import HFInferenceWrapper, ModelConfig, get_hf_wrapper logger = logging.getLogger(__name__) # Create MCP server server = Server("neuroanim-creative") # Global HF wrapper instance hf_wrapper: Optional[HFInferenceWrapper] = None class CreativeTool: """Base class for creative tools.""" @staticmethod def get_hf_wrapper() -> HFInferenceWrapper: """Get or create the HF wrapper instance.""" global hf_wrapper if hf_wrapper is None: api_key = os.getenv("HUGGINGFACE_API_KEY") hf_wrapper = get_hf_wrapper(api_key=api_key) return hf_wrapper @server.list_tools() async def list_tools() -> ListToolsResult: """List available creative tools.""" tools = [ Tool( name="plan_concept", description="Plan a STEM concept for animation using text LLM", inputSchema={ "type": "object", "properties": { "topic": { "type": "string", "description": "The STEM topic to create an animation for", }, "target_audience": { "type": "string", "enum": [ "elementary", "middle_school", "high_school", "college", "general", ], "description": "Target audience level", }, "animation_length_minutes": { "type": "number", "description": "Desired animation length in minutes", }, "model": { "type": "string", "description": "Hugging Face model to use (optional, will use default if not provided)", }, }, "required": ["topic", "target_audience"], }, ), Tool( name="generate_manim_code", description="Generate Manim Python code for an animation concept", inputSchema={ "type": "object", "properties": { "concept": { "type": "string", "description": "The animation concept description", }, "scene_description": { "type": "string", "description": "Detailed description of what should happen in the scene", }, "visual_elements": { "type": "array", "items": {"type": "string"}, "description": "List of visual elements to include", }, "model": { "type": "string", "description": "Code model to use (optional, will use default if not provided)", }, }, "required": ["concept", "scene_description"], }, ), Tool( name="analyze_frame", description="Analyze an animation frame using vision model for quality assessment", inputSchema={ "type": "object", "properties": { "image_path": { "type": "string", "description": "Path to the image file to analyze", }, "analysis_type": { "type": "string", "enum": [ "quality", "content", "educational_value", "clarity", ], "description": "Type of analysis to perform", }, "context": { "type": "string", "description": "Context about what should be in the image", }, "model": { "type": "string", "description": "Vision model to use (optional, will use default if not provided)", }, }, "required": ["image_path", "analysis_type"], }, ), Tool( name="generate_narration", description="Generate narration script for an animation", inputSchema={ "type": "object", "properties": { "concept": { "type": "string", "description": "The animation concept", }, "scene_description": { "type": "string", "description": "Description of the scene to narrate", }, "target_audience": { "type": "string", "enum": [ "elementary", "middle_school", "high_school", "college", "general", ], "description": "Target audience", }, "duration_seconds": { "type": "number", "description": "Desired narration duration in seconds", }, "model": { "type": "string", "description": "Text model to use (optional, will use default if not provided)", }, }, "required": ["concept", "scene_description", "target_audience"], }, ), Tool( name="generate_speech", description="Convert text narration to speech audio", inputSchema={ "type": "object", "properties": { "text": { "type": "string", "description": "Text to convert to speech", }, "voice": { "type": "string", "description": "Voice preference (optional)", }, "output_path": { "type": "string", "description": "Path to save the audio file", }, "model": { "type": "string", "description": "TTS model to use (optional, will use default if not provided)", }, }, "required": ["text", "output_path"], }, ), Tool( name="refine_animation", description="Refine and improve animation based on feedback", inputSchema={ "type": "object", "properties": { "original_code": { "type": "string", "description": "Original Manim code", }, "feedback": { "type": "string", "description": "Feedback or issues to address", }, "improvement_goals": { "type": "array", "items": {"type": "string"}, "description": "List of improvement goals", }, "model": { "type": "string", "description": "Code model to use (optional, will use default if not provided)", }, }, "required": ["original_code", "feedback"], }, ), Tool( name="generate_quiz", description="Generate quiz questions based on animation content", inputSchema={ "type": "object", "properties": { "concept": { "type": "string", "description": "The STEM concept covered in the animation", }, "difficulty": { "type": "string", "enum": ["easy", "medium", "hard"], "description": "Quiz difficulty level", }, "num_questions": { "type": "number", "description": "Number of questions to generate", }, "question_types": { "type": "array", "items": { "type": "string", "enum": ["multiple_choice", "true_false", "short_answer"], }, "description": "Types of questions to include", }, "model": { "type": "string", "description": "Text model to use (optional, will use default if not provided)", }, }, "required": ["concept", "difficulty", "num_questions"], }, ), ] return ListToolsResult(tools=tools) @server.call_tool() async def call_tool(tool_name: str, arguments: Dict[str, Any]) -> CallToolResult: """Dispatch creative tool calls. The low-level MCP server passes `(tool_name, arguments)` into this handler, so we accept two positional arguments rather than a `CallToolRequest` instance. """ try: if tool_name == "plan_concept": return await plan_concept(arguments) elif tool_name == "generate_manim_code": return await generate_manim_code(arguments) elif tool_name == "analyze_frame": return await analyze_frame(arguments) elif tool_name == "generate_narration": return await generate_narration(arguments) elif tool_name == "generate_speech": return await generate_speech(arguments) elif tool_name == "refine_animation": return await refine_animation(arguments) elif tool_name == "generate_quiz": return await generate_quiz(arguments) else: return CallToolResult( content=[TextContent(type="text", text=f"Unknown tool: {tool_name}")], isError=True, ) except Exception as e: logger.error(f"Error in tool {tool_name}: {e}") return CallToolResult( content=[TextContent(type="text", text=f"Error: {str(e)}")], isError=True, ) async def plan_concept(arguments: Dict[str, Any]) -> CallToolResult: """Plan a STEM concept for animation.""" topic = arguments["topic"] target_audience = arguments["target_audience"] animation_length = arguments.get("animation_length_minutes", 2.0) model = arguments.get("model") try: wrapper = CreativeTool.get_hf_wrapper() model_config = ModelConfig() selected_model = model or model_config.text_models[0] prompt = f""" You are a STEM Curriculum Designer. Create a structured animation plan. Topic: {topic} Audience: {target_audience} Length: {animation_length} min Return a valid JSON object with exactly these keys: {{ "learning_objectives": ["string", "string"], "visual_metaphors": ["string", "string"], "scene_flow": [ {{ "timestamp": "0:00-0:30", "action": "description of visual action", "voiceover": "key narration points" }} ], "estimated_educational_value": "string" }} Do not include markdown formatting like ```json. Return raw JSON only. """ response = await wrapper.text_generation( model=selected_model, prompt=prompt, max_new_tokens=1024, temperature=0.7, ) return CallToolResult( content=[ TextContent( type="text", text=f"Animation Concept Plan:\n\n{response}", ) ] ) except Exception as e: return CallToolResult( content=[ TextContent( type="text", text=f"Concept planning failed: {str(e)}", ) ], isError=True, ) async def generate_manim_code(arguments: Dict[str, Any]) -> CallToolResult: """Generate Manim Python code.""" concept = arguments["concept"] scene_description = arguments["scene_description"] visual_elements = arguments.get("visual_elements", []) model = arguments.get("model") previous_code = arguments.get("previous_code") error_message = arguments.get("error_message") try: wrapper = CreativeTool.get_hf_wrapper() model_config = ModelConfig() selected_model = model or model_config.code_models[0] # Build base prompt if previous_code and error_message: # This is a retry - include error feedback prompt = f""" You are an expert animation engineer using Manim Community Edition (v0.18.0+). The previous code attempt had an error. Your task is to FIX the code. PREVIOUS CODE: ```python {previous_code} ``` ERROR ENCOUNTERED: {error_message} TASK: Fix the error in the code above. Pay special attention to: - Closing all parentheses, brackets, and braces - Completing all function calls - Proper indentation - Valid Python syntax Concept: {concept} Scene Description: {scene_description} Visual Elements: {", ".join(visual_elements)} STRICT CODE REQUIREMENTS: 1. Header: MUST start with `from manim import *` 2. Class Structure: Define a class inheriting from `MovingCameraScene` (use this instead of `Scene` to enable camera zoom/pan with `self.camera.frame`) 3. Method: All logic must be inside the `def construct(self):` method 4. SYNTAX: Ensure ALL parentheses, brackets, and function calls are properly closed 5. Colors: Use ONLY valid Manim colors (WHITE, BLACK, RED, GREEN, BLUE, YELLOW, ORANGE, PINK, PURPLE, TEAL, GOLD, etc.) 6. Text: Use `Text()` objects for strings 7. Positioning: Use `.next_to()`, `.move_to()`, or `.shift()` 8. Animations: Use Write(), Create(), FadeIn(), FadeOut(), Transform(), Flash(), Indicate() - capitalize properly! 9. Pacing: Include `self.wait(1)` between animations OUTPUT FORMAT: Provide ONLY the complete, corrected Python code. No markdown blocks. No explanations. """ else: # First attempt - generate fresh code prompt = f""" You are an expert animation engineer using Manim Community Edition (v0.18.0+). Generate a complete, runnable Python script for the following request. Concept: {concept} Scene Description: {scene_description} Visual Elements: {", ".join(visual_elements)} STRICT CODE REQUIREMENTS: 1. Header: MUST start with `from manim import *` 2. Class Structure: Define a class inheriting from `MovingCameraScene` (e.g., `class GenScene(MovingCameraScene):`) - this enables camera operations like zoom/pan via `self.camera.frame` 3. Method: All logic must be inside the `def construct(self):` method 4. SYNTAX: Ensure ALL parentheses, brackets, and function calls are properly closed 5. Colors: Use ONLY these valid Manim color constants: - Basic: WHITE, BLACK, GRAY, GREY, LIGHT_GRAY, DARK_GRAY - Primary: RED, GREEN, BLUE, YELLOW, ORANGE, PINK, PURPLE, TEAL, GOLD, MAROON - Variants: RED_A, RED_B, RED_C, RED_D, RED_E, GREEN_A, GREEN_B, GREEN_C, GREEN_D, GREEN_E, BLUE_A, BLUE_B, BLUE_C, BLUE_D, BLUE_E, YELLOW_A, YELLOW_B, YELLOW_C, YELLOW_D, YELLOW_E - NEVER use: DARK_GREEN, LIGHT_GREEN, DARK_BLUE, LIGHT_BLUE, DARK_RED, LIGHT_RED (these don't exist!) 6. Text: Use `Text()` objects for strings. Avoid `Tex()` or `MathTex()` unless necessary 7. Positioning: Use `.next_to()`, `.move_to()`, or `.shift()` to arrange elements 8. Animations: Use ONLY these valid animations: - Write(), Create(), FadeIn(), FadeOut(), GrowFromCenter(), ShrinkToCenter() - Transform(), ReplacementTransform(), MoveToTarget(), ApplyMethod() - Rotate(), Indicate(), Flash(), ShowCreation() - DO NOT use lowercase like 'flash' - For custom effects use .animate.method() (e.g., obj.animate.scale(2), obj.animate.shift(UP)) 9. Pacing: Include `self.wait(1)` between major animation groups OUTPUT FORMAT: Provide ONLY the raw Python code. Do not wrap in markdown blocks (no ```python). Do not include conversational text. """ response = await wrapper.text_generation( model=selected_model, prompt=prompt, max_new_tokens=2048, temperature=0.3, ) return CallToolResult( content=[ TextContent( type="text", text=f"Generated Manim Code:\n\n```python\n{response}\n```", ) ] ) except Exception as e: return CallToolResult( content=[ TextContent(type="text", text=f"Code generation failed: {str(e)}") ], isError=True, ) async def analyze_frame(arguments: Dict[str, Any]) -> CallToolResult: """Analyze an animation frame.""" image_path = arguments["image_path"] analysis_type = arguments["analysis_type"] context = arguments.get("context", "") model = arguments.get("model") try: wrapper = CreativeTool.get_hf_wrapper() model_config = ModelConfig() selected_model = model or model_config.vision_models[0] with open(image_path, "rb") as f: image_bytes = f.read() prompt = f""" Analyze this {analysis_type} for an educational animation frame. Context: {context} Provide specific feedback on: {analysis_type.replace("_", " ").title()} assessment Educational effectiveness Visual clarity Suggestions for improvement """ response = await wrapper.vision_analysis( model=selected_model, image=image_bytes, text=prompt, ) return CallToolResult( content=[ TextContent( type="text", text=f"Frame Analysis ({analysis_type}):\n\n{response}", ) ] ) except Exception as e: return CallToolResult( content=[TextContent(type="text", text=f"Frame analysis failed: {str(e)}")], isError=True, ) async def generate_narration(arguments: Dict[str, Any]) -> CallToolResult: """Generate narration script.""" concept = arguments["concept"] scene_description = arguments["scene_description"] target_audience = arguments["target_audience"] duration = arguments.get("duration_seconds", 30) model = arguments.get("model") try: wrapper = CreativeTool.get_hf_wrapper() model_config = ModelConfig() selected_model = model or model_config.text_models[0] prompt = f""" Generate a narration script for an educational animation: Concept: {concept} Scene: {scene_description} Target Audience: {target_audience} Duration: {duration} seconds Requirements: 1. Clear, engaging, and age-appropriate language 2. Educational value aligned with learning objectives 3. Natural speaking pace (approximately {duration / 150} words for {duration} seconds) 4. Include pauses and emphasis markers where appropriate 5. Make it interesting and memorable Format as a clean script ready for text-to-speech. """ response = await wrapper.text_generation( model=selected_model, prompt=prompt, max_new_tokens=512, temperature=0.6, ) return CallToolResult( content=[ TextContent( type="text", text=f"Narration Script:\n\n{response}", ) ] ) except Exception as e: return CallToolResult( content=[ TextContent( type="text", text=f"Narration generation failed: {str(e)}", ) ], isError=True, ) async def generate_speech(arguments: Dict[str, Any]) -> CallToolResult: """Convert text to speech.""" text = arguments["text"] voice = arguments.get("voice") output_path = arguments["output_path"] model = arguments.get("model") try: wrapper = CreativeTool.get_hf_wrapper() model_config = ModelConfig() selected_model = model or model_config.tts_models[0] # Generate audio audio_bytes = await wrapper.text_to_speech( model=selected_model, text=text, voice=voice, ) # Save to file success = await wrapper.save_audio_to_file(audio_bytes, output_path) if not success: raise Exception("Failed to save audio file") # Return audio info audio_info = { "output_path": output_path, "text_length": len(text), "estimated_duration": len(text) / 150, # Rough estimate "model_used": selected_model, } return CallToolResult( content=[ TextContent( type="text", text=f"Speech generated successfully!\n\n{json.dumps(audio_info, indent=2)}", ) ] ) except Exception as e: return CallToolResult( content=[ TextContent(type="text", text=f"Speech generation failed: {str(e)}") ], isError=True, ) async def refine_animation(arguments: Dict[str, Any]) -> CallToolResult: """Refine animation code based on feedback.""" original_code = arguments["original_code"] feedback = arguments["feedback"] improvement_goals = arguments.get("improvement_goals", []) model = arguments.get("model") try: wrapper = CreativeTool.get_hf_wrapper() model_config = ModelConfig() selected_model = model or model_config.code_models[0] prompt = f""" You are a Manim Code Repair Agent. Your task is to rewrite the FULL Python script to fix issues or apply improvements. Previous Code: {original_code} User Feedback/Error: {feedback} Improvement Goals: {", ".join(improvement_goals)} INSTRUCTIONS: 1. Output the COMPLETE corrected script, including `from manim import *`. 2. Do not output diffs or partial snippets. 3. Ensure the class inherits from `MovingCameraScene` and uses `def construct(self):`. 4. Fix logic errors based on the feedback. 5. Animations: Use ONLY valid animations like Write(), FadeIn(), FadeOut(), Create(), Flash(), Transform() - NEVER lowercase! 6. Colors: Use ONLY these valid Manim color constants: - Basic: WHITE, BLACK, GRAY, GREY, LIGHT_GRAY, DARK_GRAY - Primary: RED, GREEN, BLUE, YELLOW, ORANGE, PINK, PURPLE, TEAL, GOLD, MAROON - Variants: RED_A, RED_B, RED_C, RED_D, RED_E, GREEN_A, GREEN_B, GREEN_C, GREEN_D, GREEN_E, BLUE_A, BLUE_B, BLUE_C, BLUE_D, BLUE_E, YELLOW_A, YELLOW_B, YELLOW_C, YELLOW_D, YELLOW_E - NEVER use: DARK_GREEN, LIGHT_GREEN, DARK_BLUE, LIGHT_BLUE, DARK_RED, LIGHT_RED (these don't exist!) - For darker/lighter variants, use the letter suffixes (e.g., GREEN_E for dark green, GREEN_A for light green). OUTPUT: Return ONLY the raw Python code. No markdown backticks. No explanation. """ response = await wrapper.text_generation( model=selected_model, prompt=prompt, max_new_tokens=2048, temperature=0.3, ) return CallToolResult( content=[ TextContent( type="text", text=f"Refined Manim Code:\n\n```python\n{response}\n```", ) ] ) except Exception as e: return CallToolResult( content=[ TextContent(type="text", text=f"Code refinement failed: {str(e)}") ], isError=True, ) async def generate_quiz(arguments: Dict[str, Any]) -> CallToolResult: """Generate quiz questions.""" concept = arguments["concept"] difficulty = arguments["difficulty"] num_questions = arguments["num_questions"] question_types = arguments.get("question_types", ["multiple_choice"]) model = arguments.get("model") try: wrapper = CreativeTool.get_hf_wrapper() model_config = ModelConfig() selected_model = model or model_config.text_models[0] prompt = f""" Generate {num_questions} quiz questions for the following STEM concept: Concept: {concept} Difficulty: {difficulty} Question Types: {", ".join(question_types)} For each question provide: 1. The question 2. Possible answers (for multiple choice) 3. Correct answer 4. Brief explanation Format as JSON array of question objects. """ response = await wrapper.text_generation( model=selected_model, prompt=prompt, max_new_tokens=1024, temperature=0.5, ) return CallToolResult( content=[ TextContent( type="text", text=f"Generated Quiz Questions:\n\n{response}", ) ] ) except Exception as e: return CallToolResult( content=[ TextContent(type="text", text=f"Quiz generation failed: {str(e)}") ], isError=True, ) async def main(): """Main entry point for the creative MCP server.""" # Set up logging logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ) async with stdio_server() as (read_stream, write_stream): await server.run( read_stream, write_stream, InitializationOptions( server_name="neuroanim-creative", server_version="0.1.0", capabilities=server.get_capabilities( notification_options=NotificationOptions(), experimental_capabilities={}, ), ), ) if __name__ == "__main__": asyncio.run(main())