""" Core Intelligence Logic - Avatar-based AI Tutor Engine """ from typing import Dict, Any, List from app.contracts import EngineRequest, EngineResponse, ErrorDetail from app.hf_client import HFClient from app.config import config import base64 class AvatarTutorEngine: """ Avatar-based AI Tutor Intelligence Engine Creates personalized AI tutor clones from user images and voice, then generates dynamic teaching videos with facial movements and lip-sync """ def __init__(self): self.hf_client = HFClient() self.engine_name = config.ENGINE_NAME async def run(self, request: EngineRequest) -> EngineResponse: """ Main execution method - creates avatar tutor Args: request: Standard EngineRequest Returns: Standard EngineResponse """ try: # Validate action if request.action != "create_avatar_tutor": return self._error_response( request, "INVALID_ACTION", f"Action '{request.action}' not supported. Use 'create_avatar_tutor'" ) # Extract required inputs image_ref = self._get_input_by_type(request, "image") audio_ref = self._get_input_by_type(request, "audio") course_content = self._get_course_content(request) # Validate required inputs if not image_ref: return self._error_response( request, "MISSING_IMAGE", "Image input is required for avatar creation" ) if not audio_ref: return self._error_response( request, "MISSING_AUDIO", "Voice audio sample is required for avatar creation" ) if not course_content: return self._error_response( request, "MISSING_CONTENT", "Course content is required for teaching script generation" ) # Get options temperature = request.options.get("temperature", 0.7) max_tokens = request.options.get("max_tokens", 2048) lesson_duration = request.options.get("lesson_duration", "5 minutes") # Step 1: Transcribe voice sample to understand characteristics voice_transcription = await self.hf_client.transcribe_audio( audio_url=audio_ref ) # Step 2: Generate personalized teaching script teaching_script = await self._generate_teaching_script( course_content=course_content, lesson_duration=lesson_duration, temperature=temperature, max_tokens=max_tokens, context=request.context ) # Step 3: Generate teaching audio using TTS teaching_audio = await self.hf_client.text_to_speech( text=teaching_script ) # Step 4: Generate talking head video with lip-sync avatar_video = await self.hf_client.generate_talking_head( image_url=image_ref, audio_data=teaching_audio ) # Build result result = { "avatar_video_url": avatar_video.get("video_url"), "avatar_video_data": avatar_video.get("video_data"), "teaching_script": teaching_script, "voice_sample_transcription": voice_transcription.get("text"), "lesson_duration": lesson_duration, "course_topic": course_content[:100] + "..." if len(course_content) > 100 else course_content, "avatar_features": { "facial_animations": True, "lip_sync": True, "natural_expressions": True, "head_movements": True } } # Build success response return EngineResponse( request_id=request.request_id, ok=True, status="success", engine=self.engine_name, action=request.action, result=result, messages=[ f"Avatar tutor created successfully", f"Generated {lesson_duration} teaching session", "Video includes dynamic facial movements and lip-sync" ], suggested_actions=[ "download_video", "generate_another_lesson", "customize_teaching_style" ] ) except Exception as e: return self._error_response( request, "ENGINE_ERROR", f"Failed to create avatar tutor: {str(e)}" ) def _get_input_by_type(self, request: EngineRequest, input_type: str) -> str: """Extract input reference by type""" for item in request.input.items: if item.type == input_type: return item.ref or item.text return None def _get_course_content(self, request: EngineRequest) -> str: """Extract course content from request""" # Try input.text first if request.input.text: return request.input.text.strip() # Try text items for item in request.input.items: if item.type == "text" and item.text: return item.text.strip() # Try context if "course_content" in request.context: return request.context["course_content"] return "" async def _generate_teaching_script( self, course_content: str, lesson_duration: str, temperature: float, max_tokens: int, context: Dict[str, Any] ) -> str: """ Generate personalized teaching script from course content Args: course_content: The content to teach lesson_duration: Desired lesson length temperature: LLM temperature max_tokens: Max tokens to generate context: Additional context Returns: Teaching script text """ # Build system prompt for teaching system_prompt = context.get( "system_prompt", "You are an expert educator creating engaging teaching scripts. " "Your scripts should be clear, conversational, and designed for video delivery. " "Include natural pauses, emphasis points, and engaging explanations." ) # Build user prompt user_prompt = f"""Create a {lesson_duration} teaching script for the following course content: {course_content} Requirements: - Make it conversational and engaging - Include clear explanations with examples - Add natural pauses for emphasis - Structure it for video delivery - Keep the tone friendly and encouraging - Focus on key concepts and practical understanding Generate the complete teaching script:""" messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt} ] response = await self.hf_client.chat_completion( messages=messages, temperature=temperature, max_tokens=max_tokens ) return response["choices"][0]["message"]["content"] def _error_response( self, request: EngineRequest, error_code: str, error_detail: str ) -> EngineResponse: """Build standardized error response""" return EngineResponse( request_id=request.request_id, ok=False, status="error", engine=self.engine_name, action=request.action, error=ErrorDetail(code=error_code, detail=error_detail) )