# at-engine / engine.py
# Godswill-IoT's picture
# Upload 6 files
# bb8a136 verified
"""
Core Intelligence Logic - Avatar-based AI Tutor Engine
"""
import base64
import logging
from typing import Dict, Any, List, Optional

from app.config import config
from app.contracts import EngineRequest, EngineResponse, ErrorDetail
from app.hf_client import HFClient
class AvatarTutorEngine:
    """
    Avatar-based AI Tutor Intelligence Engine

    Creates personalized AI tutor clones from user images and voice,
    then generates dynamic teaching videos with facial movements and lip-sync
    """

    def __init__(self):
        """Create the HF client and read the engine name from configuration."""
        self.hf_client = HFClient()
        self.engine_name = config.ENGINE_NAME

    async def run(self, request: EngineRequest) -> EngineResponse:
        """
        Main execution method - creates avatar tutor

        Validates the action and the required inputs, then runs the
        pipeline: transcribe voice sample -> generate teaching script ->
        text-to-speech -> talking-head video.

        Args:
            request: Standard EngineRequest

        Returns:
            Standard EngineResponse. Every failure (validation or an
            exception raised by any pipeline step) is reported as an error
            response rather than raised to the caller.
        """
        try:
            # Validate action
            if request.action != "create_avatar_tutor":
                return self._error_response(
                    request,
                    "INVALID_ACTION",
                    f"Action '{request.action}' not supported. Use 'create_avatar_tutor'"
                )

            # Extract required inputs
            image_ref = self._get_input_by_type(request, "image")
            audio_ref = self._get_input_by_type(request, "audio")
            course_content = self._get_course_content(request)

            # Validate required inputs
            if not image_ref:
                return self._error_response(
                    request,
                    "MISSING_IMAGE",
                    "Image input is required for avatar creation"
                )
            if not audio_ref:
                return self._error_response(
                    request,
                    "MISSING_AUDIO",
                    "Voice audio sample is required for avatar creation"
                )
            if not course_content:
                return self._error_response(
                    request,
                    "MISSING_CONTENT",
                    "Course content is required for teaching script generation"
                )

            # Get options (tolerate a request that carries no options at all)
            options = request.options or {}
            temperature = options.get("temperature", 0.7)
            max_tokens = options.get("max_tokens", 2048)
            lesson_duration = options.get("lesson_duration", "5 minutes")

            # Step 1: Transcribe voice sample.
            # NOTE: the transcription is only reported back in the result;
            # it is not passed to the TTS or video steps below.
            voice_transcription = await self.hf_client.transcribe_audio(
                audio_url=audio_ref
            )

            # Step 2: Generate personalized teaching script
            teaching_script = await self._generate_teaching_script(
                course_content=course_content,
                lesson_duration=lesson_duration,
                temperature=temperature,
                max_tokens=max_tokens,
                context=request.context
            )

            # Step 3: Generate teaching audio using TTS
            teaching_audio = await self.hf_client.text_to_speech(
                text=teaching_script
            )

            # Step 4: Generate talking head video with lip-sync
            avatar_video = await self.hf_client.generate_talking_head(
                image_url=image_ref,
                audio_data=teaching_audio
            )

            # Topic preview: first 100 characters, with an ellipsis only
            # when the content was actually truncated.
            course_topic = course_content
            if len(course_content) > 100:
                course_topic = course_content[:100] + "..."

            # Build result
            result = {
                "avatar_video_url": avatar_video.get("video_url"),
                "avatar_video_data": avatar_video.get("video_data"),
                "teaching_script": teaching_script,
                "voice_sample_transcription": voice_transcription.get("text"),
                "lesson_duration": lesson_duration,
                "course_topic": course_topic,
                "avatar_features": {
                    "facial_animations": True,
                    "lip_sync": True,
                    "natural_expressions": True,
                    "head_movements": True
                }
            }

            # Build success response
            return EngineResponse(
                request_id=request.request_id,
                ok=True,
                status="success",
                engine=self.engine_name,
                action=request.action,
                result=result,
                messages=[
                    "Avatar tutor created successfully",
                    f"Generated {lesson_duration} teaching session",
                    "Video includes dynamic facial movements and lip-sync"
                ],
                suggested_actions=[
                    "download_video",
                    "generate_another_lesson",
                    "customize_teaching_style"
                ]
            )
        except Exception as e:
            # Top-level boundary: keep the traceback in the logs, because the
            # response only carries the stringified exception.
            logging.getLogger(__name__).exception(
                "Avatar tutor creation failed for request %s",
                request.request_id
            )
            return self._error_response(
                request,
                "ENGINE_ERROR",
                f"Failed to create avatar tutor: {str(e)}"
            )

    def _get_input_by_type(self, request: EngineRequest, input_type: str) -> Optional[str]:
        """
        Extract input reference by type

        Args:
            request: Request whose input items are searched
            input_type: Item type to look for (e.g. "image", "audio")

        Returns:
            The ``ref`` (or, failing that, the ``text``) of the first item of
            the given type that actually carries a value, or None when no
            such item exists.
        """
        for item in request.input.items or ():
            if item.type != input_type:
                continue
            reference = item.ref or item.text
            # Skip empty items so a later, usable item is not ignored.
            if reference:
                return reference
        return None

    def _get_course_content(self, request: EngineRequest) -> str:
        """
        Extract course content from request

        Sources are tried in order: ``input.text``, then the first non-blank
        text item, then ``context["course_content"]``. Whitespace-only values
        count as missing, so the next source is consulted.

        Returns:
            The stripped course content, or "" when no source provides any.
        """
        # Try input.text first
        direct_text = (request.input.text or "").strip()
        if direct_text:
            return direct_text

        # Try text items
        for item in request.input.items or ():
            if item.type == "text" and item.text:
                item_text = item.text.strip()
                if item_text:
                    return item_text

        # Try context
        context_content = (request.context or {}).get("course_content")
        if not context_content:
            return ""
        # Coerce to str so the declared return type holds for any context value.
        return str(context_content).strip()

    async def _generate_teaching_script(
        self,
        course_content: str,
        lesson_duration: str,
        temperature: float,
        max_tokens: int,
        context: Dict[str, Any]
    ) -> str:
        """
        Generate personalized teaching script from course content

        Args:
            course_content: The content to teach
            lesson_duration: Desired lesson length
            temperature: LLM temperature
            max_tokens: Max tokens to generate
            context: Additional context; a "system_prompt" entry overrides
                the default system prompt

        Returns:
            Teaching script text (content of the first completion choice)
        """
        # Build system prompt for teaching
        system_prompt = (context or {}).get(
            "system_prompt",
            "You are an expert educator creating engaging teaching scripts. "
            "Your scripts should be clear, conversational, and designed for video delivery. "
            "Include natural pauses, emphasis points, and engaging explanations."
        )

        # Build user prompt. Concatenated literals keep the prompt text
        # independent of this method's indentation.
        user_prompt = (
            f"Create a {lesson_duration} teaching script for the following course content:\n"
            f"{course_content}\n"
            "Requirements:\n"
            "- Make it conversational and engaging\n"
            "- Include clear explanations with examples\n"
            "- Add natural pauses for emphasis\n"
            "- Structure it for video delivery\n"
            "- Keep the tone friendly and encouraging\n"
            "- Focus on key concepts and practical understanding\n"
            "Generate the complete teaching script:"
        )

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
        response = await self.hf_client.chat_completion(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens
        )
        return response["choices"][0]["message"]["content"]

    def _error_response(
        self,
        request: EngineRequest,
        error_code: str,
        error_detail: str
    ) -> EngineResponse:
        """
        Build standardized error response

        Args:
            request: Request being answered (supplies request_id and action)
            error_code: Machine-readable error code
            error_detail: Human-readable description of the failure

        Returns:
            EngineResponse with ok=False, status="error" and the error detail
        """
        return EngineResponse(
            request_id=request.request_id,
            ok=False,
            status="error",
            engine=self.engine_name,
            action=request.action,
            error=ErrorDetail(code=error_code, detail=error_detail)
        )