File size: 8,587 Bytes
bb8a136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
"""

Core Intelligence Logic - Avatar-based AI Tutor Engine

"""
import base64
import logging
from typing import Any, Dict, List, Optional

from app.contracts import EngineRequest, EngineResponse, ErrorDetail
from app.hf_client import HFClient
from app.config import config


class AvatarTutorEngine:
    """Avatar-based AI tutor intelligence engine.

    Given an image reference, a voice sample reference and course content,
    the engine asks ``HFClient`` to transcribe the voice sample, generate a
    teaching script, synthesize speech for that script and produce a
    talking-head video from the image and the synthesized audio.
    """

    # The only action ``run`` accepts.
    SUPPORTED_ACTION = "create_avatar_tutor"

    def __init__(self):
        self.hf_client = HFClient()
        self.engine_name = config.ENGINE_NAME

    async def run(self, request: EngineRequest) -> EngineResponse:
        """Create an avatar tutor video for ``request``.

        Recognized ``request.options`` keys (with defaults): ``temperature``
        (0.7), ``max_tokens`` (2048) and ``lesson_duration`` ("5 minutes").

        Args:
            request: Standard EngineRequest. Must carry an ``image`` item, an
                ``audio`` item and course content (see
                ``_get_course_content`` for where content is looked up).

        Returns:
            Standard EngineResponse. On failure ``ok`` is False and ``error``
            carries one of the codes ``INVALID_ACTION``, ``MISSING_IMAGE``,
            ``MISSING_AUDIO``, ``MISSING_CONTENT`` or ``ENGINE_ERROR``.
            This method does not raise for pipeline failures; they are
            logged and reported as ``ENGINE_ERROR``.
        """
        try:
            # Validate action
            if request.action != self.SUPPORTED_ACTION:
                return self._error_response(
                    request,
                    "INVALID_ACTION",
                    f"Action '{request.action}' not supported. "
                    f"Use '{self.SUPPORTED_ACTION}'"
                )

            # Extract required inputs
            image_ref = self._get_input_by_type(request, "image")
            audio_ref = self._get_input_by_type(request, "audio")
            course_content = self._get_course_content(request)

            # Validate required inputs
            if not image_ref:
                return self._error_response(
                    request,
                    "MISSING_IMAGE",
                    "Image input is required for avatar creation"
                )

            if not audio_ref:
                return self._error_response(
                    request,
                    "MISSING_AUDIO",
                    "Voice audio sample is required for avatar creation"
                )

            if not course_content:
                return self._error_response(
                    request,
                    "MISSING_CONTENT",
                    "Course content is required for teaching script generation"
                )

            # Treat a missing options/context mapping as empty so the
            # defaults below apply instead of failing with ENGINE_ERROR.
            options = request.options or {}
            context = request.context or {}
            temperature = options.get("temperature", 0.7)
            max_tokens = options.get("max_tokens", 2048)
            lesson_duration = options.get("lesson_duration", "5 minutes")

            # Step 1: Transcribe the voice sample. The transcription is only
            # echoed back in the result; it is not fed into later steps here.
            voice_transcription = await self.hf_client.transcribe_audio(
                audio_url=audio_ref
            )

            # Step 2: Generate the teaching script
            teaching_script = await self._generate_teaching_script(
                course_content=course_content,
                lesson_duration=lesson_duration,
                temperature=temperature,
                max_tokens=max_tokens,
                context=context
            )

            # Step 3: Generate teaching audio using TTS
            teaching_audio = await self.hf_client.text_to_speech(
                text=teaching_script
            )

            # Step 4: Generate talking head video from the image and audio
            avatar_video = await self.hf_client.generate_talking_head(
                image_url=image_ref,
                audio_data=teaching_audio
            )

            # Build result
            result = {
                "avatar_video_url": avatar_video.get("video_url"),
                "avatar_video_data": avatar_video.get("video_data"),
                "teaching_script": teaching_script,
                "voice_sample_transcription": voice_transcription.get("text"),
                "lesson_duration": lesson_duration,
                "course_topic": self._topic_preview(course_content),
                # Static capability flags; they are not derived from the
                # generated video.
                "avatar_features": {
                    "facial_animations": True,
                    "lip_sync": True,
                    "natural_expressions": True,
                    "head_movements": True
                }
            }

            # Build success response
            return EngineResponse(
                request_id=request.request_id,
                ok=True,
                status="success",
                engine=self.engine_name,
                action=request.action,
                result=result,
                messages=[
                    "Avatar tutor created successfully",
                    f"Generated {lesson_duration} teaching session",
                    "Video includes dynamic facial movements and lip-sync"
                ],
                suggested_actions=[
                    "download_video",
                    "generate_another_lesson",
                    "customize_teaching_style"
                ]
            )

        except Exception as e:
            # Top-level boundary: callers get a structured error response,
            # so log the traceback here or it is lost entirely.
            logging.getLogger(__name__).exception(
                "Avatar tutor creation failed (request_id=%s)",
                request.request_id
            )
            return self._error_response(
                request,
                "ENGINE_ERROR",
                f"Failed to create avatar tutor: {e}"
            )

    @staticmethod
    def _topic_preview(course_content: str, limit: int = 100) -> str:
        """Return ``course_content`` cut to ``limit`` chars, with "..." if cut."""
        if len(course_content) > limit:
            return course_content[:limit] + "..."
        return course_content

    def _get_input_by_type(
        self,
        request: EngineRequest,
        input_type: str
    ) -> Optional[str]:
        """Return the reference (or inline text) of the first usable item.

        Items whose ``type`` matches ``input_type`` but that carry neither a
        ``ref`` nor a ``text`` value are skipped, so a later valid item of
        the same type is still found.

        Args:
            request: Request whose ``input.items`` are searched.
            input_type: Item type to look for, e.g. "image" or "audio".

        Returns:
            ``item.ref`` if set, otherwise ``item.text``; ``None`` when no
            matching item has a value.
        """
        for item in request.input.items or []:
            if item.type != input_type:
                continue
            value = item.ref or item.text
            if value:
                return value
        return None

    def _get_course_content(self, request: EngineRequest) -> str:
        """Extract course content from the request.

        Sources are tried in order and the first one that is non-empty after
        stripping whitespace wins: ``request.input.text``, then the first
        ``text`` item in ``request.input.items``, then
        ``request.context["course_content"]``.

        Returns:
            The stripped content, or "" when no source provides any.
        """
        # Try input.text first; whitespace-only text must not block the
        # fallbacks below.
        direct = (request.input.text or "").strip()
        if direct:
            return direct

        # Try text items
        for item in request.input.items or []:
            if item.type == "text" and item.text:
                stripped = item.text.strip()
                if stripped:
                    return stripped

        # Try context
        fallback = (request.context or {}).get("course_content")
        if fallback is None:
            return ""
        return str(fallback).strip()

    async def _generate_teaching_script(
        self,
        course_content: str,
        lesson_duration: str,
        temperature: float,
        max_tokens: int,
        context: Dict[str, Any]
    ) -> str:
        """Generate a teaching script from course content via chat completion.

        Args:
            course_content: The content to teach.
            lesson_duration: Desired lesson length, interpolated into the
                prompt as-is (e.g. "5 minutes").
            temperature: Sampling temperature forwarded to the client.
            max_tokens: Token limit forwarded to the client.
            context: Additional context; an optional ``system_prompt`` key
                overrides the default system prompt. ``None`` is treated as
                empty.

        Returns:
            The ``content`` of the first choice's message in the response.

        Raises:
            KeyError, IndexError, TypeError: If the response does not have
                the ``choices[0].message.content`` shape read below.
        """
        # Build system prompt for teaching
        system_prompt = (context or {}).get(
            "system_prompt",
            "You are an expert educator creating engaging teaching scripts. "
            "Your scripts should be clear, conversational, and designed for video delivery. "
            "Include natural pauses, emphasis points, and engaging explanations."
        )

        # Build user prompt
        user_prompt = f"""Create a {lesson_duration} teaching script for the following course content:



{course_content}



Requirements:

- Make it conversational and engaging

- Include clear explanations with examples

- Add natural pauses for emphasis

- Structure it for video delivery

- Keep the tone friendly and encouraging

- Focus on key concepts and practical understanding



Generate the complete teaching script:"""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]

        response = await self.hf_client.chat_completion(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens
        )

        return response["choices"][0]["message"]["content"]

    def _error_response(
        self,
        request: EngineRequest,
        error_code: str,
        error_detail: str
    ) -> EngineResponse:
        """Build a standardized error response.

        Args:
            request: The request being answered; supplies ``request_id`` and
                ``action``.
            error_code: Machine-readable error code, e.g. "MISSING_IMAGE".
            error_detail: Human-readable description of the failure.

        Returns:
            An EngineResponse with ``ok=False``, ``status="error"`` and an
            ``ErrorDetail`` built from the code and detail.
        """
        return EngineResponse(
            request_id=request.request_id,
            ok=False,
            status="error",
            engine=self.engine_name,
            action=request.action,
            error=ErrorDetail(code=error_code, detail=error_detail)
        )