rairo committed on
Commit
7802371
·
verified ·
1 Parent(s): dfd07e7

Update lesson_gen.py

Browse files
Files changed (1) hide show
  1. lesson_gen.py +178 -162
lesson_gen.py CHANGED
@@ -9,51 +9,69 @@ import re
9
  from pathlib import Path
10
  import numpy as np
11
  import requests
 
 
 
12
 
13
  # LangChain for data sourcing
14
  from langchain_community.document_loaders import ArxivLoader
15
-
16
  # Google Gemini
17
  from langchain_google_genai import ChatGoogleGenerativeAI
18
 
19
- # Video, Audio, and Animation
20
- from moviepy.editor import *
21
  from PIL import Image, ImageDraw, ImageFont
22
  import matplotlib
23
  matplotlib.use('Agg') # Use non-interactive backend
24
  import matplotlib.pyplot as plt
25
  from matplotlib.animation import FuncAnimation, FFMpegWriter
26
 
27
- # --- Configuration ---
28
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [%(funcName)s] - %(message)s')
29
  FPS, WIDTH, HEIGHT = 24, 1280, 720
 
30
 
31
- # --- Helper Functions ---
32
  def deepgram_tts(txt: str, voice_model: str = 'aura-2-andromeda-en'):
33
- """Calls the Deepgram API to convert text to speech."""
34
  DG_KEY = os.getenv("DEEPGRAM_API_KEY")
35
  if not DG_KEY or not txt: return None
 
36
  try:
37
- r = requests.post(
38
- "https://api.deepgram.com/v1/speak",
39
- params={"model": voice_model},
40
- headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"},
41
- json={"text": txt},
42
- timeout=45
43
- )
44
  r.raise_for_status()
45
  return r.content
46
  except Exception as e:
47
  logging.error(f"Deepgram TTS failed: {e}")
48
  return None
49
 
50
- # --- AI & Content Generation ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  def get_llm():
52
- """Initializes and returns the Gemini 2.5 Flash LLM."""
53
  return ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.5)
54
 
55
  def fetch_arxiv_papers(topic: str, count=3):
56
- """Fetches recent paper abstracts from arXiv related to a topic."""
57
  logging.info(f"Fetching {count} arXiv papers for topic: '{topic}'")
58
  try:
59
  loader = ArxivLoader(query=topic, load_max_docs=count, load_all_available_meta=True)
@@ -65,189 +83,187 @@ def fetch_arxiv_papers(topic: str, count=3):
65
  return []
66
 
67
  def generate_knowledge_base(topic: str, level: str, goal: str, arxiv_docs: list):
68
- """Synthesizes arXiv papers into a structured Knowledge Base for the course."""
69
  logging.info(f"Generating Knowledge Base for topic: {topic}")
70
  llm = get_llm()
71
-
72
  papers_context = "\n\n".join([f"Title: {doc.metadata.get('Title', 'N/A')}\nAbstract: {doc.page_content}" for doc in arxiv_docs])
73
-
74
- prompt = f"""
75
- You are an expert curriculum designer specializing in AI. Your task is to create a structured Knowledge Base for a personalized course on the topic: "{topic}".
76
-
77
- The learner's details are:
78
- - Skill Level: {level}
79
- - Learning Goal: {goal}
80
-
81
- Synthesize the following cutting-edge research from arXiv to create the course foundation:
82
- ---
83
- {papers_context}
84
- ---
85
-
86
- Based on the user's goal and level, and the provided research, generate a JSON object with the following structure:
87
- 1. "topic": The main topic.
88
- 2. "introduction": A brief, engaging introduction tailored to the learner's level.
89
- 3. "learning_path": An array of 5-7 key concepts that form the course outline. Each concept should be a string. Example: ["Introduction to Transformers", "The Attention Mechanism", "BERT and its Variants"].
90
- 4. "detailed_concepts": A dictionary where each key is a concept from the "learning_path" and the value is a detailed explanation (2-3 paragraphs) suitable for the learner's level.
91
-
92
- Return ONLY the valid JSON object, with no markdown formatting.
93
- """
94
  try:
95
- response = llm.invoke(prompt).content.strip()
96
- if response.startswith("```json"): response = response[7:-3]
97
- knowledge_base = json.loads(response)
98
- logging.info("Successfully generated Knowledge Base.")
99
- return knowledge_base
100
- except Exception as e:
101
- logging.error(f"Failed to generate Knowledge Base: {e}")
102
- raise
103
 
104
  def generate_lesson_from_knowledge_base(knowledge_base: dict, concept_to_cover: str):
105
- """Generates a script and quiz for a lesson, strategically inserting animation tags."""
106
  logging.info(f"Generating lesson for concept: '{concept_to_cover}'")
107
  llm = get_llm()
108
  concept_details = knowledge_base.get("detailed_concepts", {}).get(concept_to_cover, "")
109
-
110
- available_animations = ["Linear Regression"]
111
  animation_instruction = ""
112
- if concept_to_cover in available_animations:
113
- animation_tag = concept_to_cover.lower().replace(" ", "_")
114
- animation_instruction = f'When explaining the core mechanism of {concept_to_cover}, you MUST insert the tag `<animate_matplotlib: "{animation_tag}">` in the script. This is crucial for visualization.'
115
-
116
- prompt = f"""
117
- You are ProfAI, an engaging AI professor creating a lesson on "{concept_to_cover}".
118
- Detailed information:
119
- ---
120
- {concept_details}
121
- ---
122
- {animation_instruction}
123
-
124
- Generate a JSON object with "script" (a 60-90 second video script) and "quiz" (3 multiple-choice questions).
125
- The script should be conversational and easy to understand.
126
- Return ONLY the valid JSON object.
127
- """
128
  try:
129
- response = llm.invoke(prompt).content.strip()
130
- if response.startswith("```json"): response = response[7:-3]
131
  return json.loads(response)
132
- except Exception as e:
133
- logging.error(f"Failed to generate lesson content: {e}")
134
- raise
135
 
136
  def generate_remedial_lesson(failed_concept: str):
137
- """Generates a short, focused remedial lesson."""
138
  logging.info(f"Generating remedial lesson for concept: '{failed_concept}'")
139
  llm = get_llm()
140
- prompt = f"""
141
- You are ProfAI. A student struggled to understand the concept of "{failed_concept}".
142
- Your task is to create a short, remedial micro-lesson to help them.
143
-
144
- Generate a JSON object with two keys:
145
- 1. "script": A very simple, concise script (30-45 seconds) explaining "{failed_concept}" with a different analogy or a simpler approach.
146
- 2. "quiz": An array with ONE multiple-choice question to confirm their understanding.
147
-
148
- Return ONLY the valid JSON object.
149
- """
150
  try:
151
- response = llm.invoke(prompt).content.strip()
152
- if response.startswith("```json"): response = response[7:-3]
153
  return json.loads(response)
154
- except Exception as e:
155
- logging.error(f"Failed to generate remedial lesson: {e}")
156
- raise
157
 
158
- # --- Animation & Video Generation ---
159
- def animate_linear_regression(duration, temp_dir):
160
- """Generates a Matplotlib animation of Linear Regression."""
161
  logging.info("Generating Matplotlib animation for Linear Regression.")
162
- fig, ax = plt.subplots(figsize=(WIDTH/100, HEIGHT/100))
163
  np.random.seed(42)
164
- X = 2 * np.random.rand(100, 1)
165
- y = 4 + 3 * X + np.random.randn(100, 1)
166
  ax.scatter(X, y, alpha=0.6, label='Data Points')
167
-
168
  line, = ax.plot([], [], 'r-', lw=3, label='Regression Line')
169
- ax.set_xlim(0, 2)
170
- ax.set_ylim(0, 15)
171
- ax.set_title("Linear Regression: Finding the Best Fit Line", fontsize=20)
172
- ax.set_xlabel("Feature (X)", fontsize=14)
173
- ax.set_ylabel("Target (y)", fontsize=14)
174
- ax.grid(True, linestyle='--', alpha=0.6)
175
- ax.legend()
176
- plt.tight_layout()
177
-
178
- def init():
179
- line.set_data([], [])
180
- return line,
181
-
182
  def update(frame):
183
- # Animate the line converging to the best fit
184
- # This is a simplified animation for demonstration
185
  progress = frame / (duration * FPS)
186
- slope = 3 * progress
187
- intercept = 4
188
- x_vals = np.array([0, 2])
189
- y_vals = intercept + slope * x_vals
190
- line.set_data(x_vals, y_vals)
191
- return line,
192
-
193
  anim = FuncAnimation(fig, update, frames=int(duration * FPS), init_func=init, blit=True)
194
- output_path = temp_dir / f"anim_{uuid.uuid4().hex}.mp4"
195
  anim.save(str(output_path), writer=FFMpegWriter(fps=FPS))
196
  plt.close(fig)
197
- logging.info(f"Matplotlib animation saved to {output_path}")
198
- return VideoFileClip(str(output_path))
199
 
200
- def generate_matplotlib_animation(concept_tag: str, duration: float, temp_dir: Path):
201
- """Router to generate the correct Matplotlib animation based on a tag."""
202
  if concept_tag == "linear_regression":
203
- return animate_linear_regression(duration, temp_dir)
204
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
- def create_lesson_video(script: str, narration_audio_bytes: bytes):
207
- """Creates a complete lesson video, incorporating Matplotlib animations if tagged."""
208
- logging.info("Starting comprehensive video generation.")
 
 
 
 
 
209
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  with tempfile.TemporaryDirectory() as temp_dir_str:
211
  temp_dir = Path(temp_dir_str)
212
- audio_path = temp_dir / "narration.mp3"
213
- audio_path.write_bytes(narration_audio_bytes)
214
- audio_clip = AudioFileClip(str(audio_path))
215
- total_duration = audio_clip.duration
216
-
217
- tag_pattern = r'(<animate_matplotlib: "([^"]+)">)'
218
- script_parts = re.split(tag_pattern, script)
219
-
220
- text_segments = [s for s in script_parts[::3] if s.strip()]
221
- tags = script_parts[2::3]
222
 
223
- final_clips = []
224
- running_time = 0
225
-
226
- # This allocation is simplified; a more robust method might time the audio parts.
227
- total_text_chars = sum(len(s) for s in text_segments)
228
- time_per_char = total_duration / total_text_chars if total_text_chars > 0 else 0
229
-
230
- # Create clips for each segment
231
- for i, text_part in enumerate(text_segments):
232
- part_duration = len(text_part) * time_per_char
233
- txt_clip = TextClip(text_part.strip(), fontsize=40, color='white', font='Arial-Bold', size=(WIDTH*0.8, None), method='caption').set_duration(part_duration)
234
- final_clips.append(txt_clip.set_start(running_time).set_position('center'))
235
- running_time += part_duration
 
 
 
 
236
 
237
- if i < len(tags):
238
- anim_duration = 7 # Fixed duration for matplotlib animations
239
- anim_clip = generate_matplotlib_animation(tags[i], anim_duration, temp_dir)
240
- if anim_clip:
241
- final_clips.append(anim_clip.set_duration(anim_duration).set_start(running_time).set_position('center'))
242
- running_time += anim_duration
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
- final_duration = running_time
245
- bg_clip = ColorClip(size=(WIDTH, HEIGHT), color=(20, 20, 40)).set_duration(final_duration)
 
 
 
 
246
 
247
- final_video = CompositeVideoClip([bg_clip] + final_clips)
248
- final_video = final_video.set_audio(audio_clip.set_duration(final_duration))
249
 
250
- output_path = temp_dir / "final_video.mp4"
251
- final_video.write_videofile(str(output_path), codec='libx264', fps=FPS, threads=4, logger='bar')
252
 
253
- return Path(output_path).read_bytes()
 
9
  from pathlib import Path
10
  import numpy as np
11
  import requests
12
+ import subprocess
13
+ import shutil
14
+ import cv2
15
 
16
  # LangChain for data sourcing
17
  from langchain_community.document_loaders import ArxivLoader
 
18
  # Google Gemini
19
  from langchain_google_genai import ChatGoogleGenerativeAI
20
 
21
+ # Video, Audio, and Animation (using robust tools)
 
22
  from PIL import Image, ImageDraw, ImageFont
23
  import matplotlib
24
  matplotlib.use('Agg') # Use non-interactive backend
25
  import matplotlib.pyplot as plt
26
  from matplotlib.animation import FuncAnimation, FFMpegWriter
27
 
28
+ # --- 1. CONFIGURATION ---
29
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [%(funcName)s] - %(message)s')
30
  FPS, WIDTH, HEIGHT = 24, 1280, 720
31
+ PEXELS_API_KEY = os.getenv("PEXELS_API_KEY")
32
 
33
+ # --- 2. HELPER & UTILITY FUNCTIONS (Adapted from Sozo) ---
34
def deepgram_tts(txt: str, voice_model: str = 'aura-2-andromeda-en'):
    """Synthesize speech for *txt* via the Deepgram REST API.

    Returns the raw audio bytes on success, or None when the API key is
    missing, the text is empty, or the HTTP request fails.
    """
    api_key = os.getenv("DEEPGRAM_API_KEY")
    if not api_key or not txt:
        return None
    # Drop characters outside word chars / whitespace / basic punctuation,
    # which tend to confuse the TTS engine.
    sanitized = re.sub(r"[^\w\s.,!?;:-]", "", txt)
    try:
        resp = requests.post(
            "https://api.deepgram.com/v1/speak",
            params={"model": voice_model},
            headers={"Authorization": f"Token {api_key}"},
            json={"text": sanitized},
            timeout=45,
        )
        resp.raise_for_status()
        return resp.content
    except Exception as e:
        logging.error(f"Deepgram TTS failed: {e}")
        return None
45
 
46
def audio_duration(path: str) -> float:
    """Return the duration of the media file at *path* in seconds.

    Probes the file with ffprobe; falls back to 5.0 seconds whenever the
    probe fails (missing file, missing ffprobe binary, unparsable output).
    """
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=nw=1:nk=1",
        path,
    ]
    try:
        probe = subprocess.run(probe_cmd, text=True, capture_output=True, check=True)
        return float(probe.stdout.strip())
    except Exception:
        # Best-effort default keeps video assembly going even on probe failure.
        return 5.0
51
+
52
def generate_silence_mp3(duration: float, out: Path):
    """Write *duration* seconds of silent mono MP3 audio to *out* via ffmpeg."""
    silence_cmd = [
        "ffmpeg", "-y",
        "-f", "lavfi",
        "-i", "anullsrc=r=44100:cl=mono",
        "-t", f"{duration:.3f}",
        "-q:a", "9",
        str(out),
    ]
    subprocess.run(silence_cmd, check=True, capture_output=True)
54
+
55
def concat_media(file_paths: list, output_path: Path):
    """Concatenate media files into *output_path* with ffmpeg's concat demuxer.

    Paths that are missing or suspiciously small (<= 100 bytes) are dropped.
    A single surviving file is copied straight through (no re-mux needed).

    Raises:
        ValueError: if no usable input file remains after filtering.
    """
    usable = [p for p in file_paths if Path(p).exists() and Path(p).stat().st_size > 100]
    if not usable:
        raise ValueError("No valid media files to concatenate.")
    if len(usable) == 1:
        # Nothing to join — a direct copy is faster and lossless.
        shutil.copy2(usable[0], str(output_path))
        return
    # The concat demuxer reads its inputs from a manifest file.
    manifest = output_path.with_suffix(".txt")
    manifest.write_text("".join(f"file '{Path(p).resolve()}'\n" for p in usable))
    concat_cmd = [
        "ffmpeg", "-y", "-f", "concat", "-safe", "0",
        "-i", str(manifest), "-c", "copy", str(output_path),
    ]
    try:
        subprocess.run(concat_cmd, check=True, capture_output=True, text=True)
    finally:
        manifest.unlink(missing_ok=True)
69
+
70
+ # --- 3. AI & CONTENT GENERATION ---
71
def get_llm():
    """Build the Gemini 2.5 Flash chat model used by every generation step."""
    api_key = os.getenv("GOOGLE_API_KEY")
    return ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        google_api_key=api_key,
        temperature=0.5,
    )
73
 
74
  def fetch_arxiv_papers(topic: str, count=3):
 
75
  logging.info(f"Fetching {count} arXiv papers for topic: '{topic}'")
76
  try:
77
  loader = ArxivLoader(query=topic, load_max_docs=count, load_all_available_meta=True)
 
83
  return []
84
 
85
def generate_knowledge_base(topic: str, level: str, goal: str, arxiv_docs: list):
    """Synthesize arXiv abstracts into a structured course Knowledge Base.

    Args:
        topic: Course topic the knowledge base is built around.
        level: Learner's skill level, interpolated into the prompt.
        goal: Learner's stated goal, interpolated into the prompt.
        arxiv_docs: LangChain documents from ArxivLoader; each contributes its
            "Title" metadata and abstract (page_content) to the prompt context.

    Returns:
        dict parsed from the model's JSON reply — per the prompt it should
        contain "topic", "introduction", "learning_path", "detailed_concepts".

    Raises:
        Re-raises any LLM-invocation or JSON-decoding failure after logging.
    """
    # This function remains solid, no changes needed.
    logging.info(f"Generating Knowledge Base for topic: {topic}")
    llm = get_llm()
    papers_context = "\n\n".join([f"Title: {doc.metadata.get('Title', 'N/A')}\nAbstract: {doc.page_content}" for doc in arxiv_docs])
    prompt = f"""You are an expert curriculum designer. Create a structured Knowledge Base for a personalized course on "{topic}". The learner's level is {level} and their goal is {goal}. Synthesize the following research: --- {papers_context} ---. Generate a JSON object with "topic", "introduction", a "learning_path" array of 5-7 key concepts, and "detailed_concepts" dictionary. Return ONLY the valid JSON object."""
    try:
        # NOTE(review): fence stripping removes ``` sequences ANYWHERE in the
        # reply, so backticks inside JSON string values would be mangled too.
        response = llm.invoke(prompt).content.strip().replace("```json", "").replace("```", "")
        return json.loads(response)
    except Exception as e: logging.error(f"Failed to generate Knowledge Base: {e}"); raise
 
 
 
 
 
95
 
96
def generate_lesson_from_knowledge_base(knowledge_base: dict, concept_to_cover: str):
    """Generate one lesson (video script + quiz) for a single KB concept.

    Args:
        knowledge_base: Dict produced by generate_knowledge_base(); the
            concept text is read from its "detailed_concepts" mapping.
        concept_to_cover: Concept name from the learning path.

    Returns:
        dict with "script" (narration, possibly containing
        `<visual: "...">` tags) and "quiz" (3 multiple-choice questions).

    Raises:
        Re-raises any LLM-invocation or JSON-decoding failure after logging.
    """
    logging.info(f"Generating lesson for concept: '{concept_to_cover}'")
    llm = get_llm()
    concept_details = knowledge_base.get("detailed_concepts", {}).get(concept_to_cover, "")

    # Concepts with a bespoke animation hint. NOTE(review): only
    # "linear_regression" has a renderer downstream; a "neural_network" tag
    # will raise there and fall through to the Pexels fallback at render time.
    available_animations = ["Linear Regression", "Neural Network"]
    animation_instruction = ""
    # Find a concept that is a substring of the concept_to_cover
    for anim_concept in available_animations:
        if anim_concept.lower() in concept_to_cover.lower():
            animation_tag = anim_concept.lower().replace(" ", "_")
            animation_instruction = f'When explaining the core mechanism of {anim_concept}, you MUST insert the tag `<visual: "{animation_tag}">` in the script. This is crucial for visualization.'
            break

    prompt = f"""You are ProfAI, an engaging AI professor. Create a lesson on "{concept_to_cover}". Detailed info: --- {concept_details} ---. {animation_instruction} The script must begin with a short, engaging introduction (1-2 sentences). Generate a JSON object with "script" (a 60-90 second video script) and "quiz" (3 multiple-choice questions). Return ONLY valid JSON."""
    try:
        # Strip Markdown code fences the model sometimes wraps around the JSON.
        response = llm.invoke(prompt).content.strip().replace("```json", "").replace("```", "")
        return json.loads(response)
    except Exception as e: logging.error(f"Failed to generate lesson content: {e}"); raise
 
 
115
 
116
def generate_remedial_lesson(failed_concept: str):
    """Generate a short remedial micro-lesson for a concept the student failed.

    Returns:
        dict with "script" (a simple 30-45 second re-explanation using a new
        analogy) and "quiz" (a single multiple-choice question).

    Raises:
        Re-raises any LLM-invocation or JSON-decoding failure after logging.
    """
    logging.info(f"Generating remedial lesson for concept: '{failed_concept}'")
    llm = get_llm()
    prompt = f"""You are ProfAI. A student struggled with "{failed_concept}". Create a short, remedial micro-lesson. Generate JSON with "script" (a simple, 30-45 second explanation with a new analogy) and "quiz" (ONE multiple-choice question). Return ONLY valid JSON."""
    try:
        # Strip Markdown code fences the model sometimes wraps around the JSON.
        response = llm.invoke(prompt).content.strip().replace("```json", "").replace("```", "")
        return json.loads(response)
    except Exception as e: logging.error(f"Failed to generate remedial lesson: {e}"); raise
 
 
124
 
125
+ # --- 4. ANIMATION & VIDEO GENERATION (NEW ENGINE) ---
126
def animate_linear_regression(duration, output_path: Path):
    """Render a Matplotlib animation of a regression line converging onto
    synthetic data and save it as an MP4 at *output_path*."""
    logging.info("Generating Matplotlib animation for Linear Regression.")
    fig, ax = plt.subplots(figsize=(WIDTH / 100, HEIGHT / 100), dpi=120)

    # Fixed seed -> identical scatter cloud on every render.
    np.random.seed(42)
    X = 2 * np.random.rand(100, 1)
    y = 4 + 3 * X + np.random.randn(100, 1)
    ax.scatter(X, y, alpha=0.6, label='Data Points')

    line, = ax.plot([], [], 'r-', lw=3, label='Regression Line')
    ax.set_xlim(0, 2)
    ax.set_ylim(0, 15)
    ax.set_title("Linear Regression: Finding the Best Fit Line", fontsize=16)
    ax.legend()
    plt.tight_layout()

    frame_count = int(duration * FPS)

    def init():
        line.set_data([], [])
        return line,

    def update(frame):
        # Sweep the slope from 0 to 3 over the clip; intercept fixed at 4.
        progress = frame / (duration * FPS)
        xs = np.array([0, 2])
        line.set_data(xs, 4 + (3 * progress) * xs)
        return line,

    animation = FuncAnimation(fig, update, frames=frame_count, init_func=init, blit=True)
    animation.save(str(output_path), writer=FFMpegWriter(fps=FPS))
    plt.close(fig)
 
 
145
 
146
def generate_matplotlib_animation(concept_tag: str, duration: float, temp_dir: Path) -> Path:
    """Dispatch *concept_tag* to its Matplotlib renderer; return the MP4 path.

    Raises:
        ValueError: when no animation is implemented for the tag.
    """
    output_path = temp_dir / f"anim_{concept_tag}.mp4"
    # Add more animation concepts here with 'elif concept_tag == "new_concept":'
    if concept_tag != "linear_regression":
        raise ValueError(f"Animation for '{concept_tag}' not implemented.")
    animate_linear_regression(duration, output_path)
    return output_path
153
+
154
def search_and_download_pexels_video(query: str, duration: float, out_path: Path) -> str:
    """Fetch a landscape HD stock clip from Pexels matching *query* and
    re-encode it to WIDTHxHEIGHT and exactly *duration* seconds.

    The source clip is looped (-stream_loop -1) if shorter than the target
    duration, letterboxed to the output frame, and stripped of its own audio.

    Returns:
        str(out_path) on success; None when the API key is missing, no
        suitable clip is found, or any network/ffmpeg step fails.
    """
    # Read the key at call time (rather than the import-time module global) so
    # a key exported after import still works.
    api_key = os.getenv("PEXELS_API_KEY")
    if not api_key:
        logging.warning("PEXELS_API_KEY not set.")
        return None
    temp_dl_path = None
    try:
        response = requests.get(
            "https://api.pexels.com/videos/search",
            headers={"Authorization": api_key},
            params={"query": query, "per_page": 5, "orientation": "landscape"},
            timeout=20,
        )
        response.raise_for_status()

        # BUG FIX: the original comprehension referenced `v` before its own
        # `for v in ...` clause (a guaranteed NameError), used a bare
        # f.get('width') >= 1280 that TypeErrors when 'width' is absent, and
        # then downloaded video_files[0] — not necessarily the HD rendition it
        # had just filtered for. Collect the qualifying HD file per video.
        hd_candidates = []
        for video in response.json().get('videos', []):
            hd_files = [
                f for f in video.get('video_files', [])
                if f.get('quality') == 'hd' and f.get('width', 0) >= 1280
            ]
            if hd_files:
                hd_candidates.append(hd_files[0])
        if not hd_candidates:
            return None

        # Stream the chosen HD file to a temp download location.
        with requests.get(hd_candidates[0]['link'], stream=True, timeout=60) as r, \
                tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_dl:
            r.raise_for_status()
            for chunk in r.iter_content(chunk_size=8192):
                temp_dl.write(chunk)
            temp_dl_path = Path(temp_dl.name)

        # Loop, scale/pad to the output frame, trim to duration, drop audio.
        cmd = ["ffmpeg", "-y", "-stream_loop", "-1", "-i", str(temp_dl_path),
               "-vf", f"scale={WIDTH}:{HEIGHT}:force_original_aspect_ratio=decrease,pad={WIDTH}:{HEIGHT}:(ow-iw)/2:(oh-ih)/2,setsar=1",
               "-t", f"{duration:.3f}", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-an", str(out_path)]
        subprocess.run(cmd, check=True, capture_output=True)
        temp_dl_path.unlink()
        return str(out_path)
    except Exception as e:
        logging.error(f"Pexels processing failed for query '{query}': {e}")
        if temp_dl_path is not None and temp_dl_path.exists():
            temp_dl_path.unlink()
        return None
177
 
178
def create_title_card(text: str, duration: float, output_path: Path):
    """Creates a simple video clip with centered text.

    Renders one static frame (PIL text on a solid background) and writes it
    repeatedly with OpenCV for int(FPS * duration) frames.

    Returns:
        str(output_path) — the path of the written MP4.
    """
    frame = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)
    # cv2.VideoWriter treats channel 0 as Blue, so (40, 20, 20) renders as a
    # dark blue background; the white text is channel-order symmetric.
    frame[:] = (40, 20, 20) # Dark blue background
    try:
        font = ImageFont.truetype("arial.ttf", 60)
    except IOError:
        # arial.ttf is not available on most Linux hosts; PIL's built-in
        # bitmap font is a small but safe fallback.
        font = ImageFont.load_default()

    img = Image.fromarray(frame)
    draw = ImageDraw.Draw(img)
    # Measure the rendered text so it can be centered in the frame.
    text_bbox = draw.textbbox((0, 0), text, font=font)
    text_width = text_bbox[2] - text_bbox[0]
    text_height = text_bbox[3] - text_bbox[1]
    position = ((WIDTH - text_width) / 2, (HEIGHT - text_height) / 2)
    draw.text(position, text, font=font, fill=(255, 255, 255))

    final_frame = np.array(img)

    # Repeat the single frame for the full clip duration.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_path), fourcc, FPS, (WIDTH, HEIGHT))
    for _ in range(int(FPS * duration)):
        out.write(final_frame)
    out.release()
    return str(output_path)
203
+
204
def generate_profai_video_from_script(script: str, topic: str):
    """Assemble a complete lesson video from a narration script.

    Pipeline: split the script on `<visual: "...">` tags into scenes, narrate
    each scene with Deepgram TTS, pick a visual per scene (tagged Matplotlib
    animation -> Pexels stock footage -> static title card, in fallback
    order), then concatenate the scene videos and audios and mux them with
    ffmpeg.

    Args:
        script: Narration text, possibly containing `<visual: "...">` tags.
        topic: Course topic, used to build the default Pexels search query.

    Returns:
        bytes of the final MP4 file.

    Raises:
        Exception: when no scene produced both audio and video.
    """
    logging.info("Starting new video generation process.")
    with tempfile.TemporaryDirectory() as temp_dir_str:
        temp_dir = Path(temp_dir_str)

        # 1. Parse Script into Scenes
        # re.split with 2 capture groups yields [text, full_tag, inner_tag,
        # text, ...], so stepping by 3 pairs each text with the tag that
        # FOLLOWS it (parts[i+2] is the tag's inner value).
        tag_pattern = r'(<visual: "([^"]+)">)'
        script_parts = re.split(tag_pattern, script)
        scenes = []
        for i in range(0, len(script_parts), 3):
            text = script_parts[i].strip()
            tag = script_parts[i+2] if i+2 < len(script_parts) else None
            if text: scenes.append({"text": text, "tag": tag})

        # 2. Generate Audio and Visuals for each scene
        video_parts, audio_parts = [], []
        # NOTE(review): total_audio_duration is accumulated but never read.
        total_audio_duration = 0
        for i, scene in enumerate(scenes):
            narration_audio_bytes = deepgram_tts(scene['text'])
            if not narration_audio_bytes:
                # Scene is dropped entirely so audio and video stay in sync.
                logging.warning(f"TTS failed for scene {i}. Skipping.")
                continue

            audio_path = temp_dir / f"audio_{i}.mp3"
            audio_path.write_bytes(narration_audio_bytes)
            # Each visual is sized to its scene's narration length.
            scene_audio_dur = audio_duration(str(audio_path))
            audio_parts.append(str(audio_path))
            total_audio_duration += scene_audio_dur

            video_path = temp_dir / f"video_{i}.mp4"
            visual_generated = False

            # Try to generate specific visual from tag
            if scene['tag']:
                try:
                    logging.info(f"Attempting to generate animation for tag: {scene['tag']}")
                    generate_matplotlib_animation(scene['tag'], scene_audio_dur, video_path)
                    visual_generated = True
                except Exception as e:
                    logging.warning(f"Animation failed for tag '{scene['tag']}': {e}. Triggering Pexels fallback.")

            # Fallback or default visual generation
            if not visual_generated:
                query = scene['tag'] if scene['tag'] else f"{topic} abstract"
                logging.info(f"Searching Pexels with query: '{query}'")
                pexels_path = search_and_download_pexels_video(query, scene_audio_dur, video_path)
                if not pexels_path:
                    # Final fallback never fails: a static card with the text.
                    logging.warning("Pexels failed. Creating a title card as final fallback.")
                    create_title_card(scene['text'], scene_audio_dur, video_path)

            video_parts.append(str(video_path))

        if not video_parts or not audio_parts: raise Exception("Failed to generate any video or audio parts.")

        # 3. Concatenate and Finalize Video
        silent_vid_path = temp_dir / "silent_video.mp4"
        audio_mix_path = temp_dir / "full_audio.mp3"
        final_vid_path = temp_dir / "final_video.mp4"

        concat_media(video_parts, silent_vid_path)
        concat_media(audio_parts, audio_mix_path)

        # Mux: copy the video stream, encode audio to AAC; -shortest trims any
        # drift between the concatenated audio and video tracks.
        cmd = ["ffmpeg", "-y", "-i", str(silent_vid_path), "-i", str(audio_mix_path), "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0", "-shortest", str(final_vid_path)]
        subprocess.run(cmd, check=True, capture_output=True)

        # Read the bytes before the TemporaryDirectory context deletes them.
        return final_vid_path.read_bytes()