maria355 committed on
Commit
b37f161
·
verified ·
1 Parent(s): bb04d06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -38
app.py CHANGED
@@ -105,7 +105,8 @@ with col2:
105
  def generate_script_with_gemini(topic, length, style, tone, platform):
106
  """Generate video script using Gemini API"""
107
  try:
108
- model = genai.GenerativeModel('gemini-pro')
 
109
 
110
  prompt = f"""
111
  Create a detailed video script for the following specifications:
@@ -134,6 +135,7 @@ def generate_script_with_gemini(topic, length, style, tone, platform):
134
 
135
  Make sure the scenes add up to the total duration and are engaging for {platform}.
136
  Include specific visual descriptions that can be used to generate storyboard images.
 
137
  """
138
 
139
  response = model.generate_content(prompt)
@@ -154,32 +156,61 @@ def generate_script_with_gemini(topic, length, style, tone, platform):
154
  return None
155
  except Exception as e:
156
  st.error(f"Error generating script: {str(e)}")
157
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
  def generate_storyboard_image(scene_description, art_style):
160
- """Generate storyboard image using Stable Diffusion"""
161
  try:
162
- # Use Hugging Face Inference API for image generation
163
  client = InferenceClient(token=hf_token)
164
 
165
- # Enhance prompt based on art style
166
  style_prompts = {
167
- "Realistic": "photorealistic, high quality, detailed",
168
- "Cartoon": "cartoon style, animated, colorful, Disney-like",
169
- "Cinematic": "cinematic lighting, dramatic, film still, high contrast",
170
- "Minimalistic": "minimalist, clean, simple, geometric",
171
- "Sketch": "pencil sketch, hand-drawn, artistic, black and white",
172
- "Digital Art": "digital art, concept art, detailed, vibrant colors"
173
  }
174
 
175
- enhanced_prompt = f"{scene_description}, {style_prompts.get(art_style, '')}, storyboard frame, professional"
176
 
177
- image = client.text_to_image(
178
- enhanced_prompt,
179
- model="stabilityai/stable-diffusion-2-1"
180
- )
 
 
 
181
 
182
- return image
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  except Exception as e:
185
  st.error(f"Error generating image: {str(e)}")
@@ -283,6 +314,42 @@ def text_to_speech(text, language='en'):
283
  st.error(f"Error generating speech: {str(e)}")
284
  return None
285
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  # Main generation button
287
  if st.button("🚀 Generate Video Script & Storyboard", type="primary"):
288
  if not video_topic:
@@ -291,6 +358,11 @@ if st.button("🚀 Generate Video Script & Storyboard", type="primary"):
291
  with st.spinner("🤖 Generating script with AI..."):
292
  script_data = generate_script_with_gemini(video_topic, video_length, style, tone, platform)
293
 
 
 
 
 
 
294
  if script_data:
295
  st.session_state.generated_script = script_data
296
  st.success("✅ Script generated successfully!")
@@ -301,16 +373,22 @@ if st.button("🚀 Generate Video Script & Storyboard", type="primary"):
301
  progress_bar = st.progress(0)
302
 
303
  for i, scene in enumerate(script_data['scenes']):
304
- image = generate_storyboard_image(
305
- scene['description'],
306
- art_style
307
- )
308
- images.append(image)
309
- progress_bar.progress((i + 1) / len(script_data['scenes']))
 
 
 
 
310
 
311
  st.session_state.storyboard_images = images
312
 
313
  st.success("✅ Storyboard images generated!")
 
 
314
 
315
  # Display results
316
  if st.session_state.generated_script:
@@ -343,19 +421,20 @@ if st.session_state.generated_script:
343
  # Add text-to-speech for dialogue
344
  if scene.get('dialogue'):
345
  if st.button(f"🔊 Play Audio - Scene {i}", key=f"audio_{i}"):
346
- audio_buffer = text_to_speech(scene['dialogue'])
347
- if audio_buffer:
348
- st.audio(audio_buffer.getvalue(), format='audio/mp3')
 
349
 
350
  with tab2:
351
  if st.session_state.storyboard_images:
352
  st.subheader("🎨 Storyboard Images")
353
 
354
  for i, (scene, image) in enumerate(zip(script_data['scenes'], st.session_state.storyboard_images)):
355
- if image:
356
- col1, col2 = st.columns([1, 2])
357
-
358
- with col1:
359
  st.image(image, caption=f"Scene {i+1}", use_column_width=True)
360
 
361
  # Refinement option
@@ -368,11 +447,24 @@ if st.session_state.generated_script:
368
  if new_image:
369
  st.session_state.storyboard_images[i] = new_image
370
  st.rerun()
371
-
372
- with col2:
373
- st.write(f"**Scene {i+1}: {scene.get('duration', 'N/A')}**")
374
- st.write(f"**Description:** {scene.get('description', 'N/A')}")
375
- st.write(f"**Dialogue:** {scene.get('dialogue', 'N/A')}")
 
 
 
 
 
 
 
 
 
 
 
 
 
376
 
377
  with tab3:
378
  st.subheader("🎥 Video Preview")
@@ -475,7 +567,7 @@ if st.session_state.generated_script:
475
 
476
  # Footer
477
  st.markdown("---")
478
- st.markdown("🤖 **Powered by**: Gemini AI • Stable Diffusion • Hugging Face")
479
  st.markdown("💡 **Tips**: Use detailed topic descriptions for better results. Experiment with different art styles!")
480
 
481
  # Sidebar info
@@ -493,13 +585,35 @@ with st.sidebar:
493
  st.markdown("### 🔧 Features")
494
  st.markdown("""
495
  - ✅ **AI Script Generation** with Gemini
496
- - ✅ **Visual Storyboards** with Stable Diffusion
497
  - ✅ **Text-to-Speech** for narration
498
  - ✅ **Multiple Export Formats**
499
  - ✅ **Scene Regeneration**
 
 
 
 
 
 
 
 
 
500
  """)
501
 
502
  if not MOVIEPY_AVAILABLE:
503
  st.markdown("---")
504
  st.markdown("### ℹ️ Note")
505
- st.markdown("Video preview feature disabled for faster deployment. GIF preview available!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  def generate_script_with_gemini(topic, length, style, tone, platform):
106
  """Generate video script using Gemini API"""
107
  try:
108
+ # Use the current Gemini model
109
+ model = genai.GenerativeModel('gemini-1.5-flash')
110
 
111
  prompt = f"""
112
  Create a detailed video script for the following specifications:
 
135
 
136
  Make sure the scenes add up to the total duration and are engaging for {platform}.
137
  Include specific visual descriptions that can be used to generate storyboard images.
138
+ Make sure to return valid JSON only, no additional text or formatting.
139
  """
140
 
141
  response = model.generate_content(prompt)
 
156
  return None
157
  except Exception as e:
158
  st.error(f"Error generating script: {str(e)}")
159
+ # Try alternative model if the first one fails
160
+ try:
161
+ st.info("Trying alternative model...")
162
+ model = genai.GenerativeModel('gemini-1.5-pro')
163
+ response = model.generate_content(prompt)
164
+ response_text = response.text.strip()
165
+ if response_text.startswith("```json"):
166
+ response_text = response_text[7:-3]
167
+ elif response_text.startswith("```"):
168
+ response_text = response_text[3:-3]
169
+ script_data = json.loads(response_text)
170
+ return script_data
171
+ except:
172
+ return None
173
 
174
  def generate_storyboard_image(scene_description, art_style):
175
+ """Generate storyboard image using free Hugging Face models"""
176
  try:
177
+ # Initialize Hugging Face client
178
  client = InferenceClient(token=hf_token)
179
 
180
+ # Style-specific enhancements
181
  style_prompts = {
182
+ "Realistic": "photorealistic, high quality, detailed, professional photography",
183
+ "Cartoon": "cartoon style, animated, colorful, Disney-like, illustration",
184
+ "Cinematic": "cinematic lighting, dramatic, film still, high contrast, movie scene",
185
+ "Minimalistic": "minimalist, clean, simple, geometric, modern design",
186
+ "Sketch": "pencil sketch, hand-drawn, artistic, black and white line art",
187
+ "Digital Art": "digital art, concept art, detailed, vibrant colors, fantasy art"
188
  }
189
 
190
+ enhanced_prompt = f"{scene_description}, {style_prompts.get(art_style, '')}, storyboard frame, professional, high quality"
191
 
192
+ # Try multiple free models in case one fails
193
+ models_to_try = [
194
+ "black-forest-labs/FLUX.1-schnell",
195
+ "stabilityai/stable-diffusion-2-1",
196
+ "runwayml/stable-diffusion-v1-5",
197
+ "CompVis/stable-diffusion-v1-4"
198
+ ]
199
 
200
+ for model in models_to_try:
201
+ try:
202
+ image = client.text_to_image(
203
+ enhanced_prompt,
204
+ model=model
205
+ )
206
+ return image
207
+ except Exception as model_error:
208
+ st.warning(f"Model {model} failed, trying next...")
209
+ continue
210
+
211
+ # If all models fail, return None
212
+ st.error("All image generation models failed")
213
+ return None
214
 
215
  except Exception as e:
216
  st.error(f"Error generating image: {str(e)}")
 
314
  st.error(f"Error generating speech: {str(e)}")
315
  return None
316
 
317
+ def generate_fallback_script(topic, length, style, tone, platform):
318
+ """Generate a fallback script if Gemini fails"""
319
+ try:
320
+ # Parse length to get number of seconds
321
+ if "second" in length:
322
+ total_seconds = int(length.split()[0])
323
+ elif "minute" in length:
324
+ minutes = int(length.split()[0])
325
+ total_seconds = minutes * 60
326
+ else:
327
+ total_seconds = 60
328
+
329
+ # Calculate number of scenes (roughly 10-15 seconds per scene)
330
+ num_scenes = max(2, total_seconds // 12)
331
+ scene_duration = total_seconds // num_scenes
332
+
333
+ scenes = []
334
+ for i in range(num_scenes):
335
+ scene = {
336
+ "scene_number": i + 1,
337
+ "duration": f"{scene_duration} seconds",
338
+ "description": f"Scene {i+1} showing {topic} in {style.lower()} style",
339
+ "dialogue": f"Narration for scene {i+1} about {topic}",
340
+ "camera_angle": "Medium shot" if i % 2 == 0 else "Close-up",
341
+ "visual_elements": f"Key visuals related to {topic}"
342
+ }
343
+ scenes.append(scene)
344
+
345
+ return {
346
+ "title": f"{topic} - {style} Video",
347
+ "total_duration": length,
348
+ "scenes": scenes
349
+ }
350
+ except:
351
+ return None
352
+
353
  # Main generation button
354
  if st.button("🚀 Generate Video Script & Storyboard", type="primary"):
355
  if not video_topic:
 
358
  with st.spinner("🤖 Generating script with AI..."):
359
  script_data = generate_script_with_gemini(video_topic, video_length, style, tone, platform)
360
 
361
+ # If Gemini fails, use fallback
362
+ if not script_data:
363
+ st.warning("Primary AI model failed, using fallback script generation...")
364
+ script_data = generate_fallback_script(video_topic, video_length, style, tone, platform)
365
+
366
  if script_data:
367
  st.session_state.generated_script = script_data
368
  st.success("✅ Script generated successfully!")
 
373
  progress_bar = st.progress(0)
374
 
375
  for i, scene in enumerate(script_data['scenes']):
376
+ with st.spinner(f"Generating image {i+1}/{len(script_data['scenes'])}..."):
377
+ image = generate_storyboard_image(
378
+ scene['description'],
379
+ art_style
380
+ )
381
+ images.append(image)
382
+ progress_bar.progress((i + 1) / len(script_data['scenes']))
383
+
384
+ # Add a small delay to avoid rate limiting
385
+ time.sleep(1)
386
 
387
  st.session_state.storyboard_images = images
388
 
389
  st.success("✅ Storyboard images generated!")
390
+ else:
391
+ st.error("Failed to generate script. Please try again or modify your request.")
392
 
393
  # Display results
394
  if st.session_state.generated_script:
 
421
  # Add text-to-speech for dialogue
422
  if scene.get('dialogue'):
423
  if st.button(f"🔊 Play Audio - Scene {i}", key=f"audio_{i}"):
424
+ with st.spinner("Generating audio..."):
425
+ audio_buffer = text_to_speech(scene['dialogue'])
426
+ if audio_buffer:
427
+ st.audio(audio_buffer.getvalue(), format='audio/mp3')
428
 
429
  with tab2:
430
  if st.session_state.storyboard_images:
431
  st.subheader("🎨 Storyboard Images")
432
 
433
  for i, (scene, image) in enumerate(zip(script_data['scenes'], st.session_state.storyboard_images)):
434
+ col1, col2 = st.columns([1, 2])
435
+
436
+ with col1:
437
+ if image:
438
  st.image(image, caption=f"Scene {i+1}", use_column_width=True)
439
 
440
  # Refinement option
 
447
  if new_image:
448
  st.session_state.storyboard_images[i] = new_image
449
  st.rerun()
450
+ else:
451
+ st.write("❌ Image generation failed for this scene")
452
+ if st.button(f"🔄 Try Again - Scene {i+1}", key=f"retry_{i}"):
453
+ with st.spinner(f"Trying to generate scene {i+1}..."):
454
+ new_image = generate_storyboard_image(
455
+ scene['description'],
456
+ art_style
457
+ )
458
+ if new_image:
459
+ st.session_state.storyboard_images[i] = new_image
460
+ st.rerun()
461
+
462
+ with col2:
463
+ st.write(f"**Scene {i+1}: {scene.get('duration', 'N/A')}**")
464
+ st.write(f"**Description:** {scene.get('description', 'N/A')}")
465
+ st.write(f"**Dialogue:** {scene.get('dialogue', 'N/A')}")
466
+ else:
467
+ st.info("No storyboard images generated yet. Click the generate button above.")
468
 
469
  with tab3:
470
  st.subheader("🎥 Video Preview")
 
567
 
568
  # Footer
569
  st.markdown("---")
570
+ st.markdown("🤖 **Powered by**: Gemini AI • Free Hugging Face Models • gTTS")
571
  st.markdown("💡 **Tips**: Use detailed topic descriptions for better results. Experiment with different art styles!")
572
 
573
  # Sidebar info
 
585
  st.markdown("### 🔧 Features")
586
  st.markdown("""
587
  - ✅ **AI Script Generation** with Gemini
588
+ - ✅ **Visual Storyboards** with Free HF Models
589
  - ✅ **Text-to-Speech** for narration
590
  - ✅ **Multiple Export Formats**
591
  - ✅ **Scene Regeneration**
592
+ - ✅ **GIF Preview Creation**
593
+ """)
594
+
595
+ st.markdown("---")
596
+ st.markdown("### 🆓 Free Models Used")
597
+ st.markdown("""
598
+ - **Script**: Gemini 1.5 Flash/Pro
599
+ - **Images**: FLUX.1, Stable Diffusion
600
+ - **Speech**: Google TTS
601
  """)
602
 
603
  if not MOVIEPY_AVAILABLE:
604
  st.markdown("---")
605
  st.markdown("### ℹ️ Note")
606
+ st.markdown("Video preview feature disabled for faster deployment. GIF preview available!")
607
+
608
+ # Additional configuration section
609
+ st.sidebar.markdown("---")
610
+ st.sidebar.markdown("### ⚙️ Configuration")
611
+ st.sidebar.markdown("""
612
+ **Required Environment Variables:**
613
+ - `GEMINI_API_KEY`: Your Gemini API key
614
+ - `HF_TOKEN`: Your Hugging Face token
615
+
616
+ **Free API Limits:**
617
+ - Gemini: 15 RPM, 32K TPM
618
+ - Hugging Face: Rate limited per model
619
+ """)