maria355 committed on
Commit
ae2847b
·
verified ·
1 Parent(s): f6e1d0e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +420 -0
app.py ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import google.generativeai as genai
3
+ from huggingface_hub import InferenceClient
4
+ import requests
5
+ from PIL import Image
6
+ import io
7
+ import json
8
+ import time
9
+ import cv2
10
+ import numpy as np
11
+ from moviepy.editor import ImageSequenceClip, concatenate_videoclips
12
+ import tempfile
13
+ import os
14
+ from gtts import gTTS
15
+ import base64
16
+
17
# Configure page (must run before any other Streamlit call renders output)
st.set_page_config(
    page_title="AI Video Script & Storyboard Generator",
    page_icon="🎬",
    layout="wide",
)

# Initialize session state: seed each key only on the first run of a session
# so values written by later reruns are not overwritten.
for _state_key, _initial_value in (
    ('generated_script', None),
    ('storyboard_images', []),
    ('video_preview', None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Sidebar for API configuration
st.sidebar.title("🔧 API Configuration")
gemini_api_key = st.sidebar.text_input(
    "Gemini API Key",
    type="password",
    help="Get your API key from Google AI Studio",
)
hf_token = st.sidebar.text_input(
    "Hugging Face Token",
    type="password",
    help="Get your token from Hugging Face",
)

# The Gemini client is only configured once a key has been entered.
if gemini_api_key:
    genai.configure(api_key=gemini_api_key)

# Main title
st.title("🎬 AI Video Script & Storyboard Generator")
st.markdown("Create professional video scripts and visual storyboards with AI assistance")

# Input section
st.header("📝 Video Specifications")

# Choices offered by the select boxes below.
VIDEO_LENGTH_OPTIONS = ["30 seconds", "1 minute", "2 minutes", "3 minutes", "5 minutes", "Custom"]
VIDEO_STYLE_OPTIONS = ["Explainer", "Cinematic", "Tutorial", "Vlog", "Animation", "Documentary", "Commercial"]
TONE_OPTIONS = ["Professional", "Funny", "Serious", "Dramatic", "Inspirational", "Casual", "Educational"]
PLATFORM_OPTIONS = ["YouTube", "TikTok", "Instagram Reels", "LinkedIn", "Presentation", "General"]
ART_STYLE_OPTIONS = ["Realistic", "Cartoon", "Cinematic", "Minimalistic", "Sketch", "Digital Art"]

left_col, right_col = st.columns(2)

with left_col:
    video_topic = st.text_area(
        "Video Topic",
        placeholder="Enter your video topic or detailed description...",
        height=100,
    )

    video_length = st.selectbox("Video Length", VIDEO_LENGTH_OPTIONS)

    # "Custom" swaps the preset for a user-entered number of seconds,
    # expressed in the same "<n> seconds" form as the presets.
    if video_length == "Custom":
        custom_length = st.number_input("Custom length (seconds)", min_value=10, max_value=600, value=60)
        video_length = f"{custom_length} seconds"

    style = st.selectbox("Video Style", VIDEO_STYLE_OPTIONS)

with right_col:
    tone = st.selectbox("Tone/Emotion", TONE_OPTIONS)

    platform = st.selectbox("Target Platform", PLATFORM_OPTIONS)

    art_style = st.selectbox("Storyboard Art Style", ART_STYLE_OPTIONS)
85
+
86
+ # Functions for AI generation
87
def _extract_json_payload(text):
    """Return the JSON document embedded in a model reply.

    Handles a bare JSON reply, a reply wrapped in a Markdown code fence
    (with or without a ``json`` tag, with or without a closing fence), and
    a JSON object surrounded by explanatory prose.
    """
    cleaned = text.strip()

    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
        # Only cut at a closing fence when one is actually present; blindly
        # slicing off three characters would eat the end of the JSON itself.
        closing = cleaned.rfind("```")
        if closing != -1:
            cleaned = cleaned[:closing]
        cleaned = cleaned.strip()

    if not cleaned.startswith(("{", "[")):
        # Prose around the payload: keep the outermost {...} span.
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if start != -1 and end > start:
            cleaned = cleaned[start:end + 1]

    return cleaned


def generate_script_with_gemini(topic, length, style, tone, platform):
    """Generate a video script with the Gemini API.

    Args:
        topic: Free-text description of the video.
        length: Target duration as text, e.g. "30 seconds".
        style: Video style label inserted into the prompt.
        tone: Tone/emotion label inserted into the prompt.
        platform: Target platform label inserted into the prompt.

    Returns:
        The parsed script as a dict containing at least a ``scenes`` list,
        or None when the API key is missing, the request fails, or the
        reply is not usable. Every failure is reported through ``st.error``.
    """
    if not gemini_api_key:
        st.error("Please provide Gemini API key in the sidebar")
        return None

    try:
        # NOTE(review): confirm 'gemini-pro' is still a served model name.
        model = genai.GenerativeModel('gemini-pro')

        prompt = f"""
        Create a detailed video script for the following specifications:

        Topic: {topic}
        Length: {length}
        Style: {style}
        Tone: {tone}
        Platform: {platform}

        Format the output as JSON with the following structure:
        {{
        "title": "Video Title",
        "total_duration": "{length}",
        "scenes": [
        {{
        "scene_number": 1,
        "duration": "10 seconds",
        "description": "Visual description for storyboard",
        "dialogue": "Script/narration text",
        "camera_angle": "Wide shot/Close-up/etc",
        "visual_elements": "Key visual elements to include"
        }}
        ]
        }}

        Make sure the scenes add up to the total duration and are engaging for {platform}.
        Include specific visual descriptions that can be used to generate storyboard images.
        """

        response = model.generate_content(prompt)
        script_data = json.loads(_extract_json_payload(response.text))

    except Exception as e:
        # Boundary handler: network, API and JSON errors all end up here.
        st.error(f"Error generating script: {str(e)}")
        return None

    # Callers index script_data['scenes'] directly, so reject replies that
    # parsed as JSON but do not have the requested shape.
    if not isinstance(script_data, dict) or not isinstance(script_data.get("scenes"), list):
        st.error("Error generating script: response is missing a 'scenes' list")
        return None

    return script_data
140
+
141
def _build_storyboard_prompt(scene_description, art_style):
    """Compose the text-to-image prompt for one storyboard frame."""
    # Style keywords appended to the scene description, keyed by the
    # art-style label chosen in the UI.
    style_prompts = {
        "Realistic": "photorealistic, high quality, detailed",
        "Cartoon": "cartoon style, animated, colorful, Disney-like",
        "Cinematic": "cinematic lighting, dramatic, film still, high contrast",
        "Minimalistic": "minimalist, clean, simple, geometric",
        "Sketch": "pencil sketch, hand-drawn, artistic, black and white",
        "Digital Art": "digital art, concept art, detailed, vibrant colors",
    }

    fragments = [
        str(scene_description or "").strip(),
        style_prompts.get(art_style, ""),
        "storyboard frame, professional",
    ]
    # Skip empty fragments so an unknown style or a missing description
    # does not leave stray ", ," separators in the prompt.
    return ", ".join(part for part in fragments if part)


def generate_storyboard_image(scene_description, art_style, hf_token):
    """Generate a storyboard image through the Hugging Face Inference API.

    Args:
        scene_description: Visual description of the scene.
        art_style: Art-style label; unknown labels add no style keywords.
        hf_token: Hugging Face access token.

    Returns:
        Whatever ``InferenceClient.text_to_image`` returns for the prompt,
        or None when the token is missing or the request fails. Failures
        are reported through ``st.error``.
    """
    if not hf_token:
        st.error("Please provide Hugging Face token")
        return None

    try:
        # Use Hugging Face Inference API for image generation
        client = InferenceClient(token=hf_token)

        enhanced_prompt = _build_storyboard_prompt(scene_description, art_style)

        image = client.text_to_image(
            enhanced_prompt,
            model="stabilityai/stable-diffusion-2-1"
        )

        return image

    except Exception as e:
        st.error(f"Error generating image: {str(e)}")
        return None
173
+
174
def _parse_duration_seconds(raw, default=5.0):
    """Convert a scene duration such as "10 seconds", "10s" or 7 to seconds.

    Values mentioning minutes ("1.5 minutes") are scaled by 60. Anything
    that cannot be parsed, or is not positive, yields ``default``.
    """
    if raw is None or isinstance(raw, bool):
        return default

    if isinstance(raw, (int, float)):
        seconds = float(raw)
    else:
        text = str(raw).strip().lower()
        tokens = text.split()
        if not tokens:
            return default
        # Drop a unit glued to the number ("10s" -> "10").
        number_part = tokens[0].rstrip("abcdefghijklmnopqrstuvwxyz")
        try:
            seconds = float(number_part)
        except ValueError:
            return default
        if "min" in text:
            seconds *= 60

    return seconds if seconds > 0 else default


def create_video_preview(images, script_data):
    """Create a simple slideshow preview from the storyboard images.

    Each available image is shown as a static frame for the duration of its
    scene (no zoom or pan is applied), scaled to a height of 480 pixels, and
    the frames are concatenated into an MP4 written to a fresh temporary
    directory.

    Args:
        images: Storyboard images in scene order; falsy entries (failed
            generations) are skipped. Assumed to be PIL images - the code
            relies on ``.mode``, ``.convert`` and ``.save``.
        script_data: Script dict; ``script_data['scenes'][i]['duration']``
            gives the display time of image ``i``.

    Returns:
        Path of the rendered MP4, or None when there is nothing to render
        or rendering fails (failures are reported through ``st.error``).
    """
    if not images or not script_data:
        return None

    try:
        scenes = script_data.get('scenes') or []
        frames = [(image, scene) for image, scene in zip(images, scenes) if image]
        if not frames:
            # Nothing usable: return before creating any temporary files.
            return None

        # The directory is intentionally left in place: the returned video
        # path lives inside it and is read later by the caller.
        temp_dir = tempfile.mkdtemp()

        clips = []
        for i, (image, scene) in enumerate(frames):
            # Save image temporarily. JPEG has no alpha channel or palette
            # support, so normalise to RGB first.
            img_path = os.path.join(temp_dir, f"scene_{i}.jpg")
            frame = image if image.mode == "RGB" else image.convert("RGB")
            frame.save(img_path)

            duration = _parse_duration_seconds(scene.get('duration', '5 seconds'))

            clip = ImageSequenceClip([img_path], durations=[duration])
            clips.append(clip.resize(height=480))  # Standardize height

        # Concatenate all clips
        final_video = concatenate_videoclips(clips, method="compose")

        # Save video
        video_path = os.path.join(temp_dir, "preview.mp4")
        final_video.write_videofile(
            video_path,
            fps=24,
            codec='libx264',
            audio_codec='aac',
            verbose=False,
            logger=None
        )

        return video_path

    except Exception as e:
        st.error(f"Error creating video preview: {str(e)}")
        return None
227
+
228
def text_to_speech(text, language='en'):
    """Synthesize ``text`` with gTTS and return the audio in memory.

    Args:
        text: Text to speak.
        language: Language code passed to gTTS as ``lang``.

    Returns:
        An ``io.BytesIO`` positioned at the start of the audio data, or
        None when synthesis fails (the error is shown via ``st.error``).
    """
    try:
        speech = gTTS(text=text, lang=language, slow=False)
        mp3_stream = io.BytesIO()
        speech.write_to_fp(mp3_stream)
    except Exception as exc:
        st.error(f"Error generating speech: {str(exc)}")
        return None

    # Rewind so consumers read from the first byte.
    mp3_stream.seek(0)
    return mp3_stream
239
+
240
# Main generation button: validates the inputs, generates the script, then
# one storyboard image per scene. Results are kept in st.session_state so
# they survive Streamlit reruns.
if st.button("🚀 Generate Video Script & Storyboard", type="primary"):
    if not video_topic or not video_topic.strip():
        st.error("Please enter a video topic")
    elif not gemini_api_key:
        st.error("Please provide Gemini API key")
    elif not hf_token:
        st.error("Please provide Hugging Face token")
    else:
        with st.spinner("🤖 Generating script with AI..."):
            script_data = generate_script_with_gemini(video_topic, video_length, style, tone, platform)

        if script_data and not isinstance(script_data, dict):
            # The reply parsed as JSON but is not the requested object.
            st.error("Error generating script: unexpected response format")
        elif script_data:
            scenes = script_data.get('scenes') or []

            st.session_state.generated_script = script_data
            # Drop artefacts that belong to a previously generated script so
            # they are never shown next to the new one.
            st.session_state.storyboard_images = []
            st.session_state.video_preview = None
            st.success("✅ Script generated successfully!")

            # Generate storyboard images
            with st.spinner("🎨 Creating storyboard images..."):
                images = []
                progress_bar = st.progress(0)

                for done, scene in enumerate(scenes, 1):
                    image = generate_storyboard_image(
                        scene.get('description', ''),
                        art_style,
                        hf_token
                    )
                    # Failed generations are stored as None to keep the list
                    # aligned with the scene order.
                    images.append(image)
                    progress_bar.progress(done / len(scenes))

                st.session_state.storyboard_images = images

            # Report what actually happened instead of always claiming success.
            generated = sum(1 for image in images if image)
            if scenes and generated == len(scenes):
                st.success("✅ Storyboard images generated!")
            elif generated:
                st.warning(f"Storyboard images generated for {generated} of {len(scenes)} scenes")
            else:
                st.error("No storyboard images could be generated")
273
+
274
# Display results: renders the stored script, storyboard and preview, plus
# the export controls. Runs on every rerun while a script is in session state.
if st.session_state.generated_script:
    script_data = st.session_state.generated_script
    # Read the scene list once and tolerate a missing/empty 'scenes' key so
    # an incomplete model reply cannot crash the page.
    scenes = script_data.get('scenes') or []

    st.header("📜 Generated Script")
    st.subheader(f"🎬 {script_data.get('title', 'Video Title')}")
    st.write(f"**Duration:** {script_data.get('total_duration', 'N/A')}")

    # Display script in tabs
    tab1, tab2, tab3 = st.tabs(["📝 Script Details", "🖼️ Storyboard", "🎥 Preview"])

    with tab1:
        for i, scene in enumerate(scenes, 1):
            with st.expander(f"Scene {i} - {scene.get('duration', 'N/A')}"):
                col1, col2 = st.columns(2)

                with col1:
                    st.write("**Visual Description:**")
                    st.write(scene.get('description', 'N/A'))
                    st.write("**Camera Angle:**")
                    st.write(scene.get('camera_angle', 'N/A'))

                with col2:
                    st.write("**Dialogue/Narration:**")
                    st.write(scene.get('dialogue', 'N/A'))
                    st.write("**Visual Elements:**")
                    st.write(scene.get('visual_elements', 'N/A'))

                # Add text-to-speech for dialogue
                if scene.get('dialogue'):
                    if st.button(f"🔊 Play Audio - Scene {i}", key=f"audio_{i}"):
                        audio_buffer = text_to_speech(scene['dialogue'])
                        if audio_buffer:
                            st.audio(audio_buffer.getvalue(), format='audio/mp3')

    with tab2:
        if st.session_state.storyboard_images:
            st.subheader("🎨 Storyboard Images")

            for i, (scene, image) in enumerate(zip(scenes, st.session_state.storyboard_images)):
                col1, col2 = st.columns([1, 2])

                with col1:
                    if image:
                        st.image(image, caption=f"Scene {i+1}", use_column_width=True)
                    else:
                        # Keep failed scenes visible so they can be retried.
                        st.warning(f"No image was generated for scene {i+1}")

                    # Refinement option
                    if st.button(f"🔄 Regenerate Scene {i+1}", key=f"regen_{i}"):
                        with st.spinner(f"Regenerating scene {i+1}..."):
                            new_image = generate_storyboard_image(
                                scene.get('description', ''),
                                art_style,
                                hf_token
                            )
                            if new_image:
                                st.session_state.storyboard_images[i] = new_image
                                # st.rerun replaced st.experimental_rerun;
                                # fall back for older Streamlit releases.
                                rerun = getattr(st, "rerun", None) or st.experimental_rerun
                                rerun()

                with col2:
                    st.write(f"**Scene {i+1}: {scene.get('duration', 'N/A')}**")
                    st.write(f"**Description:** {scene.get('description', 'N/A')}")
                    st.write(f"**Dialogue:** {scene.get('dialogue', 'N/A')}")

    with tab3:
        st.subheader("🎥 Video Preview")

        if st.button("🎬 Create Video Preview"):
            if st.session_state.storyboard_images:
                with st.spinner("Creating video preview..."):
                    video_path = create_video_preview(
                        st.session_state.storyboard_images,
                        script_data
                    )

                    if video_path:
                        st.session_state.video_preview = video_path
                        st.success("Video preview created!")
            else:
                st.warning("Generate storyboard images before creating a preview")

        if st.session_state.video_preview and os.path.exists(st.session_state.video_preview):
            st.video(st.session_state.video_preview)

    # Export options
    st.subheader("📥 Export Options")

    col1, col2, col3 = st.columns(3)

    with col1:
        if st.button("📄 Download Script (JSON)"):
            script_json = json.dumps(script_data, indent=2)
            st.download_button(
                label="Download JSON",
                data=script_json,
                file_name="video_script.json",
                mime="application/json"
            )

    with col2:
        if st.button("🖼️ Download Storyboard Images"):
            # Only scenes that actually have an image go into the archive;
            # numbering follows the scene order (1-based).
            available = [
                (number, image)
                for number, image in enumerate(st.session_state.storyboard_images, 1)
                if image
            ]
            if available:
                # Create a zip file with all images
                import zipfile
                zip_buffer = io.BytesIO()

                with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
                    for number, image in available:
                        img_buffer = io.BytesIO()
                        image.save(img_buffer, format='PNG')
                        zip_file.writestr(f"scene_{number}.png", img_buffer.getvalue())

                st.download_button(
                    label="Download ZIP",
                    data=zip_buffer.getvalue(),
                    file_name="storyboard_images.zip",
                    mime="application/zip"
                )
            else:
                st.warning("No storyboard images available to download")

    with col3:
        # The preview lives in a temporary directory, so confirm the file is
        # still there before opening it.
        if st.session_state.video_preview and os.path.exists(st.session_state.video_preview):
            with open(st.session_state.video_preview, 'rb') as f:
                st.download_button(
                    label="🎥 Download Video",
                    data=f.read(),
                    file_name="video_preview.mp4",
                    mime="video/mp4"
                )
400
+
401
# Footer: a divider followed by the credits and a usage tip.
for _footer_line in (
    "---",
    "🤖 **Powered by**: Gemini AI • Stable Diffusion • Hugging Face",
    "💡 **Tips**: Use detailed topic descriptions for better results. Experiment with different art styles!",
):
    st.markdown(_footer_line)

# Sidebar info: short usage guide and links for obtaining credentials.
st.sidebar.markdown("---")
st.sidebar.markdown("### 📚 How to Use")
st.sidebar.markdown("""
    1. **Set up APIs**: Add your Gemini and HuggingFace tokens
    2. **Define Video**: Enter topic, length, and style
    3. **Generate**: Click the generate button
    4. **Refine**: Regenerate individual scenes if needed
    5. **Export**: Download script, images, or video
    """)

st.sidebar.markdown("### 🔗 Get API Keys")
st.sidebar.markdown("[Gemini API](https://makersuite.google.com/app/apikey)")
st.sidebar.markdown("[Hugging Face Token](https://huggingface.co/settings/tokens)")