tthhanh committed on
Commit
8a15bcf
·
1 Parent(s): c1ab37f

fix: remove old script generator

Browse files
Files changed (2) hide show
  1. src/app/app.py +0 -23
  2. src/app/tools/script_generator.py +0 -210
src/app/app.py CHANGED
@@ -15,7 +15,6 @@ from tools.music_selector import music_selector
15
  from tools.video_script_generator import video_script_generator
16
  from workflow_ui import workflow_ui
17
  from tools.text_to_speech import text_to_speech_simple
18
- from tools.script_generator import script_generator
19
  from tools.subtitle_creator import subtitle_creator
20
 
21
 
@@ -366,28 +365,6 @@ Check out our LinkedIn post about Vidzly from the Agents & MCP Hackathon 2025!
366
  api_name="text_to_speech",
367
  )
368
 
369
- with gr.Tab("Script Generator"):
370
- gr.Interface(
371
- fn=script_generator,
372
- inputs=[
373
- gr.File(
374
- label="Video Materials (Required - upload multiple videos)",
375
- file_count="multiple",
376
- file_types=["video"],
377
- ),
378
- gr.Textbox(
379
- label="User Prompt (Optional)",
380
- placeholder="e.g., 'Create an energetic travel montage with upbeat pacing' or 'Make a dramatic product reveal video'",
381
- lines=3,
382
- info="Optional: Provide specific instructions or creative direction. If left empty, the AI will generate a script based on the video content analysis.",
383
- ),
384
- ],
385
- outputs=[gr.Textbox(label="Video Production Script (JSON)", lines=25)],
386
- title="Script Generator",
387
- description="Generate comprehensive video production scripts from multiple video materials. Upload your source videos and optionally provide creative direction. The AI will analyze the content and create a detailed script including scene breakdowns, timing, transitions, audio recommendations, and visual effects. Outputs both structured JSON and narrative formats.",
388
- api_name="script_generator",
389
- )
390
-
391
  with gr.Tab("Subtitle Creator"):
392
  gr.Interface(
393
  fn=subtitle_creator,
 
15
  from tools.video_script_generator import video_script_generator
16
  from workflow_ui import workflow_ui
17
  from tools.text_to_speech import text_to_speech_simple
 
18
  from tools.subtitle_creator import subtitle_creator
19
 
20
 
 
365
  api_name="text_to_speech",
366
  )
367
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  with gr.Tab("Subtitle Creator"):
369
  gr.Interface(
370
  fn=subtitle_creator,
src/app/tools/script_generator.py DELETED
@@ -1,210 +0,0 @@
1
- import cv2
2
- import json
3
- import os
4
- import mimetypes
5
- import google.genai as genai
6
- from typing import Optional
7
-
8
-
9
- def script_generator(video_inputs, user_prompt: Optional[str] = None) -> str:
10
- """
11
- Generate a detailed video script based on multiple video materials.
12
- Uses Google Gemini's native video understanding to analyze material videos
13
- and create a comprehensive script for making a short video.
14
-
15
- Args:
16
- video_inputs: List of video file paths or Gradio video inputs
17
- user_prompt (str, optional): User's custom prompt/request. If not provided,
18
- AI will generate a script based on material analysis.
19
-
20
- Returns:
21
- str: JSON string containing a detailed video script with scene breakdowns,
22
- timing, transitions, and creative suggestions
23
- """
24
- try:
25
- # Handle various input formats
26
- if not video_inputs:
27
- return json.dumps({"error": "No video files provided"})
28
-
29
- # Normalize video inputs to list of paths
30
- video_paths = []
31
- if isinstance(video_inputs, list):
32
- for video_input in video_inputs:
33
- if isinstance(video_input, tuple):
34
- video_paths.append(video_input[0])
35
- elif isinstance(video_input, str):
36
- video_paths.append(video_input)
37
- elif isinstance(video_inputs, str):
38
- video_paths = [video_inputs]
39
- elif isinstance(video_inputs, tuple):
40
- video_paths = [video_inputs[0]]
41
- else:
42
- return json.dumps({"error": "Invalid video input format"})
43
-
44
- # Validate all video files exist and extract metadata
45
- videos_metadata = []
46
- for idx, video_path in enumerate(video_paths):
47
- if not os.path.exists(video_path):
48
- return json.dumps({"error": f"Video file not found: {video_path}"})
49
-
50
- cap = cv2.VideoCapture(video_path)
51
- if not cap.isOpened():
52
- return json.dumps({"error": f"Could not open video file: {video_path}"})
53
-
54
- video_fps = cap.get(cv2.CAP_PROP_FPS)
55
- frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
56
- duration = frame_count / video_fps if video_fps > 0 else 0
57
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
58
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
59
- cap.release()
60
-
61
- videos_metadata.append(
62
- {
63
- "index": idx,
64
- "filename": os.path.basename(video_path),
65
- "duration": round(duration, 2),
66
- "resolution": f"{width}x{height}",
67
- "fps": round(video_fps, 2),
68
- "frame_count": frame_count,
69
- }
70
- )
71
-
72
- # Check for API key
73
- api_key = os.getenv("GOOGLE_API_KEY")
74
- if not api_key:
75
- return json.dumps(
76
- {
77
- "error": "GOOGLE_API_KEY environment variable not set",
78
- "videos_analyzed": videos_metadata,
79
- "script": "AI script generation requires GOOGLE_API_KEY",
80
- }
81
- )
82
-
83
- # Initialize Gemini client
84
- client = genai.Client(api_key=api_key)
85
-
86
- # Prepare video parts for multimodal input
87
- video_parts = []
88
- for video_path in video_paths:
89
- with open(video_path, "rb") as f:
90
- video_data = f.read()
91
-
92
- mime_type, _ = mimetypes.guess_type(video_path)
93
- if not mime_type or not mime_type.startswith("video/"):
94
- mime_type = "video/mp4"
95
-
96
- video_metadata = genai.types.VideoMetadata(fps=2.0)
97
- video_blob = genai.types.Blob(data=video_data, mime_type=mime_type)
98
- video_part = genai.types.Part(
99
- inline_data=video_blob,
100
- videoMetadata=video_metadata,
101
- )
102
- video_parts.append(video_part)
103
-
104
- # Create comprehensive prompt
105
- if user_prompt and user_prompt.strip():
106
- # Use user's prompt
107
- base_prompt = f"""User Request: {user_prompt}
108
-
109
- Based on the user's request above and the provided video materials, create a detailed video production script."""
110
- else:
111
- # Generate default prompt
112
- base_prompt = """Analyze the provided video materials and create a comprehensive video production script for making an engaging short video."""
113
-
114
- full_prompt = f"""{base_prompt}
115
-
116
- I have {len(video_paths)} video file(s) as source material. Please analyze each video and create a detailed script that includes:
117
-
118
- 1. **Concept Overview**: Describe the overall theme, message, and creative direction for the final video.
119
-
120
- 2. **Target Duration**: Recommend optimal video length based on content (typically 15-60 seconds for short-form).
121
-
122
- 3. **Scene Breakdown**: For each scene in the final video, specify:
123
- - Scene number and description
124
- - Which source video to use (reference by index: 0, 1, 2, etc.)
125
- - Exact start and end timestamps from the source video
126
- - Duration of the scene
127
- - Visual description and key moments
128
- - Suggested transitions (e.g., "fade", "crossfade", "wipe", "zoom")
129
-
130
- 4. **Audio Recommendations**:
131
- - Background music mood and style
132
- - BPM (beats per minute) suggestion
133
- - Volume levels and audio effects
134
- - Any voiceover or text-to-speech suggestions
135
-
136
- 5. **Text Overlays**: Suggest any text, captions, or titles to add, including:
137
- - Text content
138
- - Timing (when to appear)
139
- - Style suggestions (font, size, position, animation)
140
-
141
- 6. **Visual Effects**: Recommend any filters, color grading, speed adjustments, or special effects.
142
-
143
- 7. **Pacing & Flow**: Explain the rhythm and flow of the video, including any build-ups, climaxes, or emotional arcs.
144
-
145
- 8. **Call-to-Action**: Suggest ending elements (e.g., logo, text, link, subscribe prompt).
146
-
147
- Please provide the script in a structured JSON format that includes:
148
- - "concept": overall theme and message
149
- - "target_duration": recommended total duration in seconds
150
- - "total_duration": sum of all scene durations
151
- - "scenes": array of scene objects with fields:
152
- - "scene_id": integer
153
- - "source_video": index of source video (0-based)
154
- - "start_time": start timestamp in source video (seconds)
155
- - "end_time": end timestamp in source video (seconds)
156
- - "duration": scene duration (seconds)
157
- - "description": what happens in this scene
158
- - "transition_in": transition effect when entering scene
159
- - "transition_out": transition effect when exiting scene
160
- - "audio": object with "mood", "style", "bpm", "volume"
161
- - "text_overlays": array of text overlay objects
162
- - "visual_effects": array of suggested effects
163
- - "call_to_action": string description
164
-
165
- Also provide a human-readable narrative version of the script."""
166
-
167
- # Call Gemini API with all video materials
168
- contents = [full_prompt] + video_parts
169
- response = client.models.generate_content(
170
- model="gemini-2.5-flash-lite",
171
- contents=contents, # type: ignore
172
- )
173
-
174
- # Parse response
175
- script_text: str = response.text if response.text else ""
176
-
177
- # Try to extract JSON if present
178
- json_match = None
179
- if "```json" in script_text:
180
- # Extract JSON code block
181
- import re
182
-
183
- json_pattern = r"```json\s*([\s\S]*?)\s*```"
184
- match = re.search(json_pattern, script_text)
185
- if match:
186
- json_match = match.group(1)
187
-
188
- # Structure the response
189
- result = {
190
- "videos_analyzed": videos_metadata,
191
- "user_prompt": (
192
- user_prompt if user_prompt else "Auto-generated based on materials"
193
- ),
194
- "script_narrative": script_text,
195
- }
196
-
197
- # If we found structured JSON, try to parse and include it
198
- if json_match:
199
- try:
200
- structured_script = json.loads(json_match)
201
- result["structured_script"] = structured_script
202
- except json.JSONDecodeError:
203
- result["structured_script_parse_error"] = (
204
- "Could not parse JSON from response"
205
- )
206
-
207
- return json.dumps(result, indent=2)
208
-
209
- except Exception as e:
210
- return json.dumps({"error": f"Error generating script: {str(e)}"})