Samuelblue committed on
Commit
d750341
·
verified ·
1 Parent(s): 90e7cd5

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +442 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,442 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import subprocess
3
+ import json
4
+ import os
5
+ import tempfile
6
+ from typing import List, Dict, Any, Optional, Tuple
7
+ import re
8
+ import shlex
9
+
10
class FFmpegAgent:
    """Rule-based agent that maps natural-language requests to FFmpeg commands.

    The agent does keyword/regex matching only (no model is involved):
    ``parse_natural_language`` extracts parameters from the request,
    ``generate_command`` renders them as a shell-quoted ``ffmpeg`` command
    line, and ``execute_command`` runs such a command and records the
    outcome in ``command_history``.
    """

    # Media extensions recognised in quoted file names and in
    # "to <format>" phrases.
    _MEDIA_EXTENSIONS = r'mp4|avi|mov|mkv|flv|webm|mp3|wav|flac|aac'

    # A quoted path ending in one of the known extensions (any letter case).
    _FILE_RE = re.compile(
        r'["\']([^"\']+\.(?:' + _MEDIA_EXTENSIONS + r'))["\']',
        re.IGNORECASE,
    )

    # "to mkv", "as .mp3", "into webm" ... (applied to lower-cased text).
    _TARGET_FORMAT_RE = re.compile(
        r'\b(?:to|as|into)\s+\.?(' + _MEDIA_EXTENSIONS + r')\b'
    )

    _TIMESTAMP_RE = re.compile(r'(\d{1,2}):(\d{2}):(\d{2})')

    def __init__(self):
        # Function-scope imports: only needed for the cleanup hook below.
        import atexit
        import shutil

        # One dict per executed command:
        # {'command', 'stdout', 'stderr', 'returncode'}.
        self.command_history = []
        self.temp_dir = tempfile.mkdtemp()
        # mkdtemp() never removes the directory on its own; drop it at
        # interpreter exit so repeated runs do not leak temp directories.
        atexit.register(shutil.rmtree, self.temp_dir, ignore_errors=True)

    @staticmethod
    def _to_seconds(stamp: Tuple) -> int:
        """Convert an ``(hours, minutes, seconds)`` tuple to total seconds."""
        return int(stamp[0]) * 3600 + int(stamp[1]) * 60 + int(stamp[2])

    def parse_natural_language(self, text: str) -> Dict[str, Any]:
        """Parse a natural-language request into FFmpeg command parameters.

        Args:
            text: Free-form request. File names must be quoted (single or
                double quotes) and end in a known media extension; the first
                quoted file is the input, the second (if any) the output.

        Returns:
            A dict with the keys ``input_file``, ``output_file``,
            ``operations``, ``quality``, ``format``, ``codec``,
            ``resolution``, ``fps``, ``bitrate``, ``duration``,
            ``start_time`` and ``audio_ops``. Values that were not detected
            stay ``None`` (or an empty list / ``'medium'`` for quality).
        """
        text = (text or "").strip()

        params = {
            'input_file': None,
            'output_file': None,
            'operations': [],
            'quality': 'medium',
            'format': None,
            'codec': None,
            'resolution': None,
            'fps': None,
            'bitrate': None,
            'duration': None,
            'start_time': None,
            'audio_ops': []
        }

        # File names are taken from the ORIGINAL text: lower-casing them
        # would point at a different file on case-sensitive filesystems.
        files = self._FILE_RE.findall(text)
        if files:
            params['input_file'] = files[0]
            if len(files) > 1:
                params['output_file'] = files[1]

        # Keywords are matched on the request with the quoted file names
        # removed, so a name like 'compressed_low.mp4' cannot trigger an
        # operation by accident.
        words = self._FILE_RE.sub(' ', text).lower()

        if any(word in words for word in ('resize', 'scale', 'resolution')):
            res_match = re.search(r'(\d{3,4})x(\d{3,4})', words)
            if res_match:
                params['resolution'] = f"{res_match.group(1)}x{res_match.group(2)}"
            elif '720p' in words:
                params['resolution'] = "1280x720"
            elif '1080p' in words:
                params['resolution'] = "1920x1080"
            elif '4k' in words:
                params['resolution'] = "3840x2160"

        if any(word in words for word in ('compress', 'reduce', 'smaller')):
            params['operations'].append('compress')

        # Quality applies to compression requests and to any request that
        # talks about "quality". Whole-word matching keeps "slow" or
        # "below" from being read as "low".
        if 'compress' in params['operations'] or 'quality' in words:
            if re.search(r'\bhigh\b', words):
                params['quality'] = 'high'
            elif re.search(r'\blow\b', words):
                params['quality'] = 'low'

        if any(word in words for word in ('convert', 'format')):
            if params['output_file']:
                extension = os.path.splitext(params['output_file'])[1]
                params['format'] = extension.lstrip('.').lower() or None
            else:
                # No output file named: honour "convert ... to mkv" so the
                # auto-generated output name gets the requested extension.
                target = self._TARGET_FORMAT_RE.search(words)
                if target:
                    params['format'] = target.group(1)

        fps_match = re.search(r'(\d+)\s*fps', words)
        if fps_match:
            params['fps'] = int(fps_match.group(1))

        # Prefix word boundary: matches "trim", "trimmed", "cut", "cutting"
        # but not "execute" or "shortcut".
        if re.search(r'\b(?:trim|cut)', words):
            params['operations'].append('trim')
            stamps = self._TIMESTAMP_RE.findall(words)
            if len(stamps) >= 2:
                # Accept the two timestamps in either order; the earlier one
                # is the start so the duration can never be negative.
                start, end = sorted(stamps[:2], key=self._to_seconds)
                params['start_time'] = ':'.join(start)
                if self._to_seconds(end) > self._to_seconds(start):
                    params['duration'] = self._calculate_duration(start, end)
            elif stamps:
                params['start_time'] = ':'.join(stamps[0])

        if any(phrase in words for phrase in ('extract audio', 'audio only')):
            params['operations'].append('extract_audio')

        if 'mute' in words:
            params['audio_ops'].append('mute')

        if 'normalize' in words:
            params['audio_ops'].append('normalize')

        return params

    def _calculate_duration(self, start: Tuple, end: Tuple) -> str:
        """Return the time between two timestamps as ``HH:MM:SS``.

        Args:
            start: ``(hours, minutes, seconds)`` of one timestamp.
            end: ``(hours, minutes, seconds)`` of the other timestamp.

        Returns:
            The absolute difference, zero-padded. The order of the two
            arguments does not matter.
        """
        duration_seconds = abs(self._to_seconds(end) - self._to_seconds(start))
        hours, remainder = divmod(duration_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

    def generate_command(self, params: Dict[str, Any]) -> str:
        """Render parsed parameters as a shell-quoted FFmpeg command line.

        Args:
            params: A dict as returned by ``parse_natural_language``.

        Returns:
            The command as a single string that ``shlex.split`` turns back
            into the exact argument list, or the string
            ``"Error: No input file specified"`` when no input is known.
        """
        if not params.get('input_file'):
            return "Error: No input file specified"

        operations = params.get('operations') or []
        audio_ops = params.get('audio_ops') or []

        # -y is a global option and must precede the output file; ffmpeg
        # reports options placed after the last output as trailing options.
        cmd = ['ffmpeg', '-y', '-i', params['input_file']]

        if params.get('start_time'):
            cmd.extend(['-ss', params['start_time']])

        if params.get('duration'):
            cmd.extend(['-t', params['duration']])

        # Video operations
        if params.get('resolution'):
            cmd.extend(['-vf', f"scale={params['resolution']}"])

        if params.get('fps'):
            cmd.extend(['-r', str(params['fps'])])

        # Quality settings (lower CRF means higher quality)
        if params.get('quality') == 'high':
            cmd.extend(['-crf', '18'])
        elif params.get('quality') == 'low':
            cmd.extend(['-crf', '28'])
        else:
            cmd.extend(['-crf', '23'])

        # Audio operations
        if 'mute' in audio_ops:
            cmd.append('-an')

        normalize = 'normalize' in audio_ops
        if normalize:
            cmd.extend(['-af', 'loudnorm=I=-16:LRA=11:TP=-1.5'])

        if 'extract_audio' in operations:
            cmd.append('-vn')
            # Stream copy cannot be combined with an audio filter, so only
            # copy when no normalisation was requested.
            # NOTE(review): copying also requires the source codec to be
            # valid in the output container (e.g. AAC cannot be copied into
            # .mp3) - confirm whether re-encoding should be the default.
            if not normalize:
                cmd.extend(['-acodec', 'copy'])

        # Output file
        if params.get('output_file'):
            cmd.append(params['output_file'])
        else:
            input_name = os.path.splitext(params['input_file'])[0]
            output_ext = params.get('format') or 'mp4'
            cmd.append(f"{input_name}_processed.{output_ext}")

        # shlex.join quotes every argument, including the generated output
        # name, so paths containing spaces survive shlex.split().
        return shlex.join(cmd)

    def execute_command(self, command: str) -> Tuple[str, str]:
        """Run an FFmpeg command line and report the outcome.

        The command text is editable in the UI, so it is untrusted: only
        commands whose executable is exactly ``ffmpeg`` are run, and they are
        run without a shell.
        NOTE(review): ffmpeg itself can still read and write any path the
        process may access - confirm whether paths need sandboxing.

        Args:
            command: The command line, as produced by ``generate_command``.

        Returns:
            ``(status, output)``. ``status`` is a human-readable message;
            ``output`` is the process output on completion or ``""`` when
            the command was rejected or could not be started.
        """
        try:
            cmd_args = shlex.split(command or "")
        except ValueError as exc:
            # Raised by shlex for unbalanced quotes.
            return f"❌ Error: {exc}", ""

        if not cmd_args or cmd_args[0] != 'ffmpeg':
            return "❌ Error: Only ffmpeg commands can be executed", ""

        try:
            result = subprocess.run(
                cmd_args,
                capture_output=True,
                text=True,
                errors='replace',  # media metadata is not always valid UTF-8
                stdin=subprocess.DEVNULL,  # never block on an ffmpeg prompt
                timeout=300  # 5 minute timeout
            )
        except subprocess.TimeoutExpired:
            return "❌ Error: Command timed out after 5 minutes", ""
        except (OSError, ValueError) as exc:
            # OSError covers a missing or non-executable ffmpeg binary.
            return f"❌ Error: {exc}", ""

        self.command_history.append({
            'command': command,
            'stdout': result.stdout,
            'stderr': result.stderr,
            'returncode': result.returncode
        })

        if result.returncode == 0:
            # ffmpeg writes its log to stderr, so stdout is usually empty on
            # success; fall back to stderr so the caller still sees the log.
            return "✅ Command executed successfully!", result.stdout or result.stderr
        return f"❌ Error (code {result.returncode})", result.stderr

    def get_preset_commands(self) -> List[Dict[str, str]]:
        """Return the preset request templates offered in the UI.

        Each preset is a dict with ``name``, ``description`` and
        ``template``; templates contain ``{input}`` / ``{output}``
        placeholders that the caller substitutes with file names.
        """
        return [
            {
                "name": "Compress Video",
                "description": "Compress video for web sharing",
                "template": "Compress '{input}' to '{output}' with medium quality"
            },
            {
                "name": "Resize to 1080p",
                "description": "Resize video to 1080p resolution",
                "template": "Resize '{input}' to 1920x1080 and save as '{output}'"
            },
            {
                "name": "Extract Audio",
                "description": "Extract audio from video file",
                # No extension is appended to the placeholder: the substituted
                # output name is itself quoted, so a trailing ".mp3" would end
                # up outside the quotes and be ignored by the parser.
                "template": "Extract audio from '{input}' and save as '{output}'"
            },
            {
                "name": "Trim Video",
                "description": "Trim video between timestamps",
                "template": "Trim '{input}' from 00:00:10 to 00:00:30 and save as '{output}'"
            },
            {
                "name": "Convert to MP4",
                "description": "Convert any video to MP4 format",
                "template": "Convert '{input}' to MP4 format and save as '{output}'"
            },
            {
                "name": "Change FPS",
                "description": "Change video frame rate",
                "template": "Change '{input}' to 30 fps and save as '{output}'"
            }
        ]
232
+
233
# Module-level agent instance used by the callback functions in this file.
agent = FFmpegAgent()


def process_request(user_input: str, input_file: Optional[str] = None, output_file: Optional[str] = None) -> Tuple[str, str, str]:
    """Turn a natural-language request into an FFmpeg command for display.

    Args:
        user_input: The request text. ``{input}`` / ``{output}`` placeholders
            are replaced by the quoted widget values when those are given.
        input_file: Optional path of the uploaded input file.
        output_file: Optional output file name typed by the user.

    Returns:
        ``(command, params_json, status)``: the generated command (or the
        generator's error text), the parsed parameters as indented JSON, and
        an empty status string.
    """
    user_input = user_input or ""

    # Substitute template placeholders with the explicitly provided files.
    if input_file:
        user_input = user_input.replace("{input}", f'"{input_file}"')
    if output_file:
        user_input = user_input.replace("{output}", f'"{output_file}"')

    params = agent.parse_natural_language(user_input)

    # Explicit widget values win over whatever was parsed from the text, so
    # an upload is used even when the request contains no placeholder (and
    # the exact path, including letter case, is preserved).
    if input_file:
        uploaded = str(input_file)
        parsed_input = params.get('input_file')
        if (
            parsed_input
            and parsed_input.lower() != uploaded.lower()
            and not params.get('output_file')
        ):
            # The only file named in the text is not the upload, so it can
            # only have been meant as the output.
            params['output_file'] = parsed_input
        params['input_file'] = uploaded

    if output_file and output_file.strip():
        params['output_file'] = output_file.strip()

    command = agent.generate_command(params)

    # default=str keeps the dump from failing on any non-JSON value.
    param_text = json.dumps(params, indent=2, default=str)

    return command, param_text, ""
255
+
256
def execute_ffmpeg_command(command: str) -> Tuple[str, str]:
    """Run *command* through the shared agent and return ``(status, output)``."""
    outcome = agent.execute_command(command)
    status_text, output_text = outcome
    return status_text, output_text
260
+
261
def load_preset(preset_name: str) -> str:
    """Return the template of the preset called *preset_name*, or ``""`` if none matches."""
    matching_templates = (
        entry["template"]
        for entry in agent.get_preset_commands()
        if entry["name"] == preset_name
    )
    # First match wins; fall back to an empty string when nothing matches.
    return next(matching_templates, "")
268
+
269
def get_command_history() -> str:
    """Format the ten most recent executed commands as numbered lines.

    Returns:
        One line per command with a success/failure marker, followed for
        failed commands by the last non-empty line of their stderr, or
        ``"No commands executed yet."`` when the history is empty.
    """
    if not agent.command_history:
        return "No commands executed yet."

    history_text = []
    for i, entry in enumerate(agent.command_history[-10:], 1):
        succeeded = entry["returncode"] == 0
        marker = "✅" if succeeded else "❌"
        history_text.append(f"{i}. {marker} {entry['command']}")

        # ffmpeg logs to stderr even on success, so stderr is only reported
        # as an error for commands that actually failed.
        if succeeded or not entry["stderr"]:
            continue

        # The start of ffmpeg's stderr is the version banner; the failure
        # reason is at the end, so show the last non-empty line.
        stderr_lines = [line.strip() for line in entry["stderr"].splitlines() if line.strip()]
        if not stderr_lines:
            continue
        detail = stderr_lines[-1]
        if len(detail) > 100:
            detail = detail[:100] + "..."
        history_text.append(f" Error: {detail}")

    return "\n".join(history_text)
282
+
283
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 🎬 AI FFmpeg Agent")
    gr.Markdown("Generate and execute FFmpeg commands using natural language. [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("## 📝 Input")

            # User input
            user_input = gr.Textbox(
                label="Describe what you want to do with your media file",
                placeholder="e.g., Resize 'video.mp4' to 720p and compress it, or use the presets below",
                lines=3
            )

            # File inputs
            with gr.Row():
                input_file = gr.File(
                    label="Input File (Optional)",
                    file_types=["video", "audio"]
                )
                output_filename = gr.Textbox(
                    label="Output Filename (Optional)",
                    placeholder="output.mp4"
                )

            # Preset buttons
            gr.Markdown("### 🚀 Quick Presets")
            with gr.Row():
                preset_choices = [p["name"] for p in agent.get_preset_commands()]
                preset_dropdown = gr.Dropdown(
                    choices=preset_choices,
                    label="Select Preset",
                    value=None
                )

            # Generate button
            generate_btn = gr.Button("🔧 Generate Command", variant="primary")

        with gr.Column(scale=1):
            gr.Markdown("## ⚙️ Parsed Parameters")
            params_display = gr.Code(
                label="Parsed Parameters",
                language="json",
                lines=10
            )

    # Generated command section
    gr.Markdown("## 🎯 Generated Command")
    command_output = gr.Code(
        label="FFmpeg Command",
        # "shell" is the identifier gr.Code documents for shell snippets.
        language="shell",
        lines=3
    )

    with gr.Row():
        execute_btn = gr.Button("▶️ Execute Command", variant="primary")
        clear_btn = gr.Button("🗑️ Clear")

    # Execution results
    gr.Markdown("## 📊 Execution Results")
    with gr.Row():
        with gr.Column():
            status_output = gr.Textbox(
                label="Status",
                interactive=False
            )
        with gr.Column():
            result_output = gr.Code(
                label="Output",
                # language=None renders plain text without highlighting.
                language=None,
                lines=10
            )

    # Command history
    with gr.Accordion("📚 Command History", open=False):
        history_output = gr.Code(
            label="Recent Commands",
            language=None,
            lines=8,
            value=get_command_history()
        )
        refresh_history_btn = gr.Button("🔄 Refresh History")

    # Examples
    gr.Markdown("## 💡 Examples")
    gr.Examples(
        examples=[
            ["Resize 'video.mp4' to 720p and compress it"],
            ["Extract audio from 'movie.mkv' and save as 'audio.mp3'"],
            ["Convert 'video.avi' to MP4 format with high quality"],
            ["Trim 'clip.mp4' from 00:00:10 to 00:00:30"],
            ["Change 'video.mov' to 30 fps and save as 'output.mp4'"],
            ["Compress 'large_video.mp4' for web sharing"],
            ["Mute the audio in 'video_with_sound.mp4'"]
        ],
        inputs=[user_input],
        label="Try these examples:"
    )

    # Event handlers
    def handle_preset_change(preset_name):
        """Fill the request box with the chosen preset's template."""
        return load_preset(preset_name) if preset_name else ""

    preset_dropdown.change(
        handle_preset_change,
        inputs=[preset_dropdown],
        outputs=[user_input]
    )

    generate_btn.click(
        process_request,
        inputs=[user_input, input_file, output_filename],
        outputs=[command_output, params_display, status_output]
    )

    clear_btn.click(
        lambda: ("", "", "", "", ""),
        outputs=[user_input, command_output, params_display, status_output, result_output]
    )

    refresh_history_btn.click(
        get_command_history,
        outputs=[history_output]
    )

    def execute_and_refresh(command):
        """Run the command, then return status, output and refreshed history."""
        status, output = execute_ffmpeg_command(command)
        return status, output, get_command_history()

    # This is the ONLY click handler on the execute button: registering a
    # second handler that also executes the command would run FFmpeg twice
    # per click.
    execute_btn.click(
        execute_and_refresh,
        inputs=[command_output],
        outputs=[status_output, result_output, history_output],
        show_progress="full"
    )

demo.launch(
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="indigo",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        text_size="lg",
        spacing_size="lg",
        radius_size="md"
    ),
    footer_links=[
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
        {"label": "FFmpeg Documentation", "url": "https://ffmpeg.org/documentation.html"}
    ]
)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=6.0
2
+ requests
3
+ Pillow