aheedsajid commited on
Commit
1ce9e98
Β·
verified Β·
1 Parent(s): eb71023

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +571 -0
  2. requirements.txt +11 -0
app.py ADDED
@@ -0,0 +1,571 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import subprocess
3
+ import os
4
+ import tempfile
5
+ import shutil
6
+ from pathlib import Path
7
+ import json
8
+ import datetime
9
+ import csv
10
+ from pydub import AudioSegment
11
+ import numpy as np
12
+ import torch
13
+ import gc
14
+ from dotenv import load_dotenv
15
+
16
# Load environment variables
# Reads a local .env file (if present) so MODEL_NAME can be configured
# without touching the code.
load_dotenv()
18
+
19
# Import NeMo for transcription (you'll need to install: pip install nemo_toolkit[asr])
# Optional dependency: the app still works for manual SRT burning without it.
NEMO_AVAILABLE = False
try:
    from nemo.collections.asr.models import ASRModel
except ImportError:
    print("Warning: NeMo not available. Auto-transcription will be disabled.")
else:
    NEMO_AVAILABLE = True
26
+
27
class AutomatedSubtitleBurner:
    """Generates SRT subtitles from a video's audio (via NeMo ASR, when
    available) and burns styled subtitles into the video with FFmpeg."""

    def __init__(self):
        # Scratch directory for extracted audio, SRT files and output videos.
        self.temp_dir = tempfile.mkdtemp()
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None  # ASR model; stays None when transcription is unavailable

        # Load transcription model if available. MODEL_NAME comes from the
        # environment (.env), so the model can be swapped without code changes.
        if NEMO_AVAILABLE:
            try:
                model_name = os.getenv('MODEL_NAME')
                if model_name:
                    self.model = ASRModel.from_pretrained(model_name=model_name)
                    self.model.eval()
            except Exception as e:
                # Fail soft (the UI still supports manual SRT burning), but
                # say why instead of swallowing the error silently.
                print(f"Warning: could not load ASR model: {e}")
                self.model = None
42
+
43
+ def extract_audio_from_video(self, video_path):
44
+ """Extract audio from video file for transcription"""
45
+ try:
46
+ audio_path = os.path.join(self.temp_dir, "extracted_audio.wav")
47
+
48
+ # Use FFmpeg to extract audio
49
+ cmd = [
50
+ 'ffmpeg', '-y', '-i', video_path,
51
+ '-vn', # No video
52
+ '-acodec', 'pcm_s16le', # Audio codec
53
+ '-ar', '16000', # Sample rate
54
+ '-ac', '1', # Mono
55
+ audio_path
56
+ ]
57
+
58
+ subprocess.run(cmd, capture_output=True, check=True)
59
+ return audio_path
60
+
61
+ except Exception as e:
62
+ print(f"Error extracting audio: {e}")
63
+ return None
64
+
65
+ def format_srt_time(self, seconds: float) -> str:
66
+ """Converts seconds to SRT time format HH:MM:SS,mmm"""
67
+ sanitized_total_seconds = max(0.0, seconds)
68
+ delta = datetime.timedelta(seconds=sanitized_total_seconds)
69
+ total_int_seconds = int(delta.total_seconds())
70
+
71
+ hours = total_int_seconds // 3600
72
+ remainder_seconds_after_hours = total_int_seconds % 3600
73
+ minutes = remainder_seconds_after_hours // 60
74
+ seconds_part = remainder_seconds_after_hours % 60
75
+ milliseconds = delta.microseconds // 1000
76
+
77
+ return f"{hours:02d}:{minutes:02d}:{seconds_part:02d},{milliseconds:03d}"
78
+
79
+ def generate_srt_content(self, word_timestamps: list) -> str:
80
+ """Generates SRT formatted string from word timestamps"""
81
+ srt_content = []
82
+ for i, ts in enumerate(word_timestamps):
83
+ start_time = self.format_srt_time(ts['start'])
84
+ end_time = self.format_srt_time(ts['end'])
85
+ text = ts['word']
86
+ srt_content.append(str(i + 1))
87
+ srt_content.append(f"{start_time} --> {end_time}")
88
+ srt_content.append(text)
89
+ srt_content.append("")
90
+ return "\n".join(srt_content)
91
+
92
+ def transcribe_audio(self, audio_path, progress_callback=None):
93
+ """Transcribe audio to get word-level timestamps"""
94
+ if not self.model or not NEMO_AVAILABLE:
95
+ return None, "Transcription model not available"
96
+
97
+ try:
98
+ if progress_callback:
99
+ progress_callback(0.1, "Loading audio...")
100
+
101
+ # Load and preprocess audio
102
+ audio = AudioSegment.from_file(audio_path)
103
+ duration_sec = audio.duration_seconds
104
+
105
+ if progress_callback:
106
+ progress_callback(0.2, "Preprocessing audio...")
107
+
108
+ # Ensure audio is in correct format
109
+ if audio.frame_rate != 16000:
110
+ audio = audio.set_frame_rate(16000)
111
+ if audio.channels != 1:
112
+ audio = audio.set_channels(1)
113
+
114
+ # Save preprocessed audio
115
+ processed_path = os.path.join(self.temp_dir, "processed_audio.wav")
116
+ audio.export(processed_path, format="wav")
117
+
118
+ if progress_callback:
119
+ progress_callback(0.3, "Starting transcription...")
120
+
121
+ # Configure model for long audio if needed
122
+ long_audio_settings_applied = False
123
+ if duration_sec > 480: # 8 minutes
124
+ try:
125
+ print("Applying long audio settings for transcription...")
126
+ self.model.change_attention_model("rel_pos_local_attn", [256, 256])
127
+ self.model.change_subsampling_conv_chunking_factor(1)
128
+ long_audio_settings_applied = True
129
+ except Exception as e:
130
+ print(f"Warning: Could not apply long audio settings: {e}")
131
+
132
+ # Move model to appropriate device and precision
133
+ self.model.to(self.device)
134
+ self.model.to(torch.bfloat16)
135
+
136
+ if progress_callback:
137
+ progress_callback(0.5, "Transcribing (this may take a while)...")
138
+
139
+ # Transcribe with timestamps
140
+ output = self.model.transcribe([processed_path], timestamps=True)
141
+
142
+ if progress_callback:
143
+ progress_callback(0.8, "Processing transcription results...")
144
+
145
+ if not output or not output[0] or not hasattr(output[0], 'timestamp'):
146
+ return None, "Transcription failed - no output generated"
147
+
148
+ # Get word-level timestamps
149
+ word_timestamps = output[0].timestamp.get('word', [])
150
+
151
+ if not word_timestamps:
152
+ return None, "No word-level timestamps generated"
153
+
154
+ # Generate SRT content
155
+ srt_content = self.generate_srt_content(word_timestamps)
156
+
157
+ if progress_callback:
158
+ progress_callback(1.0, "Transcription complete!")
159
+
160
+ return srt_content, "Transcription successful!"
161
+
162
+ except torch.cuda.OutOfMemoryError:
163
+ return None, "CUDA out of memory. Please try a shorter video or use CPU."
164
+ except Exception as e:
165
+ return None, f"Transcription error: {str(e)}"
166
+ finally:
167
+ # Cleanup model settings and memory
168
+ try:
169
+ if long_audio_settings_applied and self.model:
170
+ self.model.change_attention_model("rel_pos")
171
+ self.model.change_subsampling_conv_chunking_factor(-1)
172
+
173
+ if self.model and self.device == 'cuda':
174
+ self.model.cpu()
175
+ gc.collect()
176
+ if self.device == 'cuda':
177
+ torch.cuda.empty_cache()
178
+ except Exception as e:
179
+ print(f"Warning: Error during cleanup: {e}")
180
+
181
+ def auto_generate_srt(self, video_file, progress=gr.Progress()):
182
+ """Automatically generate SRT from video"""
183
+ if not video_file:
184
+ return "", "Please provide a video file"
185
+
186
+ if not self.model or not NEMO_AVAILABLE:
187
+ return "", "Transcription model not available. Please install NeMo toolkit."
188
+
189
+ try:
190
+ progress(0.05, desc="Extracting audio from video...")
191
+
192
+ # Extract audio from video
193
+ audio_path = self.extract_audio_from_video(video_file)
194
+ if not audio_path:
195
+ return "", "Failed to extract audio from video"
196
+
197
+ progress(0.1, desc="Audio extracted, starting transcription...")
198
+
199
+ # Transcribe audio
200
+ def progress_callback(value, desc):
201
+ progress(0.1 + (value * 0.8), desc=desc)
202
+
203
+ srt_content, message = self.transcribe_audio(audio_path, progress_callback)
204
+
205
+ progress(0.95, desc="Finalizing...")
206
+
207
+ if srt_content:
208
+ progress(1.0, desc="SRT generation complete!")
209
+ return srt_content, message
210
+ else:
211
+ return "", message
212
+
213
+ except Exception as e:
214
+ return "", f"Error generating SRT: {str(e)}"
215
+
216
+ def create_styled_srt(self, srt_content, font_size=24, font_color="white",
217
+ outline_color="black", outline_width=1):
218
+ """Create a styled SRT file with ASS-style formatting"""
219
+ lines = srt_content.strip().split('\n')
220
+ styled_lines = []
221
+
222
+ i = 0
223
+ while i < len(lines):
224
+ if lines[i].strip().isdigit(): # Subtitle number
225
+ styled_lines.append(lines[i])
226
+ i += 1
227
+
228
+ if i < len(lines): # Timestamp
229
+ styled_lines.append(lines[i])
230
+ i += 1
231
+
232
+ # Collect all text lines for this subtitle
233
+ text_lines = []
234
+ while i < len(lines) and lines[i].strip() != "":
235
+ text_lines.append(lines[i])
236
+ i += 1
237
+
238
+ # Apply styling to text
239
+ if text_lines:
240
+ styled_text = ' '.join(text_lines)
241
+ # Add basic styling tags
242
+ styled_text = f"<font size='{font_size}' color='{font_color}'>{styled_text}</font>"
243
+ styled_lines.append(styled_text)
244
+
245
+ styled_lines.append("") # Empty line separator
246
+ else:
247
+ i += 1
248
+
249
+ return '\n'.join(styled_lines)
250
+
251
+ def get_video_info(self, video_path):
252
+ """Get video information using ffprobe"""
253
+ try:
254
+ cmd = [
255
+ 'ffprobe', '-v', 'quiet', '-print_format', 'json',
256
+ '-show_format', '-show_streams', video_path
257
+ ]
258
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
259
+ info = json.loads(result.stdout)
260
+
261
+ # Find video stream
262
+ video_stream = None
263
+ for stream in info['streams']:
264
+ if stream['codec_type'] == 'video':
265
+ video_stream = stream
266
+ break
267
+
268
+ if video_stream:
269
+ width = int(video_stream['width'])
270
+ height = int(video_stream['height'])
271
+ duration = float(video_stream.get('duration', 0))
272
+ return width, height, duration
273
+
274
+ except Exception as e:
275
+ print(f"Error getting video info: {e}")
276
+
277
+ return 1920, 1080, 0 # Default values
278
+
279
+ def burn_subtitles(self, video_file, srt_content, font_size=24, font_color="white",
280
+ position="bottom_center", outline_color="black", outline_width=1,
281
+ progress=gr.Progress()):
282
+ """Burn subtitles into video using FFmpeg"""
283
+
284
+ if not video_file or not srt_content.strip():
285
+ return None, "Please provide both video file and SRT content"
286
+
287
+ try:
288
+ progress(0.1, desc="Preparing files...")
289
+
290
+ # Create temporary SRT file
291
+ srt_path = os.path.join(self.temp_dir, "subtitles.srt")
292
+ styled_srt = self.create_styled_srt(srt_content, font_size, font_color,
293
+ outline_color, outline_width)
294
+
295
+ with open(srt_path, 'w', encoding='utf-8') as f:
296
+ f.write(styled_srt)
297
+
298
+ progress(0.2, desc="Getting video information...")
299
+
300
+ # Get video info
301
+ width, height, duration = self.get_video_info(video_file)
302
+
303
+ progress(0.3, desc="Starting subtitle burning...")
304
+
305
+ # Output file
306
+ output_filename = f"output_with_subtitles_{font_size}px.mp4"
307
+ output_path = os.path.join(self.temp_dir, output_filename)
308
+
309
+ # Build FFmpeg command with subtitle filter
310
+ cmd = [
311
+ 'ffmpeg', '-y', # Overwrite output files
312
+ '-i', video_file,
313
+ '-vf', f"""subtitles='{srt_path}':force_style='FontSize={font_size},PrimaryColour=&H{self.color_to_bgr_hex(font_color)},OutlineColour=&H{self.color_to_bgr_hex(outline_color)},Outline={outline_width},Alignment=2'""",
314
+ '-c:a', 'copy', # Copy audio without re-encoding
315
+ '-c:v', 'libx264', # Video codec
316
+ '-preset', 'medium', # Encoding preset
317
+ '-crf', '23', # Quality setting
318
+ output_path
319
+ ]
320
+
321
+ progress(0.4, desc="Processing video (this may take a while)...")
322
+
323
+ # Run FFmpeg
324
+ process = subprocess.Popen(
325
+ cmd,
326
+ stdout=subprocess.PIPE,
327
+ stderr=subprocess.PIPE,
328
+ universal_newlines=True
329
+ )
330
+
331
+ # Monitor progress
332
+ while True:
333
+ output = process.stderr.readline()
334
+ if output == '' and process.poll() is not None:
335
+ break
336
+ if output and 'time=' in output:
337
+ # Try to extract time for progress
338
+ try:
339
+ time_str = output.split('time=')[1].split()[0]
340
+ time_parts = time_str.split(':')
341
+ current_seconds = (float(time_parts[0]) * 3600 +
342
+ float(time_parts[1]) * 60 +
343
+ float(time_parts[2]))
344
+ if duration > 0:
345
+ prog = 0.4 + (current_seconds / duration) * 0.5
346
+ progress(min(prog, 0.9), desc=f"Processing: {time_str}")
347
+ except:
348
+ pass
349
+
350
+ progress(0.95, desc="Finalizing...")
351
+
352
+ return_code = process.poll()
353
+ if return_code == 0:
354
+ progress(1.0, desc="Complete!")
355
+ return output_path, "Video processed successfully!"
356
+ else:
357
+ error_output = process.stderr.read()
358
+ return None, f"FFmpeg error: {error_output}"
359
+
360
+ except Exception as e:
361
+ return None, f"Error processing video: {str(e)}"
362
+
363
+ def color_to_bgr_hex(self, color):
364
+ """Convert color name to BGR hex for FFmpeg"""
365
+ color_map = {
366
+ 'white': 'FFFFFF',
367
+ 'black': '000000',
368
+ 'red': '0000FF',
369
+ 'green': '00FF00',
370
+ 'blue': 'FF0000',
371
+ 'yellow': '00FFFF',
372
+ 'cyan': 'FFFF00',
373
+ 'magenta': 'FF00FF',
374
+ 'orange': '0080FF',
375
+ 'purple': '800080',
376
+ 'pink': 'FFB6C1',
377
+ 'gray': '808080',
378
+ 'grey': '808080'
379
+ }
380
+ return color_map.get(color.lower(), 'FFFFFF')
381
+
382
+ def preview_subtitles(self, srt_content, font_size, font_color, position):
383
+ """Generate a preview of how subtitles will look"""
384
+ if not srt_content.strip():
385
+ return "No SRT content provided"
386
+
387
+ lines = srt_content.strip().split('\n')
388
+ preview_lines = []
389
+
390
+ # Extract first few subtitles for preview
391
+ subtitle_count = 0
392
+ i = 0
393
+
394
+ while i < len(lines) and subtitle_count < 3:
395
+ if lines[i].strip().isdigit():
396
+ subtitle_num = lines[i].strip()
397
+ i += 1
398
+
399
+ if i < len(lines):
400
+ timestamp = lines[i].strip()
401
+ i += 1
402
+
403
+ text_lines = []
404
+ while i < len(lines) and lines[i].strip() != "":
405
+ text_lines.append(lines[i].strip())
406
+ i += 1
407
+
408
+ if text_lines:
409
+ text = ' '.join(text_lines)
410
+ preview_lines.append(f"#{subtitle_num} [{timestamp}]")
411
+ preview_lines.append(f"Text: \"{text}\"")
412
+ preview_lines.append(f"Style: {font_size}px {font_color} at {position}")
413
+ preview_lines.append("---")
414
+ subtitle_count += 1
415
+ else:
416
+ i += 1
417
+
418
+ return '\n'.join(preview_lines) if preview_lines else "No valid subtitles found"
419
+
420
# Initialize the subtitle burner
# Module-level singleton shared by every Gradio callback; constructing it
# also loads the ASR model once at startup (if NeMo and MODEL_NAME are set).
burner = AutomatedSubtitleBurner()
422
+
423
# Create Gradio interface
def create_interface():
    """Build the Gradio Blocks UI and wire up all event handlers.

    Returns the gr.Blocks app; launched by the __main__ guard below.
    Note: the emoji in the user-facing labels were mis-encoded (mojibake)
    and are restored here; no logic changed.
    """
    with gr.Blocks(title="Automated AI Subtitle Video Captions", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🎬 Automated SRT Subtitle Video Burner")
        gr.Markdown("Upload a video and either auto-generate subtitles or paste your own SRT content!")

        if not NEMO_AVAILABLE:
            gr.Markdown("⚠️ **Note**: Auto-transcription is disabled. Install NeMo toolkit for automatic SRT generation.")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📁 Input")
                video_input = gr.File(
                    label="Upload Video File",
                    file_types=[".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv"],
                    type="filepath"
                )

                with gr.Row():
                    # The button is only functional when a model actually loaded.
                    if NEMO_AVAILABLE and burner.model:
                        auto_generate_btn = gr.Button("🤖 Auto-Generate SRT", variant="secondary")
                    else:
                        auto_generate_btn = gr.Button("🤖 Auto-Generate SRT (Disabled)", variant="secondary", interactive=False)

                srt_input = gr.Textbox(
                    label="SRT Content (Auto-generated or Manual)",
                    placeholder="SRT content will appear here after auto-generation, or paste your own...",
                    lines=12,
                    max_lines=20
                )

            with gr.Column(scale=1):
                gr.Markdown("### 🎨 Subtitle Styling")

                font_size = gr.Slider(
                    minimum=8,
                    maximum=72,
                    value=24,
                    step=1,
                    label="Font Size (px)"
                )

                font_color = gr.Dropdown(
                    choices=["white", "black", "red", "green", "blue", "yellow",
                             "cyan", "magenta", "orange", "purple", "pink", "gray"],
                    value="white",
                    label="Font Color"
                )

                position = gr.Dropdown(
                    choices=["top_left", "top_center", "top_right",
                             "center_left", "center", "center_right",
                             "bottom_left", "bottom_center", "bottom_right"],
                    value="bottom_center",
                    label="Position"
                )

                outline_color = gr.Dropdown(
                    choices=["black", "white", "red", "green", "blue", "yellow",
                             "cyan", "magenta", "orange", "purple", "pink", "gray"],
                    value="black",
                    label="Outline Color"
                )

                outline_width = gr.Slider(
                    minimum=0,
                    maximum=5,
                    value=1,
                    step=1,
                    label="Outline Width"
                )

        with gr.Row():
            with gr.Column():
                gr.Markdown("### 👁️ Preview")
                preview_output = gr.Textbox(
                    label="Subtitle Preview",
                    lines=8,
                    interactive=False
                )

                preview_btn = gr.Button("🔍 Preview Subtitles", variant="secondary")

        with gr.Row():
            process_btn = gr.Button("🔥 Burn Subtitles to Video", variant="primary", size="lg")

        with gr.Row():
            with gr.Column():
                output_video = gr.File(label="Download Processed Video")
                status_output = gr.Textbox(label="Status", interactive=False)

        # Event handlers
        if NEMO_AVAILABLE and burner.model:
            auto_generate_btn.click(
                fn=burner.auto_generate_srt,
                inputs=[video_input],
                outputs=[srt_input, status_output],
                show_progress=True
            )

        preview_btn.click(
            fn=burner.preview_subtitles,
            inputs=[srt_input, font_size, font_color, position],
            outputs=preview_output
        )

        process_btn.click(
            fn=burner.burn_subtitles,
            inputs=[video_input, srt_input, font_size, font_color, position,
                    outline_color, outline_width],
            outputs=[output_video, status_output],
            show_progress=True
        )

        # Auto-preview when inputs change
        for input_component in [srt_input, font_size, font_color, position]:
            input_component.change(
                fn=burner.preview_subtitles,
                inputs=[srt_input, font_size, font_color, position],
                outputs=preview_output
            )

    return demo
546
+
547
if __name__ == "__main__":
    # FFmpeg is a hard requirement (audio extraction and subtitle burning
    # both shell out to it) - bail out early if it's missing.
    try:
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
        print("✅ FFmpeg found!")
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("❌ FFmpeg not found! Please install FFmpeg and make sure it's in your PATH.")
        print("Download from: https://ffmpeg.org/download.html")
        # exit() is injected by the site module and may be absent when the
        # interpreter runs without it; SystemExit is always available.
        raise SystemExit(1)

    # Check transcription capability so users know why auto-SRT may be disabled.
    if NEMO_AVAILABLE and burner.model:
        print("✅ Auto-transcription enabled!")
    else:
        print("⚠️ Auto-transcription disabled. Install NeMo toolkit for automatic SRT generation:")
        print("pip install nemo_toolkit[asr]")

    # Launch the interface (share=True exposes a public gradio.live URL).
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        debug=True
    )
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Cython
2
+ git+https://github.com/NVIDIA/NeMo.git@main#egg=nemo_toolkit[asr]
3
+ numpy<2.0
4
+ gradio
5
+ spaces
6
+ ffmpeg
7
+ pydub
8
+ ffmpeg-python
9
+ python-dotenv==1.0.0
10
+ torch
11
+ torchaudio