lvvignesh2122 committed on
Commit
04f489d
·
verified ·
1 Parent(s): 3aef6de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -43
app.py CHANGED
@@ -9,69 +9,125 @@ from faster_whisper import WhisperModel
9
 
10
  # --- Helper functions ---
11
  def _format_timestamp(seconds: float) -> str:
12
- ms = int(round(seconds * 1000))
13
- hours = ms // 3600000
14
- ms_rem = ms % 3600000
15
- minutes = ms_rem // 60000
16
- ms_rem = ms_rem % 60000
17
- secs = ms_rem // 1000
18
- millis = ms_rem % 1000
19
- return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
20
-
21
-
22
 
23
 
24
  def segments_to_srt(segments: list) -> str:
25
- lines = []
26
- for i, seg in enumerate(segments, start=1):
27
- start_ts = _format_timestamp(seg['start'])
28
- end_ts = _format_timestamp(seg['end'])
29
- text = seg['text'].replace('\n', ' ').strip()
30
- if not text:
31
- continue
32
- block = f"{i}\n{start_ts} --> {end_ts}\n{text}\n"
33
- lines.append(block)
34
- return "\n".join(lines)
35
-
36
-
37
 
38
 
39
  # --- Configuration ---
40
- MODEL_NAME = "Systran/faster-whisper-small" # small model for HF free CPU
41
  DEVICE = "cpu"
42
  OUTPUT_DIR = Path("outputs/subtitles")
43
  OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
44
 
45
-
46
  print(f"Loading model {MODEL_NAME} on {DEVICE} ...")
47
  model = WhisperModel(MODEL_NAME, device=DEVICE)
48
  print("Model loaded.")
49
 
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
 
53
- def extract_audio(input_path: str, out_path: str):
54
- try:
55
- _ = (
56
- ffmpeg
57
- .input(input_path)
58
- .output(out_path, format='wav', acodec='pcm_s16le', ac=1, ar='16000')
59
- .overwrite_output()
60
- .run(quiet=True)
61
- )
62
- except ffmpeg.Error as e:
63
- stderr = getattr(e, 'stderr', None)
64
- msg = stderr.decode() if stderr else str(e)
65
- raise RuntimeError(f"ffmpeg error: {msg}")
66
 
 
 
67
 
 
 
 
68
 
 
 
69
 
70
- def transcribe_file_to_srt(file_obj, language: str = "en"):
71
- filename = getattr(file_obj, 'name', getattr(file_obj, 'filename', f"upload_{uuid.uuid4()}.bin"))
72
- input_filepath = Path(filename)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
 
75
- tmp_dir = Path(tempfile.mkdtemp(prefix="subgen_"))
76
- saved_input = tmp_dir / (str(uuid.uuid4()) + input_filepath.suffix)
77
- demo.launch(share=True)
 
9
 
10
  # --- Helper functions ---
11
  def _format_timestamp(seconds: float) -> str:
12
+ ms = int(round(seconds * 1000))
13
+ hours = ms // 3600000
14
+ ms_rem = ms % 3600000
15
+ minutes = ms_rem // 60000
16
+ ms_rem = ms_rem % 60000
17
+ secs = ms_rem // 1000
18
+ millis = ms_rem % 1000
19
+ return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
 
 
20
 
21
 
22
  def segments_to_srt(segments: list) -> str:
23
+ lines = []
24
+ for i, seg in enumerate(segments, start=1):
25
+ start_ts = _format_timestamp(seg['start'])
26
+ end_ts = _format_timestamp(seg['end'])
27
+ text = seg['text'].replace('\n', ' ').strip()
28
+ if not text:
29
+ continue
30
+ block = f"{i}\n{start_ts} --> {end_ts}\n{text}\n"
31
+ lines.append(block)
32
+ return "\n".join(lines)
 
 
33
 
34
 
35
  # --- Configuration ---
36
# Checkpoint handed to WhisperModel below, and the device it runs on.
MODEL_NAME = "Systran/faster-whisper-small"  # small model for HF free CPU
DEVICE = "cpu"

# Directory that receives the generated .srt files; created on import if missing.
OUTPUT_DIR = Path("outputs/subtitles")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# The model is built once at module import and shared by every request.
print(f"Loading model {MODEL_NAME} on {DEVICE} ...")
model = WhisperModel(MODEL_NAME, device=DEVICE)
print("Model loaded.")
44
 
45
 
46
def extract_audio(input_path: str, out_path: str):
    """Convert a media file to a mono, 16 kHz, 16-bit PCM WAV file via ffmpeg.

    Args:
        input_path: Path of the source audio/video file.
        out_path: Path of the WAV file to write; an existing file is overwritten.

    Raises:
        RuntimeError: If ffmpeg reports an error. The message carries ffmpeg's
            stderr output when available, and the original ``ffmpeg.Error`` is
            chained as the cause.
    """
    stream = (
        ffmpeg
        .input(input_path)
        .output(out_path, format='wav', acodec='pcm_s16le', ac=1, ar='16000')
        .overwrite_output()
    )
    try:
        stream.run(quiet=True)
    except ffmpeg.Error as e:
        stderr = getattr(e, 'stderr', None)
        # ffmpeg output may contain bytes that are not valid UTF-8 (e.g. file
        # names); never let decoding hide the real failure.
        msg = stderr.decode('utf-8', errors='replace') if stderr else str(e)
        raise RuntimeError(f"ffmpeg error: {msg}") from e
59
 
60
 
61
def transcribe_file_to_srt(file_obj, language: str = "en"):
    """Transcribe an uploaded media file and write the result as an .srt file.

    Args:
        file_obj: Either a filesystem path (``str`` / ``os.PathLike``) or a
            binary file-like object with ``read`` (and optionally ``seek``,
            ``name`` or ``filename``). NOTE(review): which of the two Gradio
            passes depends on the installed Gradio version — confirm.
        language: Language code forwarded to ``model.transcribe``.

    Returns:
        ``pathlib.Path`` of the written subtitle file inside ``OUTPUT_DIR``.

    Raises:
        ValueError: If ``file_obj`` is ``None``.
        RuntimeError: Propagated from ``extract_audio`` when ffmpeg fails.
    """
    import os
    import shutil

    if file_obj is None:
        raise ValueError("No file provided.")

    given_as_path = isinstance(file_obj, (str, os.PathLike))
    if given_as_path:
        filename = os.fspath(file_obj)
    else:
        filename = (
            getattr(file_obj, 'name', None)
            or getattr(file_obj, 'filename', None)
            or f"upload_{uuid.uuid4()}.bin"
        )
    input_filepath = Path(str(filename))

    job_id = str(uuid.uuid4())
    segs = []

    # The scratch directory is removed on exit, even on failure, so uploads
    # and intermediate WAV files do not accumulate on disk.
    with tempfile.TemporaryDirectory(prefix="subgen_") as tmp:
        tmp_dir = Path(tmp)

        if given_as_path:
            # Already on disk: let ffmpeg read it in place.
            media_path = input_filepath
        else:
            media_path = tmp_dir / (job_id + input_filepath.suffix)
            if hasattr(file_obj, 'seek'):
                file_obj.seek(0)
            with open(media_path, 'wb') as f:
                # Stream in chunks rather than loading the whole upload in memory.
                shutil.copyfileobj(file_obj, f)

        # Distinct name so a ".wav" upload is never both ffmpeg's input and output.
        wav_path = tmp_dir / f"{job_id}_mono16k.wav"
        extract_audio(str(media_path), str(wav_path))

        print("Starting transcription... This may take a while depending on file length and model.")
        segments, _info = model.transcribe(str(wav_path), language=language)

        # Consume the segments while the WAV file still exists.
        for seg in segments:
            if isinstance(seg, dict):
                start, end = seg.get('start'), seg.get('end')
                text = seg.get('text') or ''
            else:
                start = getattr(seg, 'start', None)
                end = getattr(seg, 'end', None)
                text = getattr(seg, 'text', '')
            if start is None or end is None:
                continue
            segs.append({
                'start': float(start),
                'end': float(end),
                'text': str(text).strip(),
            })

    srt_text = segments_to_srt(segs)
    out_path = OUTPUT_DIR / f"subtitles_{job_id}.srt"

    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(srt_text)

    print(f"Saved SRT to {out_path}")
    return out_path
100
+
101
+
102
+ # --- Gradio UI ---
103
def generate_and_return(file):
    """Run English transcription for a Gradio handler with two outputs.

    Always returns a ``(srt_path, status_message)`` pair so it can be wired to
    a file output plus a status textbox: ``srt_path`` is the generated file's
    path as a string on success and ``None`` on failure, and ``status_message``
    describes the outcome. (Previously the success path returned a bare path
    while the failure path returned a 2-tuple.)
    """
    if file is None:
        return None, "No file uploaded."
    try:
        srt_path = transcribe_file_to_srt(file, language="en")
    except Exception as e:
        # UI boundary: surface the failure as a status message instead of crashing.
        return None, str(e)
    return str(srt_path), f"Done — saved: {srt_path}"
109
+
110
+
111
with gr.Blocks(title="AI Subtitle Generator — English (.srt)") as demo:
    gr.Markdown("# 🎬 AI Subtitle Generator (English)\nUpload a video or audio file and download the generated .srt subtitles.")

    with gr.Row():
        inp = gr.File(label="Upload video or audio file (.mp4, .mkv, .mp3, ...)")
        out = gr.File(label="Download generated .srt file")

    generate_btn = gr.Button("Generate Subtitles")
    status = gr.Textbox(label="Status", interactive=False)

    def on_click(file):
        """Transcribe the upload and return ``(srt_path, status_text)``.

        The click event declares two outputs (``out`` and ``status``), so the
        handler must return two values. Assigning ``status.value`` inside a
        handler does not update the rendered textbox; the status text has to
        be returned instead.
        """
        if file is None:
            return None, "Please upload a file first."
        try:
            path = transcribe_file_to_srt(file, language='en')
        except Exception as exc:
            # UI boundary: show the failure in the status box.
            return None, f"Error: {exc}"
        return str(path), f"Done — saved: {path}"

    generate_btn.click(on_click, inputs=[inp], outputs=[out, status])

    gr.Markdown("---\n⚙️ **Note:** Make sure ffmpeg is installed. This may take time on CPU. Use a smaller model for faster processing.")


if __name__ == "__main__":
    demo.launch(share=True)