Pardeep0675's picture
Update app.py
e4e0f2c verified
import gradio as gr
from faster_whisper import WhisperModel
# 1. Load the optimized model
# "small" is used as a balance between speed and accuracy; "medium" is an
# option when a GPU is available. The "int8" compute type keeps inference
# fast on CPU.
model_size = "small"
model = WhisperModel(
    model_size,
    device="cpu",
    compute_type="int8",
)
def generate_subtitles(audio_file, task_type, language_setting):
    """Transcribe or translate an audio file and return the result as SRT text.

    Args:
        audio_file: Path of the uploaded media file, or ``None`` when nothing
            was uploaded.
        task_type: Task name forwarded unchanged to ``model.transcribe``
            (the UI offers ``"transcribe"`` and ``"translate"``).
        language_setting: Display name of the spoken language. Names that are
            not in the lookup table resolve to ``None``, exactly like
            ``"Auto Detect"``.

    Returns:
        The subtitles as an SRT-formatted string, or the message
        ``"Error: No file uploaded."`` when ``audio_file`` is ``None``.
    """
    if audio_file is None:
        return "Error: No file uploaded."

    print(f"Processing... Task: {task_type}, Language: {language_setting}")

    # 2. Configure language code
    # faster-whisper expects language codes like "hi", "pa", "en".
    lang_map = {
        "Auto Detect": None,
        "Hindi": "hi",
        "Punjabi": "pa",
        "Urdu": "ur",
        "Bengali": "bn",
        "Tamil": "ta",
        "Telugu": "te",
        "Marathi": "mr",
        "Gujarati": "gu",
        "English": "en",
    }
    selected_lang = lang_map.get(language_setting)

    # 3. Run faster-whisper
    # The call returns the segments (the text) and an info object; only the
    # segments are needed here.
    segments, _info = model.transcribe(
        audio_file,
        task=task_type,
        language=selected_lang,
        beam_size=5,
    )

    # 4. Convert to SRT format
    # Cues are collected in a list and joined once instead of growing a string
    # with "+=" on every iteration.
    cues = []
    for segment in segments:
        text = segment.text.strip()
        if not text:
            # A cue without text is not a valid SRT entry; skip it so the
            # numbering of the remaining cues stays contiguous.
            continue
        start = format_timestamp(segment.start)
        end = format_timestamp(segment.end)
        cues.append(f"{len(cues) + 1}\n{start} --> {end}\n{text}\n\n")
    return "".join(cues)
def format_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond *before* it is split into
    fields, so floating-point noise cannot drop a millisecond (2.3 s becomes
    ``00:00:02,300``, not ``,299``) and a value such as 59.9996 s carries
    correctly into the minutes field.

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"``. The hours field grows
        beyond two digits when needed.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, milliseconds = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
# 5. The user interface
# One row holds the upload widget next to a column with the two option
# dropdowns; the result box and the submit button follow below.
with gr.Blocks() as demo:
    with gr.Row():
        file_input = gr.Audio(
            sources=["upload"],
            type="filepath",
            label="Upload Audio/Video",
        )
        with gr.Column():
            task_input = gr.Dropdown(
                choices=["transcribe", "translate"],
                value="transcribe",
                label="Action",
            )
            # Every language the subtitle handler can map to a language code
            # is offered here, including Telugu, Marathi and Gujarati.
            lang_input = gr.Dropdown(
                choices=[
                    "Auto Detect",
                    "Hindi",
                    "Punjabi",
                    "Urdu",
                    "Bengali",
                    "Tamil",
                    "Telugu",
                    "Marathi",
                    "Gujarati",
                    "English",
                ],
                value="Auto Detect",
                label="Language",
            )
    output_text = gr.Textbox(label="Result (.SRT)", lines=15)
    submit_btn = gr.Button("Generate Subtitles (Fast)", variant="primary")
    # Wire the button to the handler; the endpoint is exposed as "predict".
    submit_btn.click(
        fn=generate_subtitles,
        inputs=[file_input, task_input, lang_input],
        outputs=output_text,
        api_name="predict",
    )

demo.launch()