Multilingual-ASR / gradio_ui.py
adiitya29's picture
fix: loaded the larger model for better results and added the downloading functionality for transcribed text
9622244
import gradio as gr
from app.asr_model import load_model, transcribe_audio
from app.language_detection import detect_language_from_text
from app.history import save_to_history, export_history, get_history, save_transcript_as_txt
import pandas as pd
def process_audio(audio_path):
    """Transcribe one audio file and build the values for the UI outputs.

    Args:
        audio_path: Path of the audio file to process, or ``None`` when
            nothing was supplied.

    Returns:
        A 3-tuple ``(transcript, language, file_update)``. When
        ``audio_path`` is ``None`` the tuple is the placeholder
        ``("No audio uploaded.", "Unknown", None)``; otherwise the third
        item is a ``gr.update`` that points the download component at the
        saved transcript file and makes it visible.
    """
    # Nothing to do without an input file: return placeholders right away.
    if audio_path is None:
        return "No audio uploaded.", "Unknown", None

    print("\n--- New Request ---")
    print(f"Processing audio: {audio_path}")

    print("Transcribing...")
    text = transcribe_audio(audio_path)
    # Only the first 80 characters are echoed to keep the console log short.
    preview = text[:80]
    print(f"Transcription complete: {preview}")

    print("Detecting language...")
    detected = detect_language_from_text(text)

    print("Saving to history...")
    save_to_history(audio_path, text, detected)

    # Persist the transcript as a .txt file so it can be offered for download.
    download_path = save_transcript_as_txt(text)
    print("Done!\n")

    file_update = gr.update(value=download_path, visible=True)
    return text, detected, file_update
def export_history_wrapper():
    """Export the history as CSV and return an update for a file component.

    Returns:
        A ``gr.update`` that sets the exported path and shows the component
        when ``export_history("csv")`` returns a truthy path, otherwise a
        ``gr.update`` that hides the component.
    """
    csv_path = export_history("csv")
    # A falsy result means there is no file to offer: keep the widget hidden.
    if not csv_path:
        return gr.update(visible=False)
    return gr.update(value=csv_path, visible=True)
def load_history_table():
    """Load the transcription history as a table for ``gr.Dataframe``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Timestamp``,
        ``Audio File``, ``Language`` and ``Transcript``, one row per entry
        returned by ``get_history()``. An empty (or falsy) history yields
        an empty frame with the same columns, so the return type is the
        same on every path.
    """
    columns = ["Timestamp", "Audio File", "Language", "Transcript"]
    history = get_history() or []
    rows = [
        [
            # Keep the first 19 characters and swap "T" for a space
            # (presumably an ISO-8601 timestamp - confirm against the
            # history writer). `or ""` / `str()` guard against a stored
            # None or non-string value, which would break the slicing.
            str(entry.get("timestamp") or "")[:19].replace("T", " "),
            entry.get("audio_file", ""),
            entry.get("language", ""),
            entry.get("transcript", ""),
        ]
        for entry in history
    ]
    return pd.DataFrame(rows, columns=columns)
def create_ui():
    """Build the Gradio interface with a Transcribe tab and a History tab.

    The Transcribe tab wires an audio input and a button to
    ``process_audio``; the History tab shows the table produced by
    ``load_history_table`` and offers a CSV export through
    ``export_history_wrapper``.

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for launching it.
    """
    # The emoji in the labels below were stored as mojibake (UTF-8 bytes
    # decoded with a single-byte codepage); they are restored here.
    with gr.Blocks(title="Multilingual ASR") as demo:
        gr.Markdown("# 🎙️ Multilingual Automatic Speech Recognition")

        with gr.Tabs():
            # NOTE(review): the last byte of this tab's original emoji was
            # lost; the memo glyph is the best guess - confirm.
            with gr.TabItem("📝 Transcribe"):
                gr.Markdown("Upload an audio file to get a text transcription using Wav2Vec.")

                with gr.Row():
                    with gr.Column():
                        audio_input = gr.Audio(type="filepath", label="Upload Audio")
                        transcribe_btn = gr.Button("Transcribe", variant="primary")

                    with gr.Column():
                        lang_output = gr.Textbox(label="Detected Language")
                        transcript_output = gr.Textbox(label="Transcription", lines=10)
                        # Hidden until process_audio returns a file to download.
                        download_txt = gr.File(label="⬇️ Download Transcript (.txt)", visible=False)

                # Output order must match the tuple returned by process_audio.
                transcribe_btn.click(
                    fn=process_audio,
                    inputs=audio_input,
                    outputs=[transcript_output, lang_output, download_txt],
                )

            with gr.TabItem("📋 History"):
                gr.Markdown("Your past transcriptions (newest first).")

                with gr.Row():
                    refresh_btn = gr.Button("🔄 Refresh History")
                    export_btn = gr.Button("⬇️ Export as CSV")

                history_table = gr.Dataframe(
                    headers=["Timestamp", "Audio File", "Language", "Transcript"],
                    datatype=["str", "str", "str", "str"],
                    # Passed as a callable (not called here) so the table
                    # content is produced by load_history_table.
                    value=load_history_table,
                    wrap=True,
                    label="Transcription History",
                )
                # Hidden until an export produces a file.
                csv_file_output = gr.File(label="Download CSV", visible=False)

                refresh_btn.click(fn=load_history_table, outputs=history_table)
                export_btn.click(fn=export_history_wrapper, outputs=csv_file_output)

    return demo
if __name__ == "__main__":
    # Script entry point: build the interface and start serving it.
    create_ui().launch()