# typhoon-asr-api / app.py
# Author: poompengcharoen
# Refactor audio transcription logic and improve user interface.
# Added clear transcription functionality and updated requirements for dependencies.
# Commit: fe95a59
import gradio as gr
from typhoon_asr import transcribe
import os
# Global variable to store transcription results
last_transcription = None
def transcribe_audio(audio_path):
    """Transcribe an audio file with typhoon_asr.

    Args:
        audio_path: Filesystem path to the audio file (Gradio supplies a
            filepath string, or None when no audio is present).

    Returns:
        A ``(status_message, transcription_text)`` tuple. On failure the
        status carries the error description and the text is ``""``.
    """
    global last_transcription

    # Guard clause: nothing to transcribe yet.
    if not audio_path:
        return "❌ No audio to transcribe. Please upload or record audio first.", ""

    try:
        print(f"DEBUG: Starting transcription of {audio_path}")
        result = transcribe(audio_path)
        print(f"DEBUG: Transcription result: {result}")
        last_transcription = result

        # The library may return a dict whose 'text' value is either a plain
        # string or an object exposing a .text attribute, or some other
        # object entirely — normalize all three cases to a string.
        # (Fixed: the original assigned a "πŸ”„ Transcribing audio..." status
        # that was dead code — overwritten before ever being displayed.)
        if isinstance(result, dict) and 'text' in result:
            text_field = result['text']
            transcription_text = getattr(text_field, 'text', text_field)
        else:
            transcription_text = str(result)

        return "βœ… Transcription completed!", transcription_text
    except Exception as e:
        # Surface the error to the UI rather than crashing the app.
        error_msg = f"❌ Transcription failed: {str(e)}"
        print(f"DEBUG: Error occurred: {error_msg}")
        return error_msg, ""
def clear_transcription():
    """Drop the cached result and report the cleared state to the UI.

    Returns:
        A ``(status_message, "")`` tuple; the empty string blanks the
        transcription output textbox.
    """
    global last_transcription
    last_transcription = None
    return "πŸ—‘οΈ Transcription cleared", ""
def audio_uploaded(audio_path):
    """Update the status line and toggle the transcribe button.

    Wired to the audio component's ``change`` event.

    Args:
        audio_path: Filepath of the uploaded/recorded audio, or ``None``
            when the component was cleared.

    Returns:
        A ``(status_message, button_update)`` tuple; the transcribe button
        is enabled only when audio is present.
    """
    if audio_path:
        # Fixed: the original used an f-string with no placeholders.
        return "βœ… Audio uploaded! Ready to transcribe.", gr.Button(interactive=True)
    return "❌ No audio uploaded", gr.Button(interactive=False)
# Create the Gradio interface.
# NOTE: component creation order defines the visual layout inside Blocks,
# so the statements below must not be reordered.
with gr.Blocks(title="Typhoon ASR API") as demo:
    gr.Markdown("# 🎀 Typhoon ASR Real-Time Transcription")
    gr.Markdown("Upload an audio file or record to get Thai speech transcription")

    # Audio input component — type="filepath" means the handlers receive a
    # path string (or None), which is what transcribe_audio expects.
    audio_input = gr.Audio(
        sources=["upload", "microphone"],
        type="filepath",
        label="Upload Audio File or Record"
    )

    # Read-only status line updated by every event handler below.
    status_text = gr.Textbox(
        label="Status",
        value="Upload or record audio to get started",
        interactive=False
    )

    # Transcription buttons — transcribe starts disabled until audio exists.
    gr.Markdown("### Transcription")
    with gr.Row():
        transcribe_btn = gr.Button("🎯 Transcribe", variant="primary", interactive=False)
        clear_btn = gr.Button("πŸ—‘οΈ Clear Result", variant="secondary")

    # Read-only transcription result box.
    transcription_output = gr.Textbox(
        label="Transcription Result",
        lines=10,
        placeholder="Transcription will appear here after uploading/recording and clicking transcribe...",
        interactive=False
    )

    # Event handlers
    # When audio changes (uploaded/recorded/cleared), update the status and
    # enable/disable the transcribe button accordingly.
    audio_input.change(
        fn=audio_uploaded,
        inputs=[audio_input],
        outputs=[status_text, transcribe_btn]
    )

    # Transcription button click: run ASR and fill status + result.
    transcribe_btn.click(
        fn=transcribe_audio,
        inputs=[audio_input],
        outputs=[status_text, transcription_output]
    )

    # Clear button: reset cached result, status line, and output box.
    clear_btn.click(
        fn=clear_transcription,
        outputs=[status_text, transcription_output]
    )
# For API access - this function can be called externally
def api_transcribe(audio_file_path):
"""API endpoint for external calls"""
return transcribe_audio(audio_file_path)
# Launch the Gradio app only when run as a script (not when imported,
# e.g. by a Hugging Face Space runner or a test harness).
if __name__ == "__main__":
    demo.launch()