poompengcharoen commited on
Commit
fe95a59
·
1 Parent(s): a618605

Refactor audio transcription logic and improve user interface. Added clear transcription functionality and updated requirements for dependencies.

Browse files
Files changed (2) hide show
  1. app.py +78 -60
  2. requirements.txt +13 -2
app.py CHANGED
@@ -1,91 +1,109 @@
1
  import gradio as gr
2
  from typhoon_asr import transcribe
3
- import tempfile
4
  import os
5
 
6
- def transcribe_audio(audio_file):
7
- """Transcribe audio file using Typhoon ASR"""
8
- print(f"DEBUG: Audio file received: {audio_file}")
9
-
10
- if audio_file is None:
11
- return "❌ Please upload an audio file"
12
 
13
- if not os.path.exists(audio_file):
14
- return f"❌ File not found: {audio_file}"
15
 
16
  try:
17
- print(f"DEBUG: Starting transcription of {audio_file}")
18
-
19
- # Check file size
20
- file_size = os.path.getsize(audio_file)
21
- print(f"DEBUG: File size: {file_size} bytes")
22
 
23
- # Transcribe using Typhoon ASR
24
- print("DEBUG: Calling transcribe function...")
25
- result = transcribe(audio_file, with_timestamps=True)
26
  print(f"DEBUG: Transcription result: {result}")
27
 
28
- # Format the result
29
- text = result['text']
30
- timestamps = result.get('timestamps', [])
31
 
32
- # Create formatted output
33
- output = f"**βœ… Transcription Complete:**\n{text}\n\n"
 
 
 
 
 
 
34
 
35
- if timestamps:
36
- output += "**Word-level Timestamps:**\n"
37
- for ts in timestamps:
38
- output += f"[{ts['start']:.2f}s - {ts['end']:.2f}s] {ts['word']}\n"
39
-
40
- print("DEBUG: Transcription successful")
41
- return output
42
 
43
  except Exception as e:
44
- error_msg = f"❌ Error: {str(e)}"
45
  print(f"DEBUG: Error occurred: {error_msg}")
46
- return error_msg
 
 
 
 
 
 
47
 
48
- # Create Gradio interface
 
 
 
 
 
 
 
49
  with gr.Blocks(title="Typhoon ASR API") as demo:
50
  gr.Markdown("# 🎀 Typhoon ASR Real-Time Transcription")
51
- gr.Markdown("Upload an audio file to get Thai speech transcription with word-level timestamps")
 
 
 
 
 
 
 
52
 
 
 
 
 
 
 
 
 
 
53
  with gr.Row():
54
- with gr.Column():
55
- audio_input = gr.Audio(
56
- label="Upload Audio File",
57
- type="filepath",
58
- sources=["upload", "microphone"]
59
- )
60
- transcribe_btn = gr.Button("🎯 Transcribe", variant="primary", size="lg")
61
-
62
- with gr.Column():
63
- output = gr.Markdown(label="Transcription Result")
64
 
65
- # Add a test button
66
- test_btn = gr.Button("πŸ§ͺ Test Connection", variant="secondary")
 
 
 
 
 
67
 
68
- def test_connection():
69
- return "βœ… Connection test successful! The app is working."
 
 
 
 
 
70
 
71
- # Connect the buttons
72
  transcribe_btn.click(
73
  fn=transcribe_audio,
74
  inputs=[audio_input],
75
- outputs=[output]
76
- )
77
-
78
- test_btn.click(
79
- fn=test_connection,
80
- inputs=[],
81
- outputs=[output]
82
  )
83
 
84
- # Add examples
85
- gr.Examples(
86
- examples=[],
87
- inputs=[audio_input],
88
- label="Example audio files (upload your own)"
89
  )
90
 
91
  # For API access - this function can be called externally
 
1
  import gradio as gr
2
  from typhoon_asr import transcribe
 
3
  import os
4
 
5
# Global variable to store the most recent raw transcription result
last_transcription = None

def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* using typhoon_asr.

    Args:
        audio_path: Filesystem path to the uploaded/recorded audio, or a
            falsy value when no audio has been provided yet.

    Returns:
        A ``(status_message, transcription_text)`` tuple of strings.
        ``transcription_text`` is the empty string when no audio was
        supplied or transcription failed.
    """
    global last_transcription

    # Guard clause: nothing to do without an audio file.
    if not audio_path:
        return "❌ No audio to transcribe. Please upload or record audio first.", ""

    try:
        # Perform transcription (basic only)
        print(f"DEBUG: Starting transcription of {audio_path}")
        result = transcribe(audio_path)
        print(f"DEBUG: Transcription result: {result}")

        # Keep the raw result so other handlers can inspect it later.
        last_transcription = result

        # Handle different result formats: a dict with a 'text' entry
        # (whose value may itself be an object exposing a .text
        # attribute), or anything else stringified as a fallback.
        if isinstance(result, dict) and 'text' in result:
            text_field = result['text']
            if hasattr(text_field, 'text'):
                transcription_text = text_field.text
            else:
                transcription_text = text_field
        else:
            transcription_text = str(result)

        # NOTE: the original assigned an intermediate "transcribing..."
        # status message that was overwritten before ever being shown;
        # that dead assignment has been removed.
        return "βœ… Transcription completed!", transcription_text

    except Exception as e:
        error_msg = f"❌ Transcription failed: {str(e)}"
        print(f"DEBUG: Error occurred: {error_msg}")
        return error_msg, ""
42
+
43
def clear_transcription():
    """Reset the stored transcription and report the cleared state.

    Returns:
        A ``(status_message, transcription_text)`` tuple whose second
        element is always the empty string.
    """
    global last_transcription
    last_transcription = None
    status, cleared_text = "πŸ—‘οΈ Transcription cleared", ""
    return status, cleared_text
48
 
49
def audio_uploaded(audio_path):
    """Handle an audio upload/record event: refresh status and button.

    Enables the transcribe button only when audio is actually present.

    Args:
        audio_path: Filesystem path of the uploaded/recorded audio, or a
            falsy value when the audio component was cleared.

    Returns:
        A ``(status_message, button_update)`` tuple; the second element
        is a ``gr.Button`` carrying the new ``interactive`` state.
    """
    if audio_path:
        # Fix: the original used an f-string with no placeholders (F541).
        return "βœ… Audio uploaded! Ready to transcribe.", gr.Button(interactive=True)
    return "❌ No audio uploaded", gr.Button(interactive=False)
55
+
56
# Build the Gradio UI: audio in, status line, transcribe/clear buttons,
# and a read-only result box, wired to the handler functions above.
with gr.Blocks(title="Typhoon ASR API") as demo:
    gr.Markdown("# 🎀 Typhoon ASR Real-Time Transcription")
    gr.Markdown("Upload an audio file or record to get Thai speech transcription")

    # Audio source: file upload or live microphone, passed as a filepath.
    audio_input = gr.Audio(
        label="Upload Audio File or Record",
        type="filepath",
        sources=["upload", "microphone"],
    )

    # Read-only status line shown above the controls.
    status_text = gr.Textbox(
        value="Upload or record audio to get started",
        label="Status",
        interactive=False,
    )

    gr.Markdown("### Transcription")
    with gr.Row():
        # Transcribe starts disabled; enabled by audio_uploaded on change.
        transcribe_btn = gr.Button("🎯 Transcribe", variant="primary", interactive=False)
        clear_btn = gr.Button("πŸ—‘οΈ Clear Result", variant="secondary")

    # Read-only box that receives the transcription text.
    transcription_output = gr.Textbox(
        label="Transcription Result",
        placeholder="Transcription will appear here after uploading/recording and clicking transcribe...",
        lines=10,
        interactive=False,
    )

    # Audio changed (uploaded/recorded/cleared): refresh status + button.
    audio_input.change(
        fn=audio_uploaded,
        inputs=[audio_input],
        outputs=[status_text, transcribe_btn],
    )

    # Transcribe click: run ASR, write status and result text.
    transcribe_btn.click(
        fn=transcribe_audio,
        inputs=[audio_input],
        outputs=[status_text, transcription_output],
    )

    # Clear click: reset status line and result box.
    clear_btn.click(
        fn=clear_transcription,
        outputs=[status_text, transcription_output],
    )

# For API access - this function can be called externally
requirements.txt CHANGED
@@ -1,2 +1,13 @@
1
- typhoon-asr
2
- gradio>=4.0.0
 
 
 
 
 
 
 
 
 
 
 
 
1
+ typhoon-asr==0.1.0
2
+ gradio==5.44.1
3
+ torch==2.8.0
4
+ nemo-toolkit==2.4.0
5
+ librosa==0.11.0
6
+ soundfile==0.13.1
7
+ numpy==1.26.4
8
+ scipy==1.15.3
9
+ transformers==4.51.3
10
+ huggingface-hub==0.34.4
11
+ fastapi==0.116.1
12
+ uvicorn==0.35.0
13
+ python-multipart==0.0.20