poompengcharoen committed on
Commit
0002e81
·
1 Parent(s): f04f941

Initial commit

Browse files
Files changed (3) hide show
  1. README.md +42 -5
  2. app.py +69 -0
  3. requirements.txt +2 -0
README.md CHANGED
@@ -1,12 +1,49 @@
1
  ---
2
- title: Typhoon Asr Api
3
- emoji: πŸ‘
4
- colorFrom: indigo
5
- colorTo: blue
6
  sdk: gradio
7
  sdk_version: 5.49.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Typhoon ASR API
3
+ emoji: 🎤
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
  sdk_version: 5.49.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ # Typhoon ASR Real-Time API
13
+
14
+ This Space provides a free API for Thai speech recognition using the Typhoon ASR Real-Time model.
15
+
16
+ ## Features
17
+
18
+ - 🎯 **Real-time Thai transcription**
19
+ - ⏱️ **Word-level timestamps**
20
+ - 🎀 **Microphone input support**
21
+ - 📁 **File upload support**
22
+ - 🔄 **API endpoint for external calls**
23
+
24
+ ## Usage
25
+
26
+ 1. **Upload an audio file** or **record directly**
27
+ 2. **Click "Transcribe"** to get Thai transcription
28
+ 3. **View results** with word-level timestamps
29
+
30
+ ## API Endpoint
31
+
32
+ This Space provides an API endpoint that can be called from external applications:
33
+
34
+ ```
35
+ POST https://YOUR_USERNAME-typhoon-asr-api.hf.space/api/predict
36
+ ```
37
+
38
+ ## Supported Audio Formats
39
+
40
+ - WAV, MP3, FLAC, OGG, OPUS
41
+ - Any audio format supported by the Typhoon ASR model
42
+
43
+ ## Model Information
44
+
45
+ - **Model:** Typhoon ASR Real-Time
46
+ - **Language:** Thai
47
+ - **Architecture:** FastConformer-Transducer
48
+ - **Performance:** 4097x real-time processing speed
49
+ - **Accuracy:** CER 0.0984
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from typhoon_asr import transcribe
3
+ import tempfile
4
+ import os
5
+
6
def transcribe_audio(audio_file):
    """Transcribe an audio file with Typhoon ASR and format the result as Markdown.

    Args:
        audio_file: Path of the audio file to transcribe, or ``None``/empty
            when no audio was provided.

    Returns:
        A Markdown string. On success it holds the transcription and, when
        the result contains timestamps, one ``[start - end] word`` entry per
        line. When no file was given it is a prompt to upload one. When
        transcription or formatting fails it is ``"Error: <message>"``.
    """
    if not audio_file:
        return "Please upload an audio file"

    try:
        result = transcribe(audio_file, with_timestamps=True)

        # Markdown treats a lone "\n" as a soft break and renders it as a
        # space, which would run every entry together in one paragraph.
        # Two trailing spaces before the newline force a hard line break.
        parts = [f"**Transcription:**  \n{result['text']}\n\n"]

        timestamps = result.get('timestamps', [])
        if timestamps:
            parts.append("**Word-level Timestamps:**  \n")
            parts.extend(
                f"[{ts['start']:.2f}s - {ts['end']:.2f}s] {ts['word']}  \n"
                for ts in timestamps
            )

        return "".join(parts)
    except Exception as e:
        # UI boundary: surface the failure in the output panel rather than
        # letting the exception propagate out of the event handler.
        return f"Error: {e}"
31
+
32
+ # Create Gradio interface
33
with gr.Blocks(title="Typhoon ASR API") as demo:
    gr.Markdown("# 🎤 Typhoon ASR Real-Time Transcription")
    gr.Markdown("Upload an audio file to get Thai speech transcription with word-level timestamps")

    with gr.Row():
        with gr.Column():
            # type="filepath" makes the component pass a path on disk to the
            # handler, which is what transcribe_audio forwards to the model.
            audio_input = gr.Audio(
                label="Upload Audio File",
                type="filepath",
                sources=["upload", "microphone"],
            )
            transcribe_btn = gr.Button("🎯 Transcribe", variant="primary", size="lg")

        with gr.Column():
            output = gr.Markdown(label="Transcription Result")

    # Wire the button to the transcription handler. The explicit api_name
    # gives the endpoint a stable "predict" name for API clients; without it
    # the endpoint would be named after the handler function.
    transcribe_btn.click(
        fn=transcribe_audio,
        inputs=[audio_input],
        outputs=[output],
        api_name="predict",
    )

    # Placeholder examples section: no sample files are bundled yet.
    gr.Examples(
        examples=[],
        inputs=[audio_input],
        label="Example audio files (upload your own)",
    )
62
+
63
+ # For API access - this function can be called externally
64
def api_transcribe(audio_file_path):
    """Return the output of ``transcribe_audio`` for ``audio_file_path``.

    This is a plain pass-through: the argument is handed to
    ``transcribe_audio`` unchanged and its return value is returned as-is.
    """
    transcription = transcribe_audio(audio_file_path)
    return transcription
67
+
68
if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script, so
    # importing the module does not launch the app as a side effect.
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ typhoon-asr
2
+ gradio>=4.0.0