Spaces:
Runtime error
Runtime error
Commit ·
3ef0477
1
Parent(s): c00bf70
update
Browse files- README.md +43 -0
- api_example.py +58 -0
- api_examples.md +243 -0
- app.py +67 -2
- requirements.txt +3 -0
- test_api.py +119 -0
README.md
CHANGED
|
@@ -31,11 +31,28 @@ This Hugging Face Space provides automatic speech recognition (ASR) for Uzbek la
|
|
| 31 |
|
| 32 |
## π Usage
|
| 33 |
|
|
|
|
|
|
|
| 34 |
1. **Record Audio**: Click the microphone icon to record directly in your browser
|
| 35 |
2. **Upload Audio**: Or upload an existing audio file
|
| 36 |
3. **Transcribe**: Click the "Transcribe" button to convert speech to text
|
| 37 |
4. **View Results**: The transcribed text will appear in the output box
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
## π§ Local Development
|
| 40 |
|
| 41 |
To run this application locally:
|
|
@@ -63,6 +80,13 @@ The application will be available at `http://localhost:7860`
|
|
| 63 |
- torchaudio>=2.0.0
|
| 64 |
- accelerate>=0.20.0
|
| 65 |
- huggingface_hub>=0.16.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
## π Logging
|
| 68 |
|
|
@@ -85,8 +109,27 @@ Contributions are welcome! Feel free to:
|
|
| 85 |
|
| 86 |
This project is licensed under the Apache 2.0 License.
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
## π Resources
|
| 89 |
|
|
|
|
| 90 |
- [Hugging Face Spaces Documentation](https://huggingface.co/docs/hub/spaces-config-reference)
|
|
|
|
| 91 |
- [Gradio Documentation](https://gradio.app/docs)
|
| 92 |
- [Whisper Model Card](https://huggingface.co/jmshd/whisper-uz)
|
|
|
|
| 31 |
|
| 32 |
## π Usage
|
| 33 |
|
| 34 |
+
### Web Interface
|
| 35 |
+
|
| 36 |
1. **Record Audio**: Click the microphone icon to record directly in your browser
|
| 37 |
2. **Upload Audio**: Or upload an existing audio file
|
| 38 |
3. **Transcribe**: Click the "Transcribe" button to convert speech to text
|
| 39 |
4. **View Results**: The transcribed text will appear in the output box
|
| 40 |
|
| 41 |
+
### API Access
|
| 42 |
+
|
| 43 |
+
This Space provides a REST API for programmatic access. You can submit audio files and receive transcriptions programmatically.
|
| 44 |
+
|
| 45 |
+
**Quick Example:**
|
| 46 |
+
```python
|
| 47 |
+
from gradio_client import Client
|
| 48 |
+
|
| 49 |
+
client = Client("YOUR_USERNAME/whisper-uzbek-stt")
|
| 50 |
+
result = client.predict("path/to/audio.mp3", api_name="/predict")
|
| 51 |
+
print(result)
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
For detailed API documentation and examples, see [api_examples.md](api_examples.md)
|
| 55 |
+
|
| 56 |
## π§ Local Development
|
| 57 |
|
| 58 |
To run this application locally:
|
|
|
|
| 80 |
- torchaudio>=2.0.0
|
| 81 |
- accelerate>=0.20.0
|
| 82 |
- huggingface_hub>=0.16.0
|
| 83 |
+
- scipy>=1.10.0
|
| 84 |
+
- numpy>=1.24.0
|
| 85 |
+
|
| 86 |
+
### For API Client Usage
|
| 87 |
+
```bash
|
| 88 |
+
pip install gradio-client
|
| 89 |
+
```
|
| 90 |
|
| 91 |
## π Logging
|
| 92 |
|
|
|
|
| 109 |
|
| 110 |
This project is licensed under the Apache 2.0 License.
|
| 111 |
|
| 112 |
+
## π API Features
|
| 113 |
+
|
| 114 |
+
- **REST API**: Full Gradio API support
|
| 115 |
+
- **Multiple Formats**: MP3, WAV, M4A, FLAC, etc.
|
| 116 |
+
- **Auto-Resampling**: Handles any sample rate (auto-converts to 16kHz)
|
| 117 |
+
- **Stereo to Mono**: Automatic conversion
|
| 118 |
+
- **Error Handling**: Comprehensive error messages
|
| 119 |
+
- **Progress Tracking**: Real-time processing updates
|
| 120 |
+
|
| 121 |
+
## π Project Files
|
| 122 |
+
|
| 123 |
+
- `app.py` - Main application with Gradio interface and API
|
| 124 |
+
- `requirements.txt` - Python dependencies
|
| 125 |
+
- `api_example.py` - Python client example
|
| 126 |
+
- `api_examples.md` - Comprehensive API documentation
|
| 127 |
+
- `.gitignore` - Git ignore rules
|
| 128 |
+
|
| 129 |
## π Resources
|
| 130 |
|
| 131 |
+
- [API Examples Documentation](api_examples.md)
|
| 132 |
- [Hugging Face Spaces Documentation](https://huggingface.co/docs/hub/spaces-config-reference)
|
| 133 |
+
- [Gradio API Documentation](https://gradio.app/docs/client)
|
| 134 |
- [Gradio Documentation](https://gradio.app/docs)
|
| 135 |
- [Whisper Model Card](https://huggingface.co/jmshd/whisper-uz)
|
api_example.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Example Python client for using the Whisper Uzbek STT API
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from gradio_client import Client
|
| 6 |
+
import sys
|
| 7 |
+
|
| 8 |
+
def transcribe_audio(audio_file_path, space_url):
    """
    Send one audio file to the Space's "/predict" endpoint

    Args:
        audio_file_path: Path to the audio file handed to the client
        space_url: URL of the Hugging Face Space (e.g., "username/space-name")

    Returns:
        Whatever ``client.predict`` returns, or None when connecting or
        predicting raised an exception (the error is printed, not re-raised)
    """
    try:
        print(f"Connecting to {space_url}...")
        space_client = Client(space_url)

        print(f"Uploading and transcribing {audio_file_path}...")
        return space_client.predict(audio_file_path, api_name="/predict")
    except Exception as exc:
        # Best-effort example script: report the failure and signal it via None.
        print(f"Error: {str(exc)}")
        return None
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def main():
    """Transcribe the file named on the command line and print the result."""
    # Replace with your Space URL
    space_id = "YOUR_USERNAME/whisper-uzbek-stt"

    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python api_example.py <audio_file_path>")
        print("Example: python api_example.py sample.mp3")
        sys.exit(1)

    transcription = transcribe_audio(cli_args[0], space_id)
    if not transcription:
        # transcribe_audio already printed any error; nothing to show.
        return

    rule = "=" * 50
    print("\n" + rule)
    print("TRANSCRIPTION:")
    print(rule)
    print(transcription)
    print(rule)


if __name__ == "__main__":
    main()
|
api_examples.md
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# API Usage Examples
|
| 2 |
+
|
| 3 |
+
This document provides examples of how to use the Whisper Uzbek STT API programmatically.
|
| 4 |
+
|
| 5 |
+
## Prerequisites
|
| 6 |
+
|
| 7 |
+
Install the Gradio client:
|
| 8 |
+
```bash
|
| 9 |
+
pip install gradio-client
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
## Python Examples
|
| 15 |
+
|
| 16 |
+
### Basic Usage
|
| 17 |
+
|
| 18 |
+
```python
|
| 19 |
+
from gradio_client import Client
|
| 20 |
+
|
| 21 |
+
# Connect to your Space
|
| 22 |
+
client = Client("YOUR_USERNAME/whisper-uzbek-stt")
|
| 23 |
+
|
| 24 |
+
# Transcribe an audio file
|
| 25 |
+
result = client.predict(
|
| 26 |
+
"path/to/audio.mp3",
|
| 27 |
+
api_name="/predict"
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
print(result)
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
### Advanced Usage with Error Handling
|
| 34 |
+
|
| 35 |
+
```python
|
| 36 |
+
from gradio_client import Client
|
| 37 |
+
import os
|
| 38 |
+
|
| 39 |
+
def transcribe_audio(audio_path, space_url):
|
| 40 |
+
"""Transcribe audio with error handling"""
|
| 41 |
+
|
| 42 |
+
if not os.path.exists(audio_path):
|
| 43 |
+
raise FileNotFoundError(f"Audio file not found: {audio_path}")
|
| 44 |
+
|
| 45 |
+
try:
|
| 46 |
+
client = Client(space_url)
|
| 47 |
+
result = client.predict(audio_path, api_name="/predict")
|
| 48 |
+
return result
|
| 49 |
+
except Exception as e:
|
| 50 |
+
print(f"Transcription error: {e}")
|
| 51 |
+
return None
|
| 52 |
+
|
| 53 |
+
# Usage
|
| 54 |
+
space_url = "YOUR_USERNAME/whisper-uzbek-stt"
|
| 55 |
+
transcription = transcribe_audio("uzbek_speech.wav", space_url)
|
| 56 |
+
|
| 57 |
+
if transcription:
|
| 58 |
+
print(f"Transcription: {transcription}")
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
### Batch Processing
|
| 62 |
+
|
| 63 |
+
```python
|
| 64 |
+
from gradio_client import Client
|
| 65 |
+
import os
|
| 66 |
+
from pathlib import Path
|
| 67 |
+
|
| 68 |
+
def batch_transcribe(audio_files, space_url):
|
| 69 |
+
"""Transcribe multiple audio files"""
|
| 70 |
+
|
| 71 |
+
client = Client(space_url)
|
| 72 |
+
results = {}
|
| 73 |
+
|
| 74 |
+
for audio_file in audio_files:
|
| 75 |
+
try:
|
| 76 |
+
print(f"Processing: {audio_file}")
|
| 77 |
+
result = client.predict(audio_file, api_name="/predict")
|
| 78 |
+
results[audio_file] = result
|
| 79 |
+
print(f"β Done: {audio_file}")
|
| 80 |
+
except Exception as e:
|
| 81 |
+
print(f"β Failed: {audio_file} - {e}")
|
| 82 |
+
results[audio_file] = None
|
| 83 |
+
|
| 84 |
+
return results
|
| 85 |
+
|
| 86 |
+
# Usage
|
| 87 |
+
audio_files = [
|
| 88 |
+
"audio1.mp3",
|
| 89 |
+
"audio2.wav",
|
| 90 |
+
"audio3.m4a"
|
| 91 |
+
]
|
| 92 |
+
|
| 93 |
+
space_url = "YOUR_USERNAME/whisper-uzbek-stt"
|
| 94 |
+
results = batch_transcribe(audio_files, space_url)
|
| 95 |
+
|
| 96 |
+
# Print results
|
| 97 |
+
for file, transcription in results.items():
|
| 98 |
+
print(f"\n{file}:")
|
| 99 |
+
print(f" {transcription}")
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
---
|
| 103 |
+
|
| 104 |
+
## JavaScript/Node.js Example
|
| 105 |
+
|
| 106 |
+
```javascript
|
| 107 |
+
const fs = require('fs');
|
| 108 |
+
const axios = require('axios');
|
| 109 |
+
const FormData = require('form-data');
|
| 110 |
+
|
| 111 |
+
async function transcribeAudio(audioPath, spaceUrl) {
|
| 112 |
+
const form = new FormData();
|
| 113 |
+
form.append('data', JSON.stringify([audioPath]));
|
| 114 |
+
|
| 115 |
+
try {
|
| 116 |
+
const response = await axios.post(
|
| 117 |
+
`${spaceUrl}/api/predict`,
|
| 118 |
+
form,
|
| 119 |
+
{
|
| 120 |
+
headers: form.getHeaders()
|
| 121 |
+
}
|
| 122 |
+
);
|
| 123 |
+
|
| 124 |
+
return response.data.data[0];
|
| 125 |
+
} catch (error) {
|
| 126 |
+
console.error('Error:', error.message);
|
| 127 |
+
return null;
|
| 128 |
+
}
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
// Usage
|
| 132 |
+
const spaceUrl = 'https://YOUR_USERNAME-whisper-uzbek-stt.hf.space';
|
| 133 |
+
const audioPath = './audio.mp3';
|
| 134 |
+
|
| 135 |
+
transcribeAudio(audioPath, spaceUrl)
|
| 136 |
+
.then(result => console.log('Transcription:', result));
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
+
---
|
| 140 |
+
|
| 141 |
+
## cURL Example
|
| 142 |
+
|
| 143 |
+
### Upload and Transcribe
|
| 144 |
+
|
| 145 |
+
```bash
|
| 146 |
+
curl -X POST "https://YOUR_USERNAME-whisper-uzbek-stt.hf.space/api/predict" \
|
| 147 |
+
-H "Content-Type: application/json" \
|
| 148 |
+
-d '{
|
| 149 |
+
"data": ["path/to/audio.mp3"]
|
| 150 |
+
}'
|
| 151 |
+
```
|
| 152 |
+
|
| 153 |
+
### Using a File Upload
|
| 154 |
+
|
| 155 |
+
```bash
|
| 156 |
+
# Save audio file first
|
| 157 |
+
audio_file="sample.mp3"
|
| 158 |
+
|
| 159 |
+
# Make API request
|
| 160 |
+
curl -X POST "https://YOUR_USERNAME-whisper-uzbek-stt.hf.space/api/predict" \
|
| 161 |
+
-F "data=@${audio_file}"
|
| 162 |
+
```
|
| 163 |
+
|
| 164 |
+
---
|
| 165 |
+
|
| 166 |
+
## Response Format
|
| 167 |
+
|
| 168 |
+
The API returns JSON with the following structure:
|
| 169 |
+
|
| 170 |
+
```json
|
| 171 |
+
{
|
| 172 |
+
"data": ["Transcribed text in Uzbek"],
|
| 173 |
+
"duration": 2.5,
|
| 174 |
+
"is_generating": false
|
| 175 |
+
}
|
| 176 |
+
```
|
| 177 |
+
|
| 178 |
+
---
|
| 179 |
+
|
| 180 |
+
## Error Handling
|
| 181 |
+
|
| 182 |
+
Possible error responses:
|
| 183 |
+
|
| 184 |
+
### No Audio Provided
|
| 185 |
+
```json
|
| 186 |
+
{
|
| 187 |
+
"data": ["⚠️ No audio provided. Please upload or record audio."]
|
| 188 |
+
}
|
| 189 |
+
```
|
| 190 |
+
|
| 191 |
+
### Processing Error
|
| 192 |
+
```json
|
| 193 |
+
{
|
| 194 |
+
"data": ["β Error during transcription: <error message>"]
|
| 195 |
+
}
|
| 196 |
+
```
|
| 197 |
+
|
| 198 |
+
---
|
| 199 |
+
|
| 200 |
+
## Rate Limiting
|
| 201 |
+
|
| 202 |
+
Hugging Face Spaces may have rate limits. For production use:
|
| 203 |
+
- Implement retry logic with exponential backoff
|
| 204 |
+
- Consider caching results
|
| 205 |
+
- Monitor your Space's usage metrics
|
| 206 |
+
|
| 207 |
+
---
|
| 208 |
+
|
| 209 |
+
## Best Practices
|
| 210 |
+
|
| 211 |
+
1. **File Formats**: Supported formats include MP3, WAV, M4A, FLAC
|
| 212 |
+
2. **File Size**: Keep files under 25MB for best performance
|
| 213 |
+
3. **Sample Rate**: Any sample rate works (automatically resampled to 16kHz)
|
| 214 |
+
4. **Audio Quality**: Higher quality audio = better transcription
|
| 215 |
+
5. **Language**: Optimized for Uzbek language
|
| 216 |
+
|
| 217 |
+
---
|
| 218 |
+
|
| 219 |
+
## Troubleshooting
|
| 220 |
+
|
| 221 |
+
### Connection Issues
|
| 222 |
+
```python
|
| 223 |
+
# Add timeout
|
| 224 |
+
from gradio_client import Client
|
| 225 |
+
|
| 226 |
+
client = Client("YOUR_SPACE_URL", timeout=60)
|
| 227 |
+
```
|
| 228 |
+
|
| 229 |
+
### Large Files
|
| 230 |
+
```python
|
| 231 |
+
# Use file upload instead of path
|
| 232 |
+
with open("large_audio.mp3", "rb") as f:
|
| 233 |
+
result = client.predict(f, api_name="/predict")
|
| 234 |
+
```
|
| 235 |
+
|
| 236 |
+
---
|
| 237 |
+
|
| 238 |
+
## Support
|
| 239 |
+
|
| 240 |
+
For issues or questions:
|
| 241 |
+
- Check the Space logs on Hugging Face
|
| 242 |
+
- Review the error messages in the response
|
| 243 |
+
- Ensure your audio file is valid and accessible
|
app.py
CHANGED
|
@@ -5,6 +5,8 @@ import logging
|
|
| 5 |
import os
|
| 6 |
from datetime import datetime
|
| 7 |
from huggingface_hub import HfApi
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# Setup logging
|
| 10 |
logging.basicConfig(
|
|
@@ -32,6 +34,40 @@ except Exception as e:
|
|
| 32 |
logger.error(f"Error loading model: {str(e)}")
|
| 33 |
raise
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
def transcribe(audio, progress=gr.Progress()):
|
| 36 |
"""
|
| 37 |
Transcribe audio to text using Whisper model
|
|
@@ -51,7 +87,20 @@ def transcribe(audio, progress=gr.Progress()):
|
|
| 51 |
progress(0.1, desc="Processing audio...")
|
| 52 |
sample_rate, audio_data = audio
|
| 53 |
|
| 54 |
-
logger.info(f"Processing audio - Sample rate: {sample_rate}, Shape: {audio_data.shape}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
progress(0.3, desc="Preparing input features...")
|
| 57 |
inputs = processor(
|
|
@@ -113,10 +162,23 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
|
| 113 |
2. Click the "Transcribe" button to convert speech to text
|
| 114 |
3. The transcribed text will appear in the output box
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
### ℹ️ Information:
|
| 117 |
- Supported language: Uzbek
|
| 118 |
- Processing: CPU-only (may be slower than GPU)
|
| 119 |
- Model size: Small
|
|
|
|
| 120 |
"""
|
| 121 |
)
|
| 122 |
|
|
@@ -129,9 +191,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
|
| 129 |
# Launch configuration for Hugging Face Spaces
|
| 130 |
if __name__ == "__main__":
|
| 131 |
logger.info("Launching Gradio interface...")
|
|
|
|
|
|
|
| 132 |
iface.launch(
|
| 133 |
share=False,
|
| 134 |
show_error=True,
|
| 135 |
server_name="0.0.0.0",
|
| 136 |
-
server_port=7860
|
|
|
|
| 137 |
)
|
|
|
|
| 5 |
import os
|
| 6 |
from datetime import datetime
|
| 7 |
from huggingface_hub import HfApi
|
| 8 |
+
import numpy as np
|
| 9 |
+
from scipy import signal
|
| 10 |
|
| 11 |
# Setup logging
|
| 12 |
logging.basicConfig(
|
|
|
|
| 34 |
logger.error(f"Error loading model: {str(e)}")
|
| 35 |
raise
|
| 36 |
|
| 37 |
+
def resample_audio(audio_data, orig_sr, target_sr=16000):
    """
    Resample audio to target sample rate

    Integer PCM input (int16 / int32) is scaled to the [-1.0, 1.0) float
    range before resampling; other dtypes are only cast to float32.

    Args:
        audio_data: Audio array (NumPy); resampled along its first axis
        orig_sr: Original sample rate
        target_sr: Target sample rate (default 16000 for Whisper)

    Returns:
        Resampled audio array as float32. When ``orig_sr == target_sr`` the
        input is returned unchanged (no dtype conversion, no scaling). When
        the input is too short to yield a single output sample, an empty
        float32 array is returned.
    """
    if orig_sr == target_sr:
        return audio_data

    # Capture the dtype BEFORE casting: once the data is float32 the
    # integer-PCM checks below could never match and scaling would be skipped.
    source_dtype = audio_data.dtype
    samples = audio_data.astype(np.float32)  # astype copies; caller's array is untouched

    # Scale integer PCM by its full-scale value
    if source_dtype == np.int16:
        samples /= 32768.0
    elif source_dtype == np.int32:
        samples /= 2147483648.0

    # Multiply before dividing so exact ratios are not truncated one sample
    # short by floating-point error in an intermediate duration.
    target_length = int(len(samples) * target_sr / orig_sr)

    # scipy's FFT resampler rejects a zero-length target; return empty audio.
    if target_length == 0:
        return np.zeros((0,) + samples.shape[1:], dtype=np.float32)

    # Resample using scipy (FFT-based, along axis 0)
    resampled = signal.resample(samples, target_length)

    return resampled.astype(np.float32)
|
| 70 |
+
|
| 71 |
def transcribe(audio, progress=gr.Progress()):
|
| 72 |
"""
|
| 73 |
Transcribe audio to text using Whisper model
|
|
|
|
| 87 |
progress(0.1, desc="Processing audio...")
|
| 88 |
sample_rate, audio_data = audio
|
| 89 |
|
| 90 |
+
logger.info(f"Processing audio - Sample rate: {sample_rate}, Shape: {audio_data.shape}, Dtype: {audio_data.dtype}")
|
| 91 |
+
|
| 92 |
+
# Handle stereo to mono conversion
|
| 93 |
+
if len(audio_data.shape) > 1:
|
| 94 |
+
logger.info("Converting stereo to mono")
|
| 95 |
+
audio_data = np.mean(audio_data, axis=1)
|
| 96 |
+
|
| 97 |
+
# Resample to 16000 Hz if needed
|
| 98 |
+
target_sr = 16000
|
| 99 |
+
if sample_rate != target_sr:
|
| 100 |
+
logger.info(f"Resampling from {sample_rate} Hz to {target_sr} Hz")
|
| 101 |
+
progress(0.2, desc=f"Resampling audio from {sample_rate} Hz to {target_sr} Hz...")
|
| 102 |
+
audio_data = resample_audio(audio_data, sample_rate, target_sr)
|
| 103 |
+
sample_rate = target_sr
|
| 104 |
|
| 105 |
progress(0.3, desc="Preparing input features...")
|
| 106 |
inputs = processor(
|
|
|
|
| 162 |
2. Click the "Transcribe" button to convert speech to text
|
| 163 |
3. The transcribed text will appear in the output box
|
| 164 |
|
| 165 |
+
### π API Access:
|
| 166 |
+
This Space provides a REST API for programmatic access. Click "Use via API" button below for details.
|
| 167 |
+
|
| 168 |
+
**Quick API Example (Python):**
|
| 169 |
+
```python
|
| 170 |
+
from gradio_client import Client
|
| 171 |
+
|
| 172 |
+
client = Client("YOUR_SPACE_URL")
|
| 173 |
+
result = client.predict("path/to/audio.mp3", api_name="/predict")
|
| 174 |
+
print(result)
|
| 175 |
+
```
|
| 176 |
+
|
| 177 |
### ℹ️ Information:
|
| 178 |
- Supported language: Uzbek
|
| 179 |
- Processing: CPU-only (may be slower than GPU)
|
| 180 |
- Model size: Small
|
| 181 |
+
- API: Enabled via Gradio Client
|
| 182 |
"""
|
| 183 |
)
|
| 184 |
|
|
|
|
| 191 |
# Launch configuration for Hugging Face Spaces
|
| 192 |
if __name__ == "__main__":
|
| 193 |
logger.info("Launching Gradio interface...")
|
| 194 |
+
logger.info("API endpoints will be available at /api/predict")
|
| 195 |
+
iface.queue() # Enable queue for better API performance
|
| 196 |
iface.launch(
|
| 197 |
share=False,
|
| 198 |
show_error=True,
|
| 199 |
server_name="0.0.0.0",
|
| 200 |
+
server_port=7860,
|
| 201 |
+
show_api=True # Enable API documentation
|
| 202 |
)
|
requirements.txt
CHANGED
|
@@ -4,3 +4,6 @@ torch>=2.0.0
|
|
| 4 |
torchaudio>=2.0.0
|
| 5 |
accelerate>=0.20.0
|
| 6 |
huggingface_hub>=0.16.0
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
torchaudio>=2.0.0
|
| 5 |
accelerate>=0.20.0
|
| 6 |
huggingface_hub>=0.16.0
|
| 7 |
+
scipy>=1.10.0
|
| 8 |
+
numpy>=1.24.0
|
| 9 |
+
gradio-client>=0.7.0
|
test_api.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Quick test script for the Whisper Uzbek STT API
|
| 3 |
+
|
| 4 |
+
This script tests both local and remote API endpoints.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import sys
|
| 8 |
+
import os
|
| 9 |
+
|
| 10 |
+
def test_local_api():
    """Check the API of a server running at http://localhost:7860.

    Connects with the Gradio client and, if a file path was given as the
    first command-line argument, sends it to "/predict" and prints the
    result.

    Returns:
        True when the connection succeeded and either no file was given or
        the prediction completed; False when the file does not exist or any
        exception was raised.
    """
    from gradio_client import Client

    print("Testing local API (http://localhost:7860)...")

    try:
        local_client = Client("http://localhost:7860")
        print("β Connected to local server")

        # Without a sample file only connectivity is verified.
        if len(sys.argv) <= 1:
            print("βΉ No audio file provided for testing")
            print("Usage: python test_api.py <audio_file_path>")
            return True

        sample_path = sys.argv[1]
        if not os.path.exists(sample_path):
            print(f"β Audio file not found: {sample_path}")
            return False

        print(f"β Testing with audio file: {sample_path}")
        transcription = local_client.predict(sample_path, api_name="/predict")

        banner = "=" * 60
        print(f"\n{banner}")
        print("TRANSCRIPTION RESULT:")
        print(banner)
        print(transcription)
        print(f"{banner}\n")
        return True

    except Exception as err:
        print(f"β Error: {str(err)}")
        return False
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def test_remote_api(space_url):
    """Check the API of a Space hosted on Hugging Face.

    Args:
        space_url: Space identifier passed straight to the Gradio client.

    Returns:
        True when the connection succeeded and either no file was given on
        the command line or the prediction completed; False when the file
        does not exist or any exception was raised.
    """
    from gradio_client import Client

    print(f"\nTesting remote API ({space_url})...")

    try:
        remote_client = Client(space_url)
        print("β Connected to remote Space")

        # Without a sample file only connectivity is verified.
        if len(sys.argv) <= 1:
            print("βΉ No audio file provided for testing")
            return True

        sample_path = sys.argv[1]
        if not os.path.exists(sample_path):
            print(f"β Audio file not found: {sample_path}")
            return False

        print(f"β Testing with audio file: {sample_path}")
        transcription = remote_client.predict(sample_path, api_name="/predict")

        banner = "=" * 60
        print(f"\n{banner}")
        print("TRANSCRIPTION RESULT:")
        print(banner)
        print(transcription)
        print(f"{banner}\n")
        return True

    except Exception as err:
        print(f"β Error: {str(err)}")
        return False
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def main():
    """Run the local check, optionally the remote one, then print a summary."""
    heavy_rule = "=" * 60

    print(heavy_rule)
    print("Whisper Uzbek STT - API Test Script")
    print(heavy_rule)
    print()

    # Test local API
    local_success = test_local_api()

    # Optionally test remote API
    print("\n" + "-" * 60)
    answer = input("Do you want to test the remote API? (y/n): ").lower().strip()

    # None means the remote test was not requested; its summary line is skipped.
    remote_success = None
    if answer == 'y':
        space_url = input("Enter your Space URL (e.g., username/space-name): ").strip()
        if not space_url:
            print("β No Space URL provided")
            remote_success = False
        else:
            remote_success = test_remote_api(space_url)

    # Summary
    print("\n" + heavy_rule)
    print("TEST SUMMARY")
    print(heavy_rule)
    local_verdict = 'β PASSED' if local_success else 'β FAILED'
    print(f"Local API: {local_verdict}")
    if remote_success is not None:
        remote_verdict = 'β PASSED' if remote_success else 'β FAILED'
        print(f"Remote API: {remote_verdict}")
    print(heavy_rule)


if __name__ == "__main__":
    # Fail early with install instructions when the client library is absent.
    try:
        from gradio_client import Client
    except ImportError:
        print("β Error: gradio-client is not installed")
        print("Install it with: pip install gradio-client")
        sys.exit(1)

    main()
|