Spaces:
Sleeping
Sleeping
Ronaldo committed on
Commit ·
3e08670
0
Parent(s):
first commit
Browse files- .gitignore +56 -0
- ARCHITECTURE.md +273 -0
- DEPLOYMENT.md +206 -0
- Dockerfile +22 -0
- GRADIO_DEPLOY.md +182 -0
- GRADIO_SUMMARY.txt +196 -0
- PROJECT_SUMMARY.py +197 -0
- README.md +204 -0
- app.py +312 -0
- app_config.yaml +12 -0
- app_v2.py +326 -0
- client.py +102 -0
- docker-compose.yml +23 -0
- examples.py +148 -0
- requirements-dev.txt +21 -0
- requirements.txt +7 -0
- run.sh +39 -0
- setup_project.py +156 -0
- test_api.py +131 -0
.gitignore
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
env/
|
| 8 |
+
venv/
|
| 9 |
+
ENV/
|
| 10 |
+
build/
|
| 11 |
+
develop-eggs/
|
| 12 |
+
dist/
|
| 13 |
+
downloads/
|
| 14 |
+
eggs/
|
| 15 |
+
.eggs/
|
| 16 |
+
lib/
|
| 17 |
+
lib64/
|
| 18 |
+
parts/
|
| 19 |
+
sdist/
|
| 20 |
+
var/
|
| 21 |
+
wheels/
|
| 22 |
+
pip-wheel-metadata/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# IDE
|
| 30 |
+
.vscode/
|
| 31 |
+
.idea/
|
| 32 |
+
*.swp
|
| 33 |
+
*.swo
|
| 34 |
+
*~
|
| 35 |
+
|
| 36 |
+
# Testing
|
| 37 |
+
.pytest_cache/
|
| 38 |
+
.coverage
|
| 39 |
+
.tox/
|
| 40 |
+
|
| 41 |
+
# Audio files
|
| 42 |
+
*.wav
|
| 43 |
+
*.mp3
|
| 44 |
+
*.m4a
|
| 45 |
+
*.ogg
|
| 46 |
+
|
| 47 |
+
# Output
|
| 48 |
+
output*.wav
|
| 49 |
+
*.pyc
|
| 50 |
+
|
| 51 |
+
# OS
|
| 52 |
+
.DS_Store
|
| 53 |
+
Thumbs.db
|
| 54 |
+
|
| 55 |
+
# Model cache (optional, remove if using persistent storage)
|
| 56 |
+
# .cache/
|
ARCHITECTURE.md
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Architecture et Documentation Technique
|
| 2 |
+
|
| 3 |
+
## 🏗️ Architecture de l'API
|
| 4 |
+
|
| 5 |
+
```
|
| 6 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 7 |
+
│ Client/Frontend │
|
| 8 |
+
│ (Web, Mobile, CLI, Python Client, cURL, etc.) │
|
| 9 |
+
└────────────────────┬────────────────────────────────────────┘
|
| 10 |
+
│ HTTP/REST
|
| 11 |
+
┌────────────────────▼────────────────────────────────────────┐
|
| 12 |
+
│ Flask API Server │
|
| 13 |
+
│ - Health Check GET /health │
|
| 14 |
+
│ - Documentation GET / │
|
| 15 |
+
│ - Langues GET /supported-languages │
|
| 16 |
+
│ - ASR (Audio→Text) POST /asr │
|
| 17 |
+
│ - TTS (Text→Audio) POST /tts │
|
| 18 |
+
└────────────┬──────────────────────────────┬─────────────────┘
|
| 19 |
+
│ │
|
| 20 |
+
┌────────▼──────────┐ ┌──────────▼──────────┐
|
| 21 |
+
│ ASR Pipeline │ │ TTS Pipeline │
|
| 22 |
+
│ │ │ │
|
| 23 |
+
│ 1. Load Audio │ │ 1. Validate Text │
|
| 24 |
+
│ 2. Process │ │ 2. Load Model │
|
| 25 |
+
│ 3. Tokenize │ │ 3. Tokenize │
|
| 26 |
+
│ 4. Infer w/ MMS │ │ 4. Infer (VITS) │
|
| 27 |
+
│ 5. Decode │ │ 5. Generate WAV │
|
| 28 |
+
└────────┬──────────┘ └──────────┬─────────┘
|
| 29 |
+
│ │
|
| 30 |
+
┌────────▼──────────────────────────────▼──────────┐
|
| 31 |
+
│ Model Cache & Management │
|
| 32 |
+
│ - facebook/mms-1b-all (ASR) │
|
| 33 |
+
│ - facebook/mms-tts-* (8 langues) │
|
| 34 |
+
│ - Thread-safe loading │
|
| 35 |
+
│ - Lazy initialization │
|
| 36 |
+
└────────┬──────────────────────────────────────┘
|
| 37 |
+
│
|
| 38 |
+
┌────────▼──────────────────────────────────────┐
|
| 39 |
+
│ PyTorch / GPU Support │
|
| 40 |
+
│ - Détection automatique GPU/CPU │
|
| 41 |
+
│ - Device management │
|
| 42 |
+
└──────────────────────────────────────────────┘
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
## 📊 Flux des requêtes
|
| 46 |
+
|
| 47 |
+
### ASR (Automatic Speech Recognition)
|
| 48 |
+
|
| 49 |
+
```
|
| 50 |
+
Audio File
|
| 51 |
+
↓
|
| 52 |
+
[Validation] → Error if invalid
|
| 53 |
+
↓
|
| 54 |
+
[Load & Resample] → Convert to 16kHz mono
|
| 55 |
+
↓
|
| 56 |
+
[Normalize] → [-1, 1] range
|
| 57 |
+
↓
|
| 58 |
+
[Truncate] → Max 30 seconds
|
| 59 |
+
↓
|
| 60 |
+
[Tokenize] → Convert to features
|
| 61 |
+
↓
|
| 62 |
+
[Infer] → facebook/mms-1b-all (GPU/CPU)
|
| 63 |
+
↓
|
| 64 |
+
[Decode] → Text output
|
| 65 |
+
↓
|
| 66 |
+
JSON Response
|
| 67 |
+
```
|
| 68 |
+
|
| 69 |
+
### TTS (Text-to-Speech)
|
| 70 |
+
|
| 71 |
+
```
|
| 72 |
+
Text + Language
|
| 73 |
+
↓
|
| 74 |
+
[Validation] → Error if empty/too long
|
| 75 |
+
↓
|
| 76 |
+
[Load Model] → facebook/mms-tts-{lang}
|
| 77 |
+
↓
|
| 78 |
+
[Tokenize] → Convert text to token IDs
|
| 79 |
+
↓
|
| 80 |
+
[Infer] → VITS model (GPU/CPU)
|
| 81 |
+
↓
|
| 82 |
+
[Generate WAV] → Audio synthesis (22050 Hz)
|
| 83 |
+
↓
|
| 84 |
+
WAV File (audio/wav)
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
## 🧠 Modèles utilisés
|
| 88 |
+
|
| 89 |
+
### ASR: facebook/mms-1b-all
|
| 90 |
+
|
| 91 |
+
- **Architecture**: wav2vec2
|
| 92 |
+
- **Taille**: 964.8M parameters
|
| 93 |
+
- **Langues**: 100+ (ISO 639-3)
|
| 94 |
+
- **Input**: Audio 16kHz mono
|
| 95 |
+
- **Output**: Transcription texte
|
| 96 |
+
- **Entraînement**: XLS-R + fine-tuning multilingue
|
| 97 |
+
|
| 98 |
+
### TTS: facebook/mms-tts-{language}
|
| 99 |
+
|
| 100 |
+
- **Architecture**: VITS (Variational Inference Text-to-Speech)
|
| 101 |
+
- **Taille**: ~5-10M parameters par modèle
|
| 102 |
+
- **Langues**: 8 (voir supported languages)
|
| 103 |
+
- **Input**: Texte (max 1000 chars)
|
| 104 |
+
- **Output**: Waveform 22050 Hz
|
| 105 |
+
- **Entraînement**: Multilingual dataset + data augmentation
|
| 106 |
+
|
| 107 |
+
## 🔧 Configuration
|
| 108 |
+
|
| 109 |
+
```python
|
| 110 |
+
SAMPLE_RATE = 16000 # Taux d'échantillonnage ASR
|
| 111 |
+
MAX_AUDIO_LENGTH = 30 # Max 30 secondes d'audio
|
| 112 |
+
MAX_TEXT_LENGTH = 1000 # Max 1000 caractères
|
| 113 |
+
DEVICE = auto (GPU if available)
|
| 114 |
+
MODEL_CACHE = Thread-safe dict
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
## 📈 Performance
|
| 118 |
+
|
| 119 |
+
| Métrique | Valeur |
|
| 120 |
+
|----------|--------|
|
| 121 |
+
| Première requête ASR | 2-5 min (chargement modèle) |
|
| 122 |
+
| Requêtes suivantes ASR | 1-10 sec (audio 10sec) |
|
| 123 |
+
| Première requête TTS | 30-60 sec (chargement modèle) |
|
| 124 |
+
| Requêtes suivantes TTS | 1-5 sec (100 chars) |
|
| 125 |
+
| Mémoire GPU | ~2GB (ASR) + 1GB (TTS) |
|
| 126 |
+
| Mémoire RAM | ~1GB cache |
|
| 127 |
+
|
| 128 |
+
## 🔐 Sécurité
|
| 129 |
+
|
| 130 |
+
### Input Validation
|
| 131 |
+
- ✅ Vérification type fichier audio
|
| 132 |
+
- ✅ Limitation taille audio (30s)
|
| 133 |
+
- ✅ Limitation taille texte (1000 chars)
|
| 134 |
+
- ✅ Vérification contenu non-vide
|
| 135 |
+
|
| 136 |
+
### Rate Limiting (À ajouter)
|
| 137 |
+
```python
|
| 138 |
+
from flask_limiter import Limiter
|
| 139 |
+
limiter = Limiter(key_func=lambda: request.remote_addr, app=app)
|
| 140 |
+
|
| 141 |
+
@app.route('/tts', methods=['POST'])
|
| 142 |
+
@limiter.limit("10/minute")
|
| 143 |
+
def tts():
|
| 144 |
+
...
|
| 145 |
+
```
|
| 146 |
+
|
| 147 |
+
### Authentication (À ajouter)
|
| 148 |
+
```python
|
| 149 |
+
from functools import wraps
|
| 150 |
+
def require_token(f):
|
| 151 |
+
@wraps(f)
|
| 152 |
+
def decorated(*args, **kwargs):
|
| 153 |
+
token = request.headers.get('Authorization')
|
| 154 |
+
if not validate_token(token):
|
| 155 |
+
return {'error': 'Unauthorized'}, 401
|
| 156 |
+
return f(*args, **kwargs)
|
| 157 |
+
return decorated
|
| 158 |
+
```
|
| 159 |
+
|
| 160 |
+
## 🚀 Optimisations
|
| 161 |
+
|
| 162 |
+
### Cache des modèles
|
| 163 |
+
- Modèles chargés une seule fois
|
| 164 |
+
- Partage entre toutes les requêtes
|
| 165 |
+
- Thread-safe avec locks
|
| 166 |
+
|
| 167 |
+
### GPU Acceleration
|
| 168 |
+
- Détection automatique GPU
|
| 169 |
+
- Inference sur GPU si disponible
|
| 170 |
+
- Fallback CPU automatique
|
| 171 |
+
|
| 172 |
+
### Memory Management
|
| 173 |
+
- Gradients désactivés pour inférence
|
| 174 |
+
- Modèles en eval mode
|
| 175 |
+
- Audio / texte tronqués
|
| 176 |
+
|
| 177 |
+
## 📦 Déploiement
|
| 178 |
+
|
| 179 |
+
### Local Development
|
| 180 |
+
```bash
|
| 181 |
+
python app_v2.py
|
| 182 |
+
# Runs on http://localhost:7860
|
| 183 |
+
```
|
| 184 |
+
|
| 185 |
+
### Docker
|
| 186 |
+
```bash
|
| 187 |
+
docker build -t mms-api .
|
| 188 |
+
docker run -p 7860:7860 mms-api
|
| 189 |
+
```
|
| 190 |
+
|
| 191 |
+
### Docker Compose (avec GPU)
|
| 192 |
+
```bash
|
| 193 |
+
docker-compose up
|
| 194 |
+
```
|
| 195 |
+
|
| 196 |
+
### Hugging Face Spaces
|
| 197 |
+
- Crée un Space Docker
|
| 198 |
+
- Push code vers HF
|
| 199 |
+
- Auto-build et déploiement
|
| 200 |
+
- URL: https://huggingface.co/spaces/{user}/{space}
|
| 201 |
+
|
| 202 |
+
## 📡 API Endpoints
|
| 203 |
+
|
| 204 |
+
### GET /
|
| 205 |
+
Documentation et métadonnées
|
| 206 |
+
|
| 207 |
+
### GET /health
|
| 208 |
+
État du service et device info
|
| 209 |
+
|
| 210 |
+
### GET /supported-languages
|
| 211 |
+
Langues supportées ASR/TTS
|
| 212 |
+
|
| 213 |
+
### GET /models-info
|
| 214 |
+
Infos détaillées sur les modèles
|
| 215 |
+
|
| 216 |
+
### POST /asr
|
| 217 |
+
Transcription audio
|
| 218 |
+
- **Input**: multipart/form-data (audio + language)
|
| 219 |
+
- **Output**: JSON (transcription + métadonnées)
|
| 220 |
+
|
| 221 |
+
### POST /tts
|
| 222 |
+
Synthèse vocale
|
| 223 |
+
- **Input**: JSON (text + language)
|
| 224 |
+
- **Output**: WAV audio file
|
| 225 |
+
|
| 226 |
+
## 🐛 Debugging
|
| 227 |
+
|
| 228 |
+
### Logs
|
| 229 |
+
```bash
|
| 230 |
+
# Local
|
| 231 |
+
python app_v2.py
|
| 232 |
+
# Voir les logs en stdout
|
| 233 |
+
|
| 234 |
+
# Docker
|
| 235 |
+
docker logs <container_id>
|
| 236 |
+
|
| 237 |
+
# HF Spaces
|
| 238 |
+
# Voir onglet "Logs" dans le Space
|
| 239 |
+
```
|
| 240 |
+
|
| 241 |
+
### Common Issues
|
| 242 |
+
|
| 243 |
+
**Issue**: Model not found
|
| 244 |
+
**Solution**: Attendre le téléchargement des modèles (5-10 min)
|
| 245 |
+
|
| 246 |
+
**Issue**: CUDA out of memory
|
| 247 |
+
**Solution**: Réduire MAX_AUDIO_LENGTH ou utiliser CPU
|
| 248 |
+
|
| 249 |
+
**Issue**: Port already in use
|
| 250 |
+
**Solution**: `PORT=8080 python app_v2.py`
|
| 251 |
+
|
| 252 |
+
## 🔮 Roadmap
|
| 253 |
+
|
| 254 |
+
- [ ] Streaming ASR/TTS
|
| 255 |
+
- [ ] Batch processing
|
| 256 |
+
- [ ] WebSockets pour streaming
|
| 257 |
+
- [ ] Caching Redis
|
| 258 |
+
- [ ] Database logging
|
| 259 |
+
- [ ] Rate limiting
|
| 260 |
+
- [ ] Authentication/API keys
|
| 261 |
+
- [ ] Metrics (Prometheus)
|
| 262 |
+
- [ ] Web UI (Gradio/Streamlit)
|
| 263 |
+
- [ ] More languages
|
| 264 |
+
- [ ] Emotion synthesis
|
| 265 |
+
- [ ] Custom voices
|
| 266 |
+
|
| 267 |
+
## 📚 Références
|
| 268 |
+
|
| 269 |
+
- [Meta MMS Paper](https://arxiv.org/abs/2305.13516)
|
| 270 |
+
- [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all)
|
| 271 |
+
- [facebook/mms-tts](https://huggingface.co/facebook/mms-tts)
|
| 272 |
+
- [Transformers Documentation](https://huggingface.co/docs/transformers)
|
| 273 |
+
- [Flask Documentation](https://flask.palletsprojects.com/)
|
DEPLOYMENT.md
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Guide de déploiement sur Hugging Face Spaces
|
| 2 |
+
|
| 3 |
+
## Prérequis
|
| 4 |
+
|
| 5 |
+
- Compte Hugging Face: https://huggingface.co
|
| 6 |
+
- Git installé localement
|
| 7 |
+
- Token HF avec permissions write (https://huggingface.co/settings/tokens)
|
| 8 |
+
|
| 9 |
+
## Étapes de déploiement
|
| 10 |
+
|
| 11 |
+
### 1. Créer un Space sur Hugging Face
|
| 12 |
+
|
| 13 |
+
```bash
|
| 14 |
+
# Visite https://huggingface.co/new-space
|
| 15 |
+
# - Remplis le formulaire:
|
| 16 |
+
# - Name: mms-asr-tts (ou autre nom)
|
| 17 |
+
# - License: OpenRAIL (ou CC-BY-NC-4.0 pour correspondre à Meta)
|
| 18 |
+
# - Space SDK: Docker
|
| 19 |
+
# - Clique "Create Space"
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
### 2. Cloner le Space
|
| 23 |
+
|
| 24 |
+
```bash
|
| 25 |
+
git clone https://huggingface.co/spaces/<username>/<space-name>
|
| 26 |
+
cd <space-name>
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
### 3. Copier les fichiers du projet
|
| 30 |
+
|
| 31 |
+
```bash
|
| 32 |
+
# Depuis le répertoire du projet
|
| 33 |
+
cp app_v2.py app.py
|
| 34 |
+
cp requirements.txt .
|
| 35 |
+
cp Dockerfile .
|
| 36 |
+
cp README.md .
|
| 37 |
+
cp .gitignore .
|
| 38 |
+
cp examples.py .
|
| 39 |
+
cp client.py .
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
### 4. Configurer les métadonnées du Space Hugging Face
|
| 43 |
+
|
| 44 |
+
Ajoute ces métadonnées en en-tête (front matter YAML) du fichier `README.md` du Space:
|
| 45 |
+
|
| 46 |
+
```yaml
|
| 47 |
+
---
|
| 48 |
+
title: Meta MMS ASR/TTS
|
| 49 |
+
description: API multilingue pour reconnaissance vocale et synthèse vocale
|
| 50 |
+
sdk: docker
|
| 51 |
+
pinned: false
|
| 52 |
+
app_port: 7860
|
| 53 |
+
models:
|
| 54 |
+
- facebook/mms-1b-all
|
| 55 |
+
- facebook/mms-tts-eng
|
| 56 |
+
- facebook/mms-tts-yor
|
| 57 |
+
- facebook/mms-tts-beh
|
| 58 |
+
- facebook/mms-tts-ddn
|
| 59 |
+
- facebook/mms-tts-ewe
|
| 60 |
+
- facebook/mms-tts-gej
|
| 61 |
+
- facebook/mms-tts-tbz
|
| 62 |
+
- facebook/mms-tts-bba
|
| 63 |
+
tags:
|
| 64 |
+
- ASR
|
| 65 |
+
- TTS
|
| 66 |
+
- Speech
|
| 67 |
+
- Audio
|
| 68 |
+
- Multilingual
|
| 69 |
+
- MMS
|
| 70 |
+
---
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
### 5. Pousser vers Hugging Face
|
| 74 |
+
|
| 75 |
+
```bash
|
| 76 |
+
# Configure Git si nécessaire
|
| 77 |
+
git config user.email "email@example.com"
|
| 78 |
+
git config user.name "Your Name"
|
| 79 |
+
|
| 80 |
+
# Ajoute et pousse
|
| 81 |
+
git add .
|
| 82 |
+
git commit -m "Initial commit: Meta MMS ASR/TTS API"
|
| 83 |
+
git push
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
### 6. Attendre le déploiement
|
| 87 |
+
|
| 88 |
+
Le Space se construira automatiquement (5-15 minutes). Pendant ce temps:
|
| 89 |
+
- Les dépendances seront installées
|
| 90 |
+
- Les modèles seront téléchargés (peut prendre du temps)
|
| 91 |
+
- L'application sera lancée
|
| 92 |
+
|
| 93 |
+
### 7. Tester le Space
|
| 94 |
+
|
| 95 |
+
```bash
|
| 96 |
+
# Accède à: https://huggingface.co/spaces/<username>/<space-name>
|
| 97 |
+
# Teste les endpoints:
|
| 98 |
+
# - GET / → Documentation
|
| 99 |
+
# - GET /health → État
|
| 100 |
+
# - POST /tts → Teste avec du texte en Yoruba/autres langues
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
## Troubleshooting
|
| 104 |
+
|
| 105 |
+
### Erreur: "Model not found"
|
| 106 |
+
|
| 107 |
+
Solution: Les modèles peuvent prendre du temps à télécharger. Attends 5-10 minutes et réessaie.
|
| 108 |
+
|
| 109 |
+
### Erreur: "CUDA out of memory"
|
| 110 |
+
|
| 111 |
+
Solution:
|
| 112 |
+
- Réduis MAX_AUDIO_LENGTH ou MAX_TEXT_LENGTH
|
| 113 |
+
- Utilise CPU au lieu du GPU
|
| 114 |
+
- Ajoute à requirements.txt: `transformers[deepspeed]`
|
| 115 |
+
|
| 116 |
+
### Erreur: "Port already in use"
|
| 117 |
+
|
| 118 |
+
Solution: Le port 7860 est utilisé par défaut sur HF Spaces. Vérifier `app_port` dans README.md
|
| 119 |
+
|
| 120 |
+
## Optimisation pour Production
|
| 121 |
+
|
| 122 |
+
### Augmenter les ressources
|
| 123 |
+
|
| 124 |
+
Modifie le `docker-compose.yml`:
|
| 125 |
+
|
| 126 |
+
```yaml
|
| 127 |
+
deploy:
|
| 128 |
+
resources:
|
| 129 |
+
limits:
|
| 130 |
+
memory: 8G
|
| 131 |
+
reservations:
|
| 132 |
+
memory: 4G
|
| 133 |
+
devices:
|
| 134 |
+
- driver: nvidia
|
| 135 |
+
count: 1 # GPU
|
| 136 |
+
capabilities: [gpu]
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
+
### Ajouter du caching
|
| 140 |
+
|
| 141 |
+
Modifie le Dockerfile pour persister les modèles:
|
| 142 |
+
|
| 143 |
+
```dockerfile
|
| 144 |
+
ENV HF_HOME=/app/hf_cache
|
| 145 |
+
ENV TORCH_HOME=/app/torch_cache
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
### Ajouter de l'authentification
|
| 149 |
+
|
| 150 |
+
Pour limiter l'accès:
|
| 151 |
+
|
| 152 |
+
```python
|
| 153 |
+
from functools import wraps
|
| 154 |
+
from flask import request
|
| 155 |
+
|
| 156 |
+
def require_token(f):
|
| 157 |
+
@wraps(f)
|
| 158 |
+
def decorated(*args, **kwargs):
|
| 159 |
+
token = request.headers.get('Authorization', '').replace('Bearer ', '')
|
| 160 |
+
if token != os.getenv('API_TOKEN'):
|
| 161 |
+
return {'error': 'Unauthorized'}, 401
|
| 162 |
+
return f(*args, **kwargs)
|
| 163 |
+
return decorated
|
| 164 |
+
|
| 165 |
+
@app.route('/tts', methods=['POST'])
|
| 166 |
+
@require_token
|
| 167 |
+
def tts():
|
| 168 |
+
# ...
|
| 169 |
+
```
|
| 170 |
+
|
| 171 |
+
## Monitoring
|
| 172 |
+
|
| 173 |
+
### Logs
|
| 174 |
+
|
| 175 |
+
Accède aux logs du Space:
|
| 176 |
+
- https://huggingface.co/spaces/<username>/<space-name>/logs
|
| 177 |
+
|
| 178 |
+
### Métriques
|
| 179 |
+
|
| 180 |
+
Ajoute à `app.py`:
|
| 181 |
+
|
| 182 |
+
```python
|
| 183 |
+
from prometheus_client import Counter, Histogram
|
| 184 |
+
|
| 185 |
+
requests_total = Counter('requests_total', 'Total requests', ['method', 'endpoint'])
|
| 186 |
+
request_duration = Histogram('request_duration_seconds', 'Request duration', ['endpoint'])
|
| 187 |
+
```
|
| 188 |
+
|
| 189 |
+
## Coûts
|
| 190 |
+
|
| 191 |
+
- **Gratuit**: Un Space gratuit avec ressources limitées (2-4GB RAM, pas de GPU)
|
| 192 |
+
- **Payant** (avec GPU): Nécessite un Space payant (~$5-20/mois selon GPU)
|
| 193 |
+
|
| 194 |
+
## Support
|
| 195 |
+
|
| 196 |
+
Pour les problèmes:
|
| 197 |
+
1. Vérifie les logs: https://huggingface.co/spaces/<username>/<space-name>/logs
|
| 198 |
+
2. Consulte la doc: https://huggingface.co/docs/hub/spaces
|
| 199 |
+
3. Pose une question: https://discuss.huggingface.co
|
| 200 |
+
|
| 201 |
+
## Prochaines étapes
|
| 202 |
+
|
| 203 |
+
1. ✅ Déploie d'abord sur HF Spaces
|
| 204 |
+
2. Teste tous les endpoints
|
| 205 |
+
3. Collecte du feedback
|
| 206 |
+
4. Ajoute des fonctionnalités (streaming, batch processing, etc.)
|
Dockerfile
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Installe les dépendances système
|
| 6 |
+
RUN apt-get update && apt-get install -y \
|
| 7 |
+
build-essential \
|
| 8 |
+
libsndfile1 \
|
| 9 |
+
ffmpeg \
|
| 10 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
+
|
| 12 |
+
# Copie les fichiers
|
| 13 |
+
COPY requirements.txt .
|
| 14 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 15 |
+
|
| 16 |
+
COPY app.py .
|
| 17 |
+
|
| 18 |
+
# Expose le port
|
| 19 |
+
EXPOSE 7860
|
| 20 |
+
|
| 21 |
+
# Lance l'app
|
| 22 |
+
CMD ["python", "app.py"]
|
GRADIO_DEPLOY.md
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🚀 Guide de déploiement Gradio sur Hugging Face Spaces
|
| 2 |
+
|
| 3 |
+
## Étape 1: Préparer le Space
|
| 4 |
+
|
| 5 |
+
### 1.1 Créer un nouveau Space
|
| 6 |
+
- Va sur https://huggingface.co/new-space
|
| 7 |
+
- Remplis les informations:
|
| 8 |
+
- **Space name**: `mms-speech` (ou autre nom)
|
| 9 |
+
- **License**: `openrail` ou `cc-by-nc-4.0`
|
| 10 |
+
- **SDK**: `Gradio`
|
| 11 |
+
- **Template**: `Blank`
|
| 12 |
+
- Clique **"Create Space"**
|
| 13 |
+
|
| 14 |
+
### 1.2 Cloner le Space
|
| 15 |
+
```bash
|
| 16 |
+
# Après la création, clique sur "Clone this space" ou:
|
| 17 |
+
git clone https://huggingface.co/spaces/<your-username>/<space-name>
|
| 18 |
+
cd <space-name>
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
## Étape 2: Copier les fichiers
|
| 22 |
+
|
| 23 |
+
Depuis ton répertoire de projet:
|
| 24 |
+
|
| 25 |
+
```bash
|
| 26 |
+
# Copie les fichiers essentiels
|
| 27 |
+
cp app.py requirements.txt README.md .
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
**Fichiers nécessaires:**
|
| 31 |
+
- `app.py` - Application Gradio
|
| 32 |
+
- `requirements.txt` - Dépendances
|
| 33 |
+
|
| 34 |
+
**Optionnel:**
|
| 35 |
+
- `README.md` - Documentation du Space
|
| 36 |
+
- `.gitignore` - (déjà créé)
|
| 37 |
+
|
| 38 |
+
## Étape 3: Vérifier les fichiers
|
| 39 |
+
|
| 40 |
+
```bash
|
| 41 |
+
# Vérifie que tu as:
|
| 42 |
+
ls -la
|
| 43 |
+
# app.py
|
| 44 |
+
# requirements.txt
|
| 45 |
+
# README.md
|
| 46 |
+
# .git/
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
## Étape 4: Push vers Hugging Face
|
| 50 |
+
|
| 51 |
+
```bash
|
| 52 |
+
# Configure Git (si première fois)
|
| 53 |
+
git config user.email "ton.email@example.com"
|
| 54 |
+
git config user.name "Ton Nom"
|
| 55 |
+
|
| 56 |
+
# Ajoute les fichiers
|
| 57 |
+
git add app.py requirements.txt README.md
|
| 58 |
+
|
| 59 |
+
# Commit
|
| 60 |
+
git commit -m "Add MMS Speech AI with Gradio"
|
| 61 |
+
|
| 62 |
+
# Push
|
| 63 |
+
git push
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
## Étape 5: Attendre le déploiement
|
| 67 |
+
|
| 68 |
+
Le Space se construira automatiquement:
|
| 69 |
+
|
| 70 |
+
1. **Installation des dépendances** (2-5 min)
|
| 71 |
+
2. **Téléchargement des modèles** (5-15 min)
|
| 72 |
+
3. **Lancement de l'app** (1-2 min)
|
| 73 |
+
|
| 74 |
+
### Vérifier le statut
|
| 75 |
+
- Va sur ton Space: `https://huggingface.co/spaces/<username>/<space-name>`
|
| 76 |
+
- Vérifie la section "Logs" pour voir la progression
|
| 77 |
+
|
| 78 |
+
## Étape 6: Tester
|
| 79 |
+
|
| 80 |
+
Une fois déployé:
|
| 81 |
+
|
| 82 |
+
1. **Onglet ASR**: Enregistre de l'audio et transcris
|
| 83 |
+
2. **Onglet TTS**: Écris du texte et génère l'audio
|
| 84 |
+
3. **Onglet About**: Vérifie les infos
|
| 85 |
+
|
| 86 |
+
## 📋 Contenu des fichiers
|
| 87 |
+
|
| 88 |
+
### app.py
|
| 89 |
+
```python
|
| 90 |
+
import gradio as gr
|
| 91 |
+
import torch
|
| 92 |
+
from transformers import ...
|
| 93 |
+
|
| 94 |
+
# Interface Gradio avec 3 onglets:
|
| 95 |
+
# - ASR (transcription)
|
| 96 |
+
# - TTS (synthèse vocale)
|
| 97 |
+
# - About (informations)
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
### requirements.txt
|
| 101 |
+
```
|
| 102 |
+
gradio==4.26.0
|
| 103 |
+
transformers==4.36.2
|
| 104 |
+
torch==2.1.1
|
| 105 |
+
torchaudio==2.1.1
|
| 106 |
+
librosa==0.10.0
|
| 107 |
+
soundfile==0.12.1
|
| 108 |
+
numpy==1.24.3
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
## 🆘 Troubleshooting
|
| 112 |
+
|
| 113 |
+
### Les modèles prennent longtemps à charger
|
| 114 |
+
✅ **Normal!** Les modèles font plusieurs GB. Attends 5-15 minutes.
|
| 115 |
+
- Vérifie les logs: onglet "Logs" dans ton Space
|
| 116 |
+
|
| 117 |
+
### Erreur: "Model not found"
|
| 118 |
+
✅ Attends que le téléchargement se termine
|
| 119 |
+
- Regarde les logs pour voir la progression
|
| 120 |
+
|
| 121 |
+
### Space reste "Building"
|
| 122 |
+
✅ Clique sur le bouton "Restart" dans les paramètres du Space
|
| 123 |
+
- Va dans "Settings" → "Restart this Space"
|
| 124 |
+
|
| 125 |
+
### "RuntimeError: CUDA out of memory"
|
| 126 |
+
✅ Les ressources GPU sont limitées
|
| 127 |
+
- Réduis `MAX_AUDIO_LENGTH` ou `MAX_TEXT_LENGTH` dans app.py
|
| 128 |
+
- Ou HF bascule automatiquement sur CPU
|
| 129 |
+
|
| 130 |
+
### Je n'ai pas accès au Space
|
| 131 |
+
✅ Vérifie la visibilité:
|
| 132 |
+
- Va dans "Settings"
|
| 133 |
+
- Change "Private" → "Public" si tu veux le partager
|
| 134 |
+
|
| 135 |
+
## 🎯 Après le déploiement
|
| 136 |
+
|
| 137 |
+
### Partager ton Space
|
| 138 |
+
```
|
| 139 |
+
URL: https://huggingface.co/spaces/<username>/<space-name>
|
| 140 |
+
```
|
| 141 |
+
|
| 142 |
+
### Mettre à jour le code
|
| 143 |
+
```bash
|
| 144 |
+
# Modifie app.py
|
| 145 |
+
git add app.py
|
| 146 |
+
git commit -m "Update features"
|
| 147 |
+
git push
|
| 148 |
+
```
|
| 149 |
+
|
| 150 |
+
L'app se redéploiera automatiquement!
|
| 151 |
+
|
| 152 |
+
### Ajouter une description
|
| 153 |
+
- Va dans "Settings" → "Space Settings"
|
| 154 |
+
- Remplis "Short description" et "Description"
|
| 155 |
+
|
| 156 |
+
## 📊 Ressources GPU
|
| 157 |
+
|
| 158 |
+
Hugging Face offre:
|
| 159 |
+
- **Gratuit**: CPU seul (~2-4GB RAM)
|
| 160 |
+
- **Upgradable**: GPU payant (~$5-20/mois)
|
| 161 |
+
|
| 162 |
+
Pour voir les options:
|
| 163 |
+
- Va dans ton Space Settings
|
| 164 |
+
- Regarde "Hardware" et clique "Upgrade to GPU"
|
| 165 |
+
|
| 166 |
+
## 🔗 Liens utiles
|
| 167 |
+
|
| 168 |
+
- [HF Spaces Docs](https://huggingface.co/docs/hub/spaces)
|
| 169 |
+
- [Gradio Docs](https://www.gradio.app/)
|
| 170 |
+
- [MMS Paper](https://arxiv.org/abs/2305.13516)
|
| 171 |
+
|
| 172 |
+
## ✅ Checklist final
|
| 173 |
+
|
| 174 |
+
- [ ] Space créé sur HF
|
| 175 |
+
- [ ] Fichiers copiés (app.py, requirements.txt)
|
| 176 |
+
- [ ] Git push effectué
|
| 177 |
+
- [ ] Déploiement en cours (vérifier logs)
|
| 178 |
+
- [ ] App accessible et fonctionnelle
|
| 179 |
+
- [ ] ASR fonctionne
|
| 180 |
+
- [ ] TTS fonctionne
|
| 181 |
+
|
| 182 |
+
Bravo! 🎉 Ton app est live!
|
GRADIO_SUMMARY.txt
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
📊 RÉSUMÉ - Meta MMS Speech AI (Gradio)
|
| 3 |
+
|
| 4 |
+
Application Gradio pour:
|
| 5 |
+
- ASR: Audio → Texte (100+ langues)
|
| 6 |
+
- TTS: Texte → Audio (8 langues)
|
| 7 |
+
|
| 8 |
+
Déployée sur Hugging Face Spaces
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
print("""
|
| 12 |
+
╔════════════════════════════════════════════════════════════════╗
|
| 13 |
+
║ 🎙️ Meta MMS Speech AI - Gradio 📢 ║
|
| 14 |
+
║ Reconnaissance vocale + Synthèse vocale multilingue ║
|
| 15 |
+
║ Interface Web ║
|
| 16 |
+
╚════════════════════════════════════════════════════════════════╝
|
| 17 |
+
|
| 18 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 19 |
+
|
| 20 |
+
📋 FILES DU PROJET
|
| 21 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 22 |
+
|
| 23 |
+
📂 CORE:
|
| 24 |
+
📄 app.py ⭐ Application Gradio
|
| 25 |
+
📄 requirements.txt ⭐ Dépendances
|
| 26 |
+
|
| 27 |
+
📂 DOCUMENTATION:
|
| 28 |
+
📄 README.md → Guide d'utilisation
|
| 29 |
+
📄 GRADIO_DEPLOY.md → Guide de déploiement sur HF
|
| 30 |
+
📄 ARCHITECTURE.md → Architecture technique
|
| 31 |
+
📄 DEPLOYMENT.md → (Ancien, pour Flask)
|
| 32 |
+
|
| 33 |
+
📂 UTILITAIRES:
|
| 34 |
+
📄 run.sh → Lancer l'app facilement
|
| 35 |
+
📄 client.py → Client Python (optionnel)
|
| 36 |
+
📄 examples.py → Exemples d'utilisation
|
| 37 |
+
📄 test_api.py → Tests unitaires
|
| 38 |
+
|
| 39 |
+
📂 CONFIG:
|
| 40 |
+
📄 .gitignore → Git configuration
|
| 41 |
+
📄 requirements-dev.txt → Dépendances dev
|
| 42 |
+
|
| 43 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 44 |
+
|
| 45 |
+
🎯 FEATURES
|
| 46 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 47 |
+
|
| 48 |
+
🔊 ASR (Automatic Speech Recognition):
|
| 49 |
+
• Modèle: facebook/mms-1b-all (964M params)
|
| 50 |
+
• Langues: 100+ (ISO 639-3)
|
| 51 |
+
• Input: Audio (microphone ou upload)
|
| 52 |
+
• Output: Texte transcrit
|
| 53 |
+
• Limite: 30 secondes
|
| 54 |
+
|
| 55 |
+
📢 TTS (Text-to-Speech):
|
| 56 |
+
• Modèles: facebook/mms-tts-* (VITS)
|
| 57 |
+
• Langues: 8 (beh, bba, ddn, ewe, gej, tbz, yor, eng)
|
| 58 |
+
• Input: Texte (max 1000 chars)
|
| 59 |
+
• Output: Audio WAV (22050 Hz)
|
| 60 |
+
|
| 61 |
+
🌐 Interface:
|
| 62 |
+
• Gradio (web UI moderne)
|
| 63 |
+
• 3 onglets: ASR, TTS, About
|
| 64 |
+
• Responsive design
|
| 65 |
+
• Partage automatique avec .share
|
| 66 |
+
|
| 67 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 68 |
+
|
| 69 |
+
🚀 DÉMARRAGE RAPIDE
|
| 70 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 71 |
+
|
| 72 |
+
1️⃣ Installation:
|
| 73 |
+
cd /home/ronaldo/Bureau/test
|
| 74 |
+
pip install -r requirements.txt
|
| 75 |
+
|
| 76 |
+
2️⃣ Lancer l'app:
|
| 77 |
+
python app.py
|
| 78 |
+
# ou: bash run.sh
|
| 79 |
+
|
| 80 |
+
3️⃣ Ouvrir dans le navigateur:
|
| 81 |
+
http://localhost:7860
|
| 82 |
+
|
| 83 |
+
4️⃣ Utiliser:
|
| 84 |
+
- ASR: Enregistre/upload audio → Transcris
|
| 85 |
+
- TTS: Écris texte → Génère audio
|
| 86 |
+
|
| 87 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 88 |
+
|
| 89 |
+
🌐 DÉPLOIEMENT SUR HUGGING FACE SPACES
|
| 90 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 91 |
+
|
| 92 |
+
1️⃣ Créer un Space:
|
| 93 |
+
https://huggingface.co/new-space
|
| 94 |
+
- SDK: Gradio
|
| 95 |
+
- Template: Blank
|
| 96 |
+
|
| 97 |
+
2️⃣ Cloner:
|
| 98 |
+
git clone https://huggingface.co/spaces/<user>/<space>
|
| 99 |
+
cd <space>
|
| 100 |
+
|
| 101 |
+
3️⃣ Copier:
|
| 102 |
+
cp app.py requirements.txt README.md .
|
| 103 |
+
|
| 104 |
+
4️⃣ Push:
|
| 105 |
+
git add .
|
| 106 |
+
git commit -m "Add MMS Speech"
|
| 107 |
+
git push
|
| 108 |
+
|
| 109 |
+
5️⃣ Attendre:
|
| 110 |
+
~15-20 minutes pour le déploiement complet
|
| 111 |
+
|
| 112 |
+
📌 Voir GRADIO_DEPLOY.md pour les instructions détaillées!
|
| 113 |
+
|
| 114 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 115 |
+
|
| 116 |
+
💡 TIPS IMPORTANTS
|
| 117 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 118 |
+
|
| 119 |
+
✅ Cache des modèles:
|
| 120 |
+
Les modèles sont chargés UNE FOIS et mis en cache.
|
| 121 |
+
Première requête: 2-5 min
|
| 122 |
+
Requêtes suivantes: 1-10 sec
|
| 123 |
+
|
| 124 |
+
✅ GPU/CPU:
|
| 125 |
+
Détection automatique du GPU.
|
| 126 |
+
Fallback CPU si GPU non disponible.
|
| 127 |
+
|
| 128 |
+
✅ Langues TTS disponibles:
|
| 129 |
+
- beh (Biali) 🇧🇯
|
| 130 |
+
- bba (Baatombu) 🇧🇯
|
| 131 |
+
- ddn (Dendi) 🇧🇯
|
| 132 |
+
- ewe (Éwé) 🇬🇭
|
| 133 |
+
- gej (Mina) 🇧🇯
|
| 134 |
+
- tbz (Ditammari) 🇧🇯
|
| 135 |
+
- yor (Yoruba) 🇳🇬
|
| 136 |
+
- eng (English) 🇬🇧
|
| 137 |
+
|
| 138 |
+
✅ Dépannage:
|
| 139 |
+
- Voir README.md section "Troubleshooting"
|
| 140 |
+
- Voir GRADIO_DEPLOY.md section "Troubleshooting"
|
| 141 |
+
|
| 142 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 143 |
+
|
| 144 |
+
📚 STRUCTURE DE L'APP GRADIO
|
| 145 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 146 |
+
|
| 147 |
+
app.py contient:
|
| 148 |
+
|
| 149 |
+
1️⃣ Imports & Configuration
|
| 150 |
+
- Modèles: ASR (mms-1b-all) + TTS (mms-tts-*)
|
| 151 |
+
- Config: SAMPLE_RATE, MAX_AUDIO_LENGTH, etc.
|
| 152 |
+
|
| 153 |
+
2️⃣ Fonctions principales
|
| 154 |
+
- load_asr_model() → Charge le modèle ASR
|
| 155 |
+
- load_tts_model() → Charge les modèles TTS
|
| 156 |
+
- transcribe_audio() → Transcription audio
|
| 157 |
+
- synthesize_speech() → Génération audio
|
| 158 |
+
|
| 159 |
+
3️⃣ Interface Gradio (3 onglets)
|
| 160 |
+
- ASR Tab: Enregistre/upload + transcris
|
| 161 |
+
- TTS Tab: Écrit texte + génère audio
|
| 162 |
+
- About Tab: Infos + documentation
|
| 163 |
+
|
| 164 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 165 |
+
|
| 166 |
+
🔗 RESSOURCES
|
| 167 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 168 |
+
|
| 169 |
+
📚 Documentation:
|
| 170 |
+
• Meta MMS Paper: https://arxiv.org/abs/2305.13516
|
| 171 |
+
• facebook/mms-1b-all: https://huggingface.co/facebook/mms-1b-all
|
| 172 |
+
• facebook/mms-tts: https://huggingface.co/facebook/mms-tts
|
| 173 |
+
• Gradio Docs: https://www.gradio.app/
|
| 174 |
+
• HF Spaces: https://huggingface.co/spaces
|
| 175 |
+
|
| 176 |
+
🛠️ Outils:
|
| 177 |
+
• Transformers: https://huggingface.co/docs/transformers
|
| 178 |
+
• PyTorch: https://pytorch.org/
|
| 179 |
+
• Librosa: https://librosa.org/
|
| 180 |
+
|
| 181 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 182 |
+
|
| 183 |
+
⚖️ LICENCE
|
| 184 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 185 |
+
|
| 186 |
+
CC-BY-NC-4.0 (comme les modèles Meta MMS)
|
| 187 |
+
|
| 188 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 189 |
+
|
| 190 |
+
✨ À bientôt! 🎙️
|
| 191 |
+
|
| 192 |
+
Pour commencer: python app.py
|
| 193 |
+
Pour déployer: Voir GRADIO_DEPLOY.md
|
| 194 |
+
|
| 195 |
+
Bon codage! 🚀
|
| 196 |
+
""")
|
PROJECT_SUMMARY.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
📊 Résumé complet du projet MMS ASR/TTS API
|
| 3 |
+
|
| 4 |
+
Ce projet fournit une API Flask pour:
|
| 5 |
+
- ASR (Automatic Speech Recognition): Audio → Texte avec support 100+ langues
|
| 6 |
+
- TTS (Text-to-Speech): Texte → Audio pour 8 langues
|
| 7 |
+
|
| 8 |
+
Utilise les modèles Meta MMS:
|
| 9 |
+
- facebook/mms-1b-all (ASR, 964M params)
|
| 10 |
+
- facebook/mms-tts-{lang} (TTS, 8 langues)
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import json
|
| 14 |
+
|
| 15 |
+
PROJECT_STRUCTURE = {
|
| 16 |
+
"docs": {
|
| 17 |
+
"README.md": "Documentation générale du projet",
|
| 18 |
+
"ARCHITECTURE.md": "Architecture technique détaillée",
|
| 19 |
+
"DEPLOYMENT.md": "Guide de déploiement sur Hugging Face Spaces",
|
| 20 |
+
},
|
| 21 |
+
"source_code": {
|
| 22 |
+
"app.py": "API Flask v1 - Version stable",
|
| 23 |
+
"app_v2.py": "API Flask v2 - Version optimisée (RECOMMANDÉE) ⭐",
|
| 24 |
+
"client.py": "Client Python pour tester l'API",
|
| 25 |
+
"examples.py": "Exemples d'utilisation des endpoints",
|
| 26 |
+
},
|
| 27 |
+
"testing": {
|
| 28 |
+
"test_api.py": "Tests unitaires avec pytest",
|
| 29 |
+
"setup_project.py": "Script de vérification et setup du projet",
|
| 30 |
+
},
|
| 31 |
+
"deployment": {
|
| 32 |
+
"requirements.txt": "Dépendances production",
|
| 33 |
+
"requirements-dev.txt": "Dépendances développement (tests, linting)",
|
| 34 |
+
"Dockerfile": "Conteneur Docker pour déploiement",
|
| 35 |
+
"docker-compose.yml": "Orchestration Docker (GPU support)",
|
| 36 |
+
".gitignore": "Fichiers à ignorer par Git",
|
| 37 |
+
},
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
QUICK_START = """
|
| 41 |
+
🚀 DÉMARRAGE RAPIDE
|
| 42 |
+
═══════════════════════════════════════════════════════════════
|
| 43 |
+
|
| 44 |
+
1️⃣ Installation:
|
| 45 |
+
cd /home/ronaldo/Bureau/test
|
| 46 |
+
pip install -r requirements.txt
|
| 47 |
+
|
| 48 |
+
2️⃣ Lancer l'API:
|
| 49 |
+
python app_v2.py
|
| 50 |
+
# API disponible sur http://localhost:7860
|
| 51 |
+
|
| 52 |
+
3️⃣ Tester dans un autre terminal:
|
| 53 |
+
python examples.py
|
| 54 |
+
|
| 55 |
+
4️⃣ Tester avec curl:
|
| 56 |
+
# ASR - Convertir audio en texte
|
| 57 |
+
curl -X POST -F "audio=@audio.wav" \\
|
| 58 |
+
http://localhost:7860/asr
|
| 59 |
+
|
| 60 |
+
# TTS - Convertir texte en audio
|
| 61 |
+
curl -X POST -H "Content-Type: application/json" \\
|
| 62 |
+
-d '{"text":"Hello","language":"eng"}' \\
|
| 63 |
+
http://localhost:7860/tts --output hello.wav
|
| 64 |
+
|
| 65 |
+
5️⃣ Déployer sur Hugging Face:
|
| 66 |
+
Voir DEPLOYMENT.md pour les instructions
|
| 67 |
+
"""
|
| 68 |
+
|
| 69 |
+
FEATURES = {
|
| 70 |
+
"ASR": {
|
| 71 |
+
"model": "facebook/mms-1b-all",
|
| 72 |
+
"languages": "100+ langues (ISO 639-3)",
|
| 73 |
+
"input": "Audio (WAV, MP3, etc.)",
|
| 74 |
+
"output": "Texte transcrit",
|
| 75 |
+
"endpoint": "POST /asr",
|
| 76 |
+
},
|
| 77 |
+
"TTS": {
|
| 78 |
+
"model": "facebook/mms-tts-* (VITS)",
|
| 79 |
+
"languages": 8,
|
| 80 |
+
"supported": ["beh", "bba", "ddn", "ewe", "gej", "tbz", "yor", "eng"],
|
| 81 |
+
"input": "Texte (max 1000 chars)",
|
| 82 |
+
"output": "Audio WAV (22050 Hz)",
|
| 83 |
+
"endpoint": "POST /tts",
|
| 84 |
+
},
|
| 85 |
+
"General": {
|
| 86 |
+
"framework": "Flask",
|
| 87 |
+
"gpu_support": "CUDA auto-detect",
|
| 88 |
+
"cache": "In-memory model cache (thread-safe)",
|
| 89 |
+
"cors": "Enabled",
|
| 90 |
+
"max_audio": "30 secondes",
|
| 91 |
+
"max_text": "1000 caractères",
|
| 92 |
+
},
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
ENDPOINTS = {
|
| 96 |
+
"GET /": "Documentation de l'API",
|
| 97 |
+
"GET /health": "État du service + device info",
|
| 98 |
+
"GET /supported-languages": "Langues supportées",
|
| 99 |
+
"GET /models-info": "Informations détaillées sur les modèles",
|
| 100 |
+
"POST /asr": "Audio → Texte (ASR)",
|
| 101 |
+
"POST /tts": "Texte → Audio (TTS)",
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
def print_section(title, content=None):
    """Print a section heading framed by two horizontal rules.

    Args:
        title: Heading text printed between the two rules.
        content: Optional body printed after the heading; skipped when falsy.
    """
    rule = "═" * 70
    print("\n" + rule)
    print(f" {title}")
    print(rule)
    if not content:
        return
    print(content)
|
| 111 |
+
|
| 112 |
+
def print_project_structure():
    """Print every project file with its description, grouped by category.

    Reads the module-level ``PROJECT_STRUCTURE`` mapping
    (category -> {filename: description}).
    """
    for group, entries in PROJECT_STRUCTURE.items():
        heading = group.upper()
        print(f"\n📂 {heading}:")
        for name in entries:
            # File names are left-aligned in a 25-character column.
            print(f" 📄 {name:25} → {entries[name]}")
|
| 119 |
+
|
| 120 |
+
def print_features():
    """Print the feature table held in the module-level ``FEATURES`` mapping.

    List values are shown as a comma-separated string; every other value is
    printed as-is.
    """
    print("\n🎯 FEATURES:")
    for name, properties in FEATURES.items():
        print(f"\n {name}:")
        for label, raw in properties.items():
            shown = ", ".join(raw) if isinstance(raw, list) else raw
            print(f" • {label}: {shown}")
|
| 129 |
+
|
| 130 |
+
def print_endpoints():
    """Print each API route from the module-level ``ENDPOINTS`` mapping with its description."""
    print("\n📡 API ENDPOINTS:")
    for route in ENDPOINTS:
        # Routes are left-aligned in a 25-character column.
        print(f" {route:25} → {ENDPOINTS[route]}")
|
| 135 |
+
|
| 136 |
+
def main():
    """Print the full project summary to stdout.

    Prints, in order: the banner, the project structure, the feature table,
    the API endpoints, the quick-start text, additional information and the
    suggested next steps.
    """
    # The bottom border of the banner was corrupted (replacement characters in
    # the middle of the rule); it is restored so the frame closes properly.
    print("""
╔════════════════════════════════════════════════════════════════╗
║ 🎙️ Meta MMS ASR/TTS API 🔊 ║
║ Reconnaissance vocale + Synthèse vocale ║
║ Multilingue & GPU-ready ║
╚════════════════════════════════════════════════════════════════╝
""")

    print_section("📋 STRUCTURE DU PROJET")
    print_project_structure()

    print_section("🎯 FEATURES")
    print_features()

    print_section("📡 API ENDPOINTS")
    print_endpoints()

    print_section("🚀 DÉMARRAGE RAPIDE")
    print(QUICK_START)

    print_section("💡 INFORMATIONS SUPPLÉMENTAIRES")
    print("""
✅ Recommandations:
• Utiliser app_v2.py (v1 is deprecated)
• Installer requirements.txt pour prod
• Voir examples.py pour voir comment utiliser l'API
• Pour dev: pip install -r requirements-dev.txt

📚 Documentation complète:
• README.md - Documentation générale
• ARCHITECTURE.md - Architecture technique
• DEPLOYMENT.md - Déploiement sur HF Spaces

🧪 Tests:
• pytest test_api.py -v

📊 Déploiement:
• Local: python app_v2.py
• Docker: docker build -t mms . && docker run -p 7860:7860 mms
• HF Spaces: Voir DEPLOYMENT.md

🌐 URLs importantes:
• facebook/mms-1b-all: https://huggingface.co/facebook/mms-1b-all
• facebook/mms-tts: https://huggingface.co/facebook/mms-tts
• Meta MMS Paper: https://arxiv.org/abs/2305.13516
""")

    print_section("🎓 PROCHAINES ÉTAPES")
    print("""
1. ✅ Installer les dépendances
2. ✅ Lancer l'API localement
3. ✅ Tester les endpoints
4. ✅ Déployer sur Hugging Face Spaces
5. ✅ Ajouter des features (streaming, batch, etc.)

Bon codage! 🚀
""")


# Only print the summary when the file is executed as a script.
if __name__ == "__main__":
    main()
|
README.md
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🎙️ Meta MMS Speech AI - Gradio Interface
|
| 2 |
+
|
| 3 |
+
Une interface **Gradio** simple et élégante pour:
|
| 4 |
+
- **ASR** (Automatic Speech Recognition): Convertir l'audio en texte (100+ langues)
|
| 5 |
+
- **TTS** (Text-to-Speech): Convertir le texte en audio (8 langues)
|
| 6 |
+
|
| 7 |
+
Utilise les modèles Meta MMS:
|
| 8 |
+
- `facebook/mms-1b-all` pour l'ASR
|
| 9 |
+
- `facebook/mms-tts-*` pour le TTS
|
| 10 |
+
|
| 11 |
+
## 🌐 Déploiement en ligne
|
| 12 |
+
|
| 13 |
+
**Accès direct:** [Hugging Face Spaces](https://huggingface.co/spaces)
|
| 14 |
+
|
| 15 |
+
## 🚀 Installation locale
|
| 16 |
+
|
| 17 |
+
### Prérequis
|
| 18 |
+
- Python 3.10+
|
| 19 |
+
- (Optionnel) CUDA 11.8+ pour GPU
|
| 20 |
+
|
| 21 |
+
### Installation
|
| 22 |
+
|
| 23 |
+
```bash
|
| 24 |
+
# Clone ou télécharge le repo
|
| 25 |
+
git clone <url>
|
| 26 |
+
cd mms-speech
|
| 27 |
+
|
| 28 |
+
# Crée un environnement virtuel
|
| 29 |
+
python -m venv venv
|
| 30 |
+
source venv/bin/activate # Windows: venv\Scripts\activate
|
| 31 |
+
|
| 32 |
+
# Installe les dépendances
|
| 33 |
+
pip install -r requirements.txt
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
### Lancement
|
| 37 |
+
|
| 38 |
+
```bash
|
| 39 |
+
python app.py
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
L'interface Gradio s'ouvrira automatiquement sur `http://localhost:7860`
|
| 43 |
+
|
| 44 |
+
## 📱 Interface utilisateur
|
| 45 |
+
|
| 46 |
+
### Onglet 1: 🔊 ASR (Audio → Texte)
|
| 47 |
+
- Enregistre ou upload un fichier audio
|
| 48 |
+
- Choisis la langue
|
| 49 |
+
- Clique "Transcrire"
|
| 50 |
+
- Récupère le texte transcrit
|
| 51 |
+
|
| 52 |
+
**Langues supportées:**
|
| 53 |
+
- English, French, Spanish, German, Portuguese, Arabic, Hindi, Chinese, Japanese, et 90+ autres
|
| 54 |
+
|
| 55 |
+
### Onglet 2: 📢 TTS (Texte → Audio)
|
| 56 |
+
- Entre du texte
|
| 57 |
+
- Choisis la langue
|
| 58 |
+
- Clique "Générer l'audio"
|
| 59 |
+
- Écoute ou télécharge l'audio généré
|
| 60 |
+
|
| 61 |
+
**Langues TTS:**
|
| 62 |
+
- 🇧🇯 Biali (beh)
|
| 63 |
+
- 🇧🇯 Baatombu (bba)
|
| 64 |
+
- 🇧🇯 Dendi (ddn)
|
| 65 |
+
- 🇬🇭 Éwé (ewe)
|
| 66 |
+
- 🇧🇯 Mina (gej)
|
| 67 |
+
- 🇧🇯 Ditammari (tbz)
|
| 68 |
+
- 🇳🇬 Yoruba (yor)
|
| 69 |
+
- 🇬🇧 English (eng)
|
| 70 |
+
|
| 71 |
+
### Onglet 3: ℹ️ À propos
|
| 72 |
+
Informations sur les modèles et ressources
|
| 73 |
+
|
| 74 |
+
## 📊 Modèles utilisés
|
| 75 |
+
|
| 76 |
+
### ASR: facebook/mms-1b-all
|
| 77 |
+
- **Architecture:** wav2vec2
|
| 78 |
+
- **Taille:** 964.8M parameters
|
| 79 |
+
- **Langues:** 100+ (ISO 639-3)
|
| 80 |
+
- **Input:** Audio 16kHz mono
|
| 81 |
+
- **Output:** Texte transcrit
|
| 82 |
+
|
| 83 |
+
### TTS: facebook/mms-tts-{lang}
|
| 84 |
+
- **Architecture:** VITS
|
| 85 |
+
- **Taille:** ~5-10M parameters par langue
|
| 86 |
+
- **Langues:** 8
|
| 87 |
+
- **Input:** Texte (max 1000 chars)
|
| 88 |
+
- **Output:** Audio WAV 22050Hz
|
| 89 |
+
|
| 90 |
+
## 💡 Utilisation
|
| 91 |
+
|
| 92 |
+
### Exemple ASR
|
| 93 |
+
1. Clique sur le micro ou "Upload file"
|
| 94 |
+
2. Enregistre ou uploade ton audio
|
| 95 |
+
3. Sélectionne la langue
|
| 96 |
+
4. Clique "Transcrire"
|
| 97 |
+
|
| 98 |
+
### Exemple TTS
|
| 99 |
+
1. Écris "Àbọ̀ wa" (hello en Yoruba)
|
| 100 |
+
2. Sélectionne "Yoruba (yor)"
|
| 101 |
+
3. Clique "Générer l'audio"
|
| 102 |
+
4. Écoute le résultat!
|
| 103 |
+
|
| 104 |
+
## 🔧 Développement
|
| 105 |
+
|
| 106 |
+
### Installer les dépendances dev
|
| 107 |
+
```bash
|
| 108 |
+
pip install -r requirements-dev.txt
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
### Tests
|
| 112 |
+
```bash
|
| 113 |
+
pytest test_api.py -v
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
### Linting
|
| 117 |
+
```bash
|
| 118 |
+
black app.py
|
| 119 |
+
flake8 app.py
|
| 120 |
+
```
|
| 121 |
+
|
| 122 |
+
## 🌐 Déploiement sur Hugging Face Spaces
|
| 123 |
+
|
| 124 |
+
### 1. Créer un Space
|
| 125 |
+
- Va sur https://huggingface.co/new-space
|
| 126 |
+
- Choisis "Gradio" comme SDK
|
| 127 |
+
- Sélectionne "Blank" comme template
|
| 128 |
+
|
| 129 |
+
### 2. Cloner le Space
|
| 130 |
+
```bash
|
| 131 |
+
git clone https://huggingface.co/spaces/<username>/<space-name>
|
| 132 |
+
cd <space-name>
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
### 3. Copier les fichiers
|
| 136 |
+
```bash
|
| 137 |
+
cp app.py requirements.txt README.md .
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
### 4. Push vers Hugging Face
|
| 141 |
+
```bash
|
| 142 |
+
git add .
|
| 143 |
+
git commit -m "Add MMS Speech AI"
|
| 144 |
+
git push
|
| 145 |
+
```
|
| 146 |
+
|
| 147 |
+
### 5. Attendre le déploiement
|
| 148 |
+
- HF va installer les dépendances (2-5 min)
|
| 149 |
+
- Télécharger les modèles (5-15 min)
|
| 150 |
+
- Lancer l'app automatiquement
|
| 151 |
+
|
| 152 |
+
**URL du Space:** `https://huggingface.co/spaces/<username>/<space-name>`
|
| 153 |
+
|
| 154 |
+
## ⚙️ Configuration
|
| 155 |
+
|
| 156 |
+
```python
|
| 157 |
+
SAMPLE_RATE = 16000 # Taux ASR
|
| 158 |
+
MAX_AUDIO_LENGTH = 30 # Max 30 sec
|
| 159 |
+
MAX_TEXT_LENGTH = 1000 # Max 1000 chars
|
| 160 |
+
```
|
| 161 |
+
|
| 162 |
+
## 📈 Performance
|
| 163 |
+
|
| 164 |
+
| Action | Temps |
|
| 165 |
+
|--------|-------|
|
| 166 |
+
| Première requête ASR | 2-5 min (chargement) |
|
| 167 |
+
| Requêtes ASR suivantes | 1-10 sec |
|
| 168 |
+
| Première requête TTS | 30-60 sec (chargement) |
|
| 169 |
+
| Requêtes TTS suivantes | 1-5 sec |
|
| 170 |
+
|
| 171 |
+
## 🐛 Troubleshooting
|
| 172 |
+
|
| 173 |
+
**Q: Les modèles prennent trop de temps à charger**
|
| 174 |
+
A: C'est normal! Les modèles sont volumineux. La première requête charge le modèle (2-5 min), puis ça devient rapide.
|
| 175 |
+
|
| 176 |
+
**Q: "CUDA out of memory"**
|
| 177 |
+
A: Réduis `MAX_AUDIO_LENGTH` ou `MAX_TEXT_LENGTH`, ou utilise le CPU.
|
| 178 |
+
|
| 179 |
+
**Q: Je ne vois pas mon audio après avoir enregistré**
|
| 180 |
+
A: Attends que le traitement soit terminé. Gradio affichera le résultat automatiquement.
|
| 181 |
+
|
| 182 |
+
**Q: L'audio généré est muet**
|
| 183 |
+
A: Vérifie que tu as écrit du texte et choisi une langue correcte.
|
| 184 |
+
|
| 185 |
+
## 📚 Ressources
|
| 186 |
+
|
| 187 |
+
- [Meta MMS Paper](https://arxiv.org/abs/2305.13516)
|
| 188 |
+
- [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all)
|
| 189 |
+
- [facebook/mms-tts](https://huggingface.co/facebook/mms-tts)
|
| 190 |
+
- [Gradio Documentation](https://www.gradio.app/)
|
| 191 |
+
- [Hugging Face Spaces](https://huggingface.co/spaces)
|
| 192 |
+
|
| 193 |
+
## ⚖️ Licence
|
| 194 |
+
|
| 195 |
+
CC-BY-NC-4.0 (même que les modèles Meta MMS)
|
| 196 |
+
|
| 197 |
+
## 👨‍💻 Auteur
|
| 198 |
+
|
| 199 |
+
Créé avec ❤️ pour explorer la synthèse et reconnaissance vocale multilingue.
|
| 200 |
+
|
| 201 |
+
---
|
| 202 |
+
|
| 203 |
+
**Happy speaking! 🎙️**
|
| 204 |
+
|
app.py
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import torch
|
| 3 |
+
from transformers import AutoModelForCTC, AutoProcessor, VitsModel, AutoTokenizer
|
| 4 |
+
import librosa
|
| 5 |
+
import numpy as np
|
| 6 |
+
import io
|
| 7 |
+
import soundfile as sf
|
| 8 |
+
import logging
|
| 9 |
+
|
| 10 |
+
logging.basicConfig(level=logging.INFO)
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
# Configuration
|
| 14 |
+
SAMPLE_RATE = 16000
|
| 15 |
+
MAX_AUDIO_LENGTH = 30
|
| 16 |
+
|
| 17 |
+
# Mapping des langues TTS
|
| 18 |
+
LANGUAGE_MAPPING = {
|
| 19 |
+
"Biali (beh)": "facebook/mms-tts-beh",
|
| 20 |
+
"Baatombu (bba)": "facebook/mms-tts-bba",
|
| 21 |
+
"Dendi (ddn)": "facebook/mms-tts-ddn",
|
| 22 |
+
"Éwé (ewe)": "facebook/mms-tts-ewe",
|
| 23 |
+
"Mina (gej)": "facebook/mms-tts-gej",
|
| 24 |
+
"Ditammari (tbz)": "facebook/mms-tts-tbz",
|
| 25 |
+
"Yoruba (yor)": "facebook/mms-tts-yor",
|
| 26 |
+
"English (eng)": "facebook/mms-tts-eng",
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
# Cache des modèles
|
| 30 |
+
models_cache = {}
|
| 31 |
+
|
| 32 |
+
def get_device():
    """Return the torch device name to run on: "cuda" when CUDA is available, otherwise "cpu"."""
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"
|
| 35 |
+
|
| 36 |
+
def load_asr_model():
    """Return the ASR model and processor, loading them on first use.

    The pair is built from the ``facebook/mms-1b-all`` checkpoint, moved to the
    device reported by ``get_device()``, switched to eval mode and stored in
    ``models_cache["asr"]`` so later calls reuse it.

    Returns:
        A ``(model, processor)`` tuple.
    """
    if "asr" in models_cache:
        entry = models_cache["asr"]
        return entry["model"], entry["processor"]

    target_device = get_device()
    logger.info("⏳ Chargement du modèle ASR...")
    asr_processor = AutoProcessor.from_pretrained("facebook/mms-1b-all")
    asr_model = AutoModelForCTC.from_pretrained("facebook/mms-1b-all").to(target_device)
    asr_model.eval()
    entry = {"model": asr_model, "processor": asr_processor}
    models_cache["asr"] = entry
    logger.info("✅ Modèle ASR chargé")
    return entry["model"], entry["processor"]
|
| 47 |
+
|
| 48 |
+
def load_tts_model(language_name):
    """Return the TTS model and tokenizer for a language, loading them on first use.

    Args:
        language_name: A key of ``LANGUAGE_MAPPING`` (e.g. "Yoruba (yor)"); it is
            also used as the key under which the pair is stored in ``models_cache``.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        ValueError: If the language is not cached and has no entry in
            ``LANGUAGE_MAPPING``.
    """
    if language_name in models_cache:
        entry = models_cache[language_name]
        return entry["model"], entry["tokenizer"]

    target_device = get_device()
    checkpoint = LANGUAGE_MAPPING.get(language_name)
    if not checkpoint:
        raise ValueError(f"Langue non supportée: {language_name}")

    logger.info(f"⏳ Chargement du modèle TTS {language_name}...")
    tts_model = VitsModel.from_pretrained(checkpoint).to(target_device)
    tts_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    tts_model.eval()
    entry = {"model": tts_model, "tokenizer": tts_tokenizer}
    models_cache[language_name] = entry
    logger.info(f"✅ Modèle TTS {language_name} chargé")
    return entry["model"], entry["tokenizer"]
|
| 63 |
+
|
| 64 |
+
def process_audio(audio_data):
    """Prepare raw audio for the ASR model.

    Steps, in order: cast to float32, average the channels to mono, resample
    to ``SAMPLE_RATE``, peak-normalise, and truncate to ``MAX_AUDIO_LENGTH``
    seconds.

    Args:
        audio_data: Either a ``(sample_rate, samples)`` tuple or a bare sample
            sequence, which is then taken to already be at ``SAMPLE_RATE``.

    Returns:
        A 1-D numpy array of at most ``MAX_AUDIO_LENGTH * SAMPLE_RATE`` samples.

    Raises:
        ValueError: If the audio contains no samples.
        Exception: Any other processing error is logged and re-raised unchanged.
    """
    try:
        if isinstance(audio_data, tuple):
            sr, audio = audio_data
        else:
            sr, audio = SAMPLE_RATE, audio_data

        audio = np.array(audio, dtype=np.float32)

        # Fail early with a clear message instead of the opaque
        # "zero-size array" error np.max would raise further down.
        if audio.size == 0:
            raise ValueError("Audio vide: aucun échantillon à traiter")

        # Multi-channel input: average over axis 1 to get a mono signal.
        if audio.ndim > 1:
            audio = np.mean(audio, axis=1)

        if sr != SAMPLE_RATE:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=SAMPLE_RATE)

        # Peak-normalise; an all-zero (silent) signal is left untouched.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        max_samples = MAX_AUDIO_LENGTH * SAMPLE_RATE
        return audio[:max_samples]
    except Exception as e:
        logger.error(f"Erreur traitement audio: {e}")
        raise
|
| 98 |
+
|
| 99 |
+
def _asr_language_code(language):
    """Return the language code from a UI label such as "French (fra)"; plain codes pass through."""
    if not language:
        return None
    label = str(language).strip()
    if label.endswith(")") and "(" in label:
        label = label[label.rfind("(") + 1:-1].strip()
    return label or None


def _select_asr_language(model, processor, language):
    """Point the MMS tokenizer and model adapter at the requested language."""
    lang_code = _asr_language_code(language)
    if lang_code is None:
        return
    try:
        # Always re-sync the tokenizer (cheap) so it cannot drift from the model.
        processor.tokenizer.set_target_lang(lang_code)
    except ValueError:
        # Unknown language: keep the adapter that is already loaded rather
        # than failing the whole request.
        logger.warning("Langue ASR inconnue %r: adaptateur courant conservé", language)
        return
    # Adapter weights are only (re)loaded when the language actually changes.
    if getattr(model, "target_lang", None) != lang_code:
        model.load_adapter(lang_code)


def transcribe_audio(audio, language):
    """Transcribe audio to text with the MMS ASR model.

    Args:
        audio: Audio as accepted by ``process_audio``; ``None`` when the user
            supplied nothing.
        language: Requested language, either a bare code ("fra") or a label
            ending with the code in parentheses ("French (fra)").

    Returns:
        A user-facing string: the transcription on success, or a message
        starting with "❌" when no audio was given or an error occurred.
        Errors are logged and reported in the string, never raised.
    """
    if audio is None:
        return "❌ Veuillez enregistrer ou uploader un fichier audio"

    try:
        audio_processed = process_audio(audio)
        model, processor = load_asr_model()
        # Kept for backward compatibility only: this is a plain attribute that
        # transformers ignores. The real language switch happens just below.
        processor.current_lang = language
        _select_asr_language(model, processor, language)

        device = get_device()
        with torch.no_grad():
            inputs = processor(audio_processed, sampling_rate=SAMPLE_RATE, return_tensors="pt").to(device)
            outputs = model(**inputs)
            # Greedy CTC decoding: most likely token per frame, first batch item.
            ids = torch.argmax(outputs.logits, dim=-1)[0]
            transcription = processor.decode(ids)

        return f"✅ Transcription:\n{transcription}"
    except Exception as e:
        logger.error(f"Erreur ASR: {e}")
        return f"❌ Erreur: {str(e)}"
|
| 120 |
+
|
| 121 |
+
def synthesize_speech(text, language):
    """Synthesise speech from text (TTS).

    Args:
        text: Text typed by the user; only the first 1000 characters of the
            stripped text are synthesised.
        language: Language selector value forwarded to ``load_tts_model``.

    Returns:
        A ``(audio, message)`` pair. ``audio`` is ``(sample_rate, waveform)``
        for the Gradio audio output, or ``None`` on failure; ``message`` is a
        status string for the info textbox.
    """
    if not text or not text.strip():
        return None, "❌ Veuillez entrer du texte"

    try:
        text = text.strip()[:1000]  # hard cap on synthesised text
        model, tokenizer = load_tts_model(language)

        device = get_device()
        with torch.no_grad():
            inputs = tokenizer(text, return_tensors="pt").to(device)
            waveform = model(**inputs).waveform.cpu().numpy().flatten()

        # Take the rate from the checkpoint instead of hard-coding 22050:
        # a mismatched rate makes playback too fast and too high-pitched.
        # 22050 stays as the fallback if the config exposes no rate.
        sample_rate = getattr(model.config, "sampling_rate", 22050)

        return (sample_rate, waveform), "✅ Audio généré avec succès!"

    except Exception as e:
        logger.error(f"Erreur TTS: {e}")
        return None, f"❌ Erreur: {str(e)}"
|
| 142 |
+
|
| 143 |
+
# ============= INTERFACE GRADIO =============
|
| 144 |
+
|
| 145 |
+
# Gradio UI: three tabs (ASR, TTS, About) plus a footer showing the device.
with gr.Blocks(title="🎙️ MMS ASR/TTS - Speech AI", theme=gr.themes.Soft()) as demo:
    gr.HTML("""
    <div style="text-align: center;">
        <h1>🎙️ Meta MMS Speech AI</h1>
        <p style="font-size: 16px; color: #666;">
            Reconnaissance vocale (ASR) + Synthèse vocale (TTS) multilingue
        </p>
        <p style="font-size: 14px; color: #999;">
            Utilise les modèles <strong>facebook/mms-1b-all</strong> et <strong>facebook/mms-tts</strong>
        </p>
    </div>
    """)

    with gr.Tabs():
        # ---- Tab 1: speech recognition ----
        with gr.TabItem("🔊 ASR (Audio → Texte)", id="asr"):
            gr.HTML("<h2>Reconnaissance Vocale Multilingue</h2>")
            gr.HTML("<p>Enregistre ou uploader un fichier audio pour obtenir la transcription.</p>")

            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(
                        label="📁 Fichier audio",
                        type="numpy",
                        sources=["upload", "microphone"],
                    )
                    # Labels carry the language code in parentheses; the
                    # handler receives the whole label.
                    language_asr = gr.Dropdown(
                        choices=[
                            "English (eng)",
                            "French (fra)",
                            "Spanish (spa)",
                            "German (deu)",
                            "Portuguese (por)",
                            "Arabic (ara)",
                            "Hindi (hin)",
                            "Chinese (zho)",
                            "Japanese (jpn)",
                        ],
                        value="English (eng)",
                        label="🌐 Langue",
                    )
                    btn_asr = gr.Button("🎯 Transcrire", variant="primary", size="lg")

                with gr.Column():
                    output_asr = gr.Textbox(
                        label="📝 Transcription",
                        lines=6,
                        interactive=False,
                    )

            btn_asr.click(
                fn=transcribe_audio,
                inputs=[audio_input, language_asr],
                outputs=output_asr,
            )

        # ---- Tab 2: speech synthesis ----
        with gr.TabItem("📢 TTS (Texte → Audio)", id="tts"):
            gr.HTML("<h2>Synthèse Vocale</h2>")
            gr.HTML("<p>Entre du texte et écoute la synthèse vocale dans la langue choisie.</p>")

            with gr.Row():
                with gr.Column():
                    text_input = gr.Textbox(
                        label="✍️ Texte à convertir",
                        placeholder="Écris du texte ici...",
                        lines=4,
                    )
                    language_tts = gr.Dropdown(
                        choices=list(LANGUAGE_MAPPING.keys()),
                        value="English (eng)",
                        label="🌐 Langue",
                    )
                    btn_tts = gr.Button("🔊 Générer l'audio", variant="primary", size="lg")
                    info_tts = gr.Textbox(
                        label="📊 Info",
                        interactive=False,
                        value="Clique sur 'Générer l'audio' pour commencer",
                    )

                with gr.Column():
                    audio_output = gr.Audio(
                        label="🎵 Audio généré",
                        type="numpy",
                    )

            btn_tts.click(
                fn=synthesize_speech,
                inputs=[text_input, language_tts],
                outputs=[audio_output, info_tts],
            )

            # Clickable sample inputs
            # NOTE(review): the third sample pairs French text with the
            # English voice — confirm this is intentional.
            gr.Examples(
                examples=[
                    ["Hello world", "English (eng)"],
                    ["Àbọ̀ wa", "Yoruba (yor)"],
                    ["Bonjour", "English (eng)"],
                ],
                inputs=[text_input, language_tts],
                label="💡 Exemples",
            )

        # ---- Tab 3: static information ----
        with gr.TabItem("ℹ️ À propos", id="about"):
            gr.HTML("""
            <h2>À propos de cette API</h2>

            <h3>🎙️ ASR (Automatic Speech Recognition)</h3>
            <ul>
                <li><strong>Modèle:</strong> facebook/mms-1b-all (964M params)</li>
                <li><strong>Langues:</strong> 100+ langues (ISO 639-3)</li>
                <li><strong>Architecture:</strong> wav2vec2</li>
                <li><strong>Taux d'échantillonnage:</strong> 16 kHz</li>
                <li><strong>Limite:</strong> 30 secondes d'audio</li>
            </ul>

            <h3>📢 TTS (Text-to-Speech)</h3>
            <ul>
                <li><strong>Modèle:</strong> facebook/mms-tts-* (VITS)</li>
                <li><strong>Langues supportées:</strong> 8 langues</li>
                <li><strong>Taux d'échantillonnage:</strong> 22050 Hz</li>
                <li><strong>Limite:</strong> 1000 caractères</li>
            </ul>

            <h3>🌍 Langues TTS</h3>
            <ul>
                <li>🇧🇯 Biali (beh)</li>
                <li>🇧🇯 Baatombu (bba)</li>
                <li>🇧🇯 Dendi (ddn)</li>
                <li>🇬🇭 Éwé (ewe)</li>
                <li>🇧🇯 Mina (gej)</li>
                <li>🇧🇯 Ditammari (tbz)</li>
                <li>🇳🇬 Yoruba (yor)</li>
                <li>🇬🇧 English (eng)</li>
            </ul>

            <h3>🚀 Déploiement</h3>
            <p>Cette application est déployée sur <strong>Hugging Face Spaces</strong></p>
            <p>Code source: <a href="https://huggingface.co/spaces" target="_blank">GitHub</a></p>

            <h3>📚 Ressources</h3>
            <ul>
                <li><a href="https://arxiv.org/abs/2305.13516" target="_blank">Meta MMS Paper</a></li>
                <li><a href="https://huggingface.co/facebook/mms-1b-all" target="_blank">facebook/mms-1b-all</a></li>
                <li><a href="https://huggingface.co/facebook/mms-tts" target="_blank">facebook/mms-tts</a></li>
            </ul>

            <h3>⚖️ Licence</h3>
            <p>CC-BY-NC-4.0 (comme les modèles Meta MMS)</p>
            """)

    # Footer. The device label is rendered server-side: a <script> tag
    # injected through gr.HTML is inserted as markup and never executed, so
    # the former client-side lookup left the placeholder text in place.
    device_label = "🚀 GPU" if str(get_device()).startswith("cuda") else "💻 CPU"
    gr.HTML(f"""
    <hr>
    <div style="text-align: center; font-size: 12px; color: #999; margin-top: 20px;">
        <p>🏠 Powered by <strong>Gradio</strong> + <strong>Hugging Face</strong> |
        Device: <span id="device">{device_label}</span></p>
    </div>
    """)
|
| 308 |
+
|
| 309 |
+
# Script entry point: log the start-up context, then serve the Gradio UI.
if __name__ == "__main__":
    startup_message = "🚀 Démarrage de l'interface Gradio"
    logger.info(startup_message)
    device_message = f"📊 Device: {get_device()}"
    logger.info(device_message)
    demo.launch(share=True, debug=False)
|
app_config.yaml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
title: Meta MMS ASR/TTS API
|
| 2 |
+
description: API pour la reconnaissance vocale et synthèse vocale multilingue
|
| 3 |
+
sdk: docker
|
| 4 |
+
pinned: false
|
| 5 |
+
space_id: ronaldodev/mms-asr-tts
|
| 6 |
+
tags:
|
| 7 |
+
- ASR
|
| 8 |
+
- TTS
|
| 9 |
+
- Speech
|
| 10 |
+
- Audio
|
| 11 |
+
- MMS
|
| 12 |
+
- Multilingual
|
app_v2.py
ADDED
|
@@ -0,0 +1,326 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Version améliorée de app.py avec optimisations de performance
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from flask import Flask, request, jsonify, send_file
|
| 6 |
+
from flask_cors import CORS
|
| 7 |
+
import torch
|
| 8 |
+
from transformers import AutoModelForCTC, AutoProcessor, VitsModel, AutoTokenizer
|
| 9 |
+
import librosa
|
| 10 |
+
import numpy as np
|
| 11 |
+
import io
|
| 12 |
+
import logging
|
| 13 |
+
import threading
|
| 14 |
+
import time
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
|
| 17 |
+
# Flask application with CORS enabled
app = Flask(__name__)
CORS(app)

# Logging configuration
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Limits and audio settings
SAMPLE_RATE = 16000      # sampling rate (Hz) handed to the ASR processor
MAX_AUDIO_LENGTH = 30    # seconds of audio kept before truncation
MAX_TEXT_LENGTH = 1000   # characters of text kept before truncation

# TTS language code -> MMS-TTS checkpoint id
LANGUAGE_MAPPING = {
    code: f"facebook/mms-tts-{code}"
    for code in ("beh", "bba", "ddn", "ewe", "gej", "tbz", "yor", "eng", "fra")
}

# Loaded models, keyed by "asr" or by TTS language code; guarded by cache_lock
models_cache = {}
cache_lock = threading.Lock()

# Static API metadata returned by the index endpoint
API_METADATA = {
    "name": "Meta MMS ASR/TTS API",
    "version": "2.0",
    "description": "Reconnaissance vocale et synthèse vocale multilingue",
    "models": {
        "asr": "facebook/mms-1b-all (964M parameters)",
        "tts": f"{len(LANGUAGE_MAPPING)} langues supportées",
    },
}
|
| 59 |
+
|
| 60 |
+
def get_device():
    """Return the torch device name used for inference.

    Returns:
        ``"cuda"`` when a CUDA GPU is available (its name is logged at INFO),
        otherwise ``"cpu"``.
    """
    if not torch.cuda.is_available():
        return "cpu"

    logger.info(f"GPU disponible: {torch.cuda.get_device_name(0)}")
    return "cuda"
|
| 66 |
+
|
| 67 |
+
def load_asr_model():
    """Return the cached ASR model and processor, loading them on first use.

    The load happens while holding ``cache_lock``, so concurrent first
    requests trigger a single load.

    Returns:
        A ``(model, processor)`` tuple for ``facebook/mms-1b-all``. The model
        is in eval mode and already moved to the active device.

    Raises:
        Exception: Any loading error is logged and re-raised.
    """
    with cache_lock:
        entry = models_cache.get("asr")
        if entry is None:
            try:
                device = get_device()
                logger.info("⏳ Chargement du modèle ASR facebook/mms-1b-all...")

                processor = AutoProcessor.from_pretrained("facebook/mms-1b-all")
                model = AutoModelForCTC.from_pretrained("facebook/mms-1b-all").to(device)
                # Gradients are disabled by the callers, which wrap inference
                # in torch.no_grad(); an empty no_grad block here had no effect.
                model.eval()

                entry = {"model": model, "processor": processor}
                models_cache["asr"] = entry
                logger.info("✅ Modèle ASR chargé")
            except Exception as e:
                logger.error(f"❌ Erreur lors du chargement du modèle ASR: {e}")
                raise

    return entry["model"], entry["processor"]
|
| 90 |
+
|
| 91 |
+
def load_tts_model(language_code):
    """Return the cached TTS model and tokenizer for a language.

    Args:
        language_code: A key of ``LANGUAGE_MAPPING`` (e.g. ``"eng"``).

    Returns:
        A ``(model, tokenizer)`` tuple; the model is in eval mode and already
        moved to the active device.

    Raises:
        ValueError: If ``language_code`` is not a supported TTS language.
        Exception: Any loading error is logged and re-raised.
    """
    # Validate before touching the cache. The cache is shared with the ASR
    # entry (key "asr"), so an unvalidated lookup with that key would return
    # the wrong entry and fail with a KeyError instead of a clean rejection.
    model_id = (
        LANGUAGE_MAPPING.get(language_code)
        if isinstance(language_code, str)
        else None
    )
    if not model_id:
        raise ValueError(f"Langue non supportée: {language_code}")

    with cache_lock:
        entry = models_cache.get(language_code)
        if entry is None:
            try:
                device = get_device()
                logger.info(f"⏳ Chargement du modèle TTS {language_code} ({model_id})...")

                model = VitsModel.from_pretrained(model_id).to(device)
                tokenizer = AutoTokenizer.from_pretrained(model_id)
                model.eval()

                entry = {"model": model, "tokenizer": tokenizer}
                models_cache[language_code] = entry
                logger.info(f"✅ Modèle TTS {language_code} chargé")
            except Exception as e:
                logger.error(f"❌ Erreur lors du chargement du modèle TTS {language_code}: {e}")
                raise

    return entry["model"], entry["tokenizer"]
|
| 114 |
+
|
| 115 |
+
def process_audio(audio_data, target_sr=SAMPLE_RATE):
    """Decode, resample, normalise and truncate audio for the ASR model.

    Args:
        audio_data: Encoded audio file content as ``bytes`` (decoded to mono
            with librosa), or an already decoded sample array that is taken
            to be at ``SAMPLE_RATE``.
        target_sr: Sampling rate of the returned audio.

    Returns:
        The samples at ``target_sr``, scaled so the peak magnitude is 1
        (unless all zeros) and cut to at most ``MAX_AUDIO_LENGTH`` seconds.

    Raises:
        ValueError: If the audio contains no samples.
        Exception: Decoding/processing errors are logged and re-raised.
    """
    try:
        if isinstance(audio_data, bytes):
            audio, sr = librosa.load(io.BytesIO(audio_data), sr=None, mono=True)
        else:
            audio, sr = audio_data, SAMPLE_RATE

        # Reject empty input explicitly; np.max on a zero-size array would
        # otherwise raise with a message that means nothing to API clients.
        if np.size(audio) == 0:
            raise ValueError("Audio vide: aucun échantillon à traiter")

        if sr != target_sr:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)

        # Peak-normalise; silence (peak == 0) is left untouched
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        max_samples = MAX_AUDIO_LENGTH * target_sr
        if len(audio) > max_samples:
            audio = audio[:max_samples]
            logger.warning(f"Audio tronqué à {MAX_AUDIO_LENGTH}s")

        return audio
    except Exception as e:
        logger.error(f"❌ Erreur lors du traitement audio: {e}")
        raise
|
| 142 |
+
|
| 143 |
+
@app.route("/", methods=["GET"])
def index():
    """Return the API metadata, the active device and the endpoint list."""
    endpoints = {
        "GET /health": "État du service",
        "GET /supported-languages": "Langues supportées",
        "POST /asr": "Audio → Texte",
        "POST /tts": "Texte → Audio",
        "GET /models-info": "Infos sur les modèles",
    }

    # Start from a copy of the static metadata and add the dynamic fields
    payload = dict(API_METADATA)
    payload["device"] = get_device()
    payload["endpoints"] = endpoints
    payload["docs"] = "https://github.com/ronaldodev/mms-asr-tts"
    return jsonify(payload)
|
| 158 |
+
|
| 159 |
+
@app.route("/health", methods=["GET"])
def health():
    """Report service status, device, current time and loaded model keys.

    Returns HTTP 500 with the error text if building the report fails.
    """
    try:
        report = {
            "status": "healthy",
            "device": get_device(),
            "timestamp": time.time(),
            "cached_models": list(models_cache),
        }
        return jsonify(report)
    except Exception as e:
        failure = {"status": "error", "error": str(e)}
        return jsonify(failure), 500
|
| 172 |
+
|
| 173 |
+
@app.route("/models-info", methods=["GET"])
def models_info():
    """Return descriptive information about the ASR and TTS models."""
    return jsonify({
        "asr": {
            "model_id": "facebook/mms-1b-all",
            "parameters": "964.8M",
            "architecture": "wav2vec2",
            "languages": 100,
            "description": "Automatic Speech Recognition multilingue"
        },
        "tts": {
            "model_family": "facebook/mms-tts-*",
            "architecture": "VITS",
            # MMS-TTS checkpoints generate 16 kHz audio (their
            # `config.sampling_rate`), not 22050 Hz.
            "sample_rate": 16000,
            "supported_languages": LANGUAGE_MAPPING,
            # Derive the count from the mapping so the text cannot go stale
            "description": f"Text-to-Speech pour {len(LANGUAGE_MAPPING)} langues"
        }
    })
|
| 192 |
+
|
| 193 |
+
@app.route("/supported-languages", methods=["GET"])
def supported_languages():
    """Return the languages supported by the ASR and TTS endpoints."""
    return jsonify({
        "asr": {
            "model": "facebook/mms-1b-all",
            "languages": 100,
            "description": "Support de 100+ langues ISO 639-3"
        },
        "tts": {
            "languages": LANGUAGE_MAPPING,
            "count": len(LANGUAGE_MAPPING),
            # MMS-TTS checkpoints generate 16 kHz audio (their
            # `config.sampling_rate`), not 22050 Hz.
            "sample_rate": 16000
        }
    })
|
| 208 |
+
|
| 209 |
+
# The ASR model is shared by all worker threads and its language adapter is
# global state on that model, so adapter selection and inference are
# serialised to keep one request from switching language under another.
_asr_inference_lock = threading.Lock()


@app.route("/asr", methods=["POST"])
def asr():
    """Transcribe an uploaded audio file (ASR).

    Expects multipart form data with an ``audio`` file and an optional
    ``language`` field (MMS language code, default ``"eng"``).

    Returns:
        JSON with ``transcription``, ``language``, ``audio_length`` (seconds),
        ``processing_time`` (seconds) and ``confidence``. Responds with 400
        for a missing/unreadable file or an unsupported language, and with
        500 for unexpected failures.
    """
    start_time = time.time()
    try:
        audio_file = request.files.get("audio")
        if audio_file is None:
            return jsonify({"error": "Pas de fichier audio fourni"}), 400
        if not audio_file.filename:
            return jsonify({"error": "Nom de fichier invalide"}), 400

        language = request.form.get("language", "eng")
        logger.info(f"📥 ASR demandé: language={language}, file={audio_file.filename}")

        # An upload that cannot be decoded is a client error, not a server one
        try:
            audio = process_audio(audio_file.read())
        except Exception as e:
            return jsonify({"error": f"Fichier audio invalide: {e}"}), 400
        logger.info(f" Audio chargé: {len(audio)/SAMPLE_RATE:.2f}s")

        model, processor = load_asr_model()
        device = get_device()

        with _asr_inference_lock:
            # Assigning `processor.current_lang` has no effect. MMS switches
            # language via the tokenizer vocabulary plus the model adapter.
            try:
                processor.tokenizer.set_target_lang(language)
                model.load_adapter(language)
            except ValueError as e:
                logger.error(f"❌ Erreur ASR (langue): {e}")
                return jsonify({"error": f"Langue non supportée: {language}"}), 400

            with torch.no_grad():
                inputs = processor(audio, sampling_rate=SAMPLE_RATE, return_tensors="pt").to(device)
                outputs = model(**inputs)
                # Greedy CTC decoding of the single item in the batch
                ids = torch.argmax(outputs.logits, dim=-1)[0]
                transcription = processor.decode(ids)

        elapsed = time.time() - start_time
        logger.info(f"✅ ASR complété en {elapsed:.2f}s: {transcription}")

        return jsonify({
            "transcription": transcription,
            "language": language,
            "audio_length": len(audio) / SAMPLE_RATE,
            "processing_time": elapsed,
            "confidence": "not_available"
        })

    except Exception as e:
        logger.error(f"❌ Erreur ASR: {e}")
        return jsonify({"error": str(e)}), 500
|
| 257 |
+
|
| 258 |
+
@app.route("/tts", methods=["POST"])
def tts():
    """Synthesise speech from text (TTS) and return it as a WAV attachment.

    Expects a JSON object with ``text`` (string) and optional ``language``
    (key of ``LANGUAGE_MAPPING``, default ``"eng"``). Text longer than
    ``MAX_TEXT_LENGTH`` is truncated.

    Returns:
        A WAV file response on success; JSON ``{"error": ...}`` with 400 for
        invalid input or unsupported language, 500 for unexpected failures.
    """
    start_time = time.time()
    try:
        # silent=True: a missing or malformed JSON body yields None and is
        # reported as a 400 below instead of surfacing as a 500.
        data = request.get_json(silent=True)

        if not isinstance(data, dict) or "text" not in data:
            return jsonify({"error": "Paramètre 'text' requis"}), 400

        text = data["text"]
        language = data.get("language", "eng")
        if not isinstance(text, str) or not isinstance(language, str):
            return jsonify({"error": "'text' et 'language' doivent être des chaînes"}), 400

        text = text.strip()
        if not text:
            return jsonify({"error": "Le texte ne peut pas être vide"}), 400

        if len(text) > MAX_TEXT_LENGTH:
            text = text[:MAX_TEXT_LENGTH]
            logger.warning(f"Texte tronqué à {MAX_TEXT_LENGTH} caractères")

        logger.info(f"📥 TTS demandé: language={language}, text_len={len(text)}")

        # Raises ValueError for an unsupported language (handled below)
        model, tokenizer = load_tts_model(language)

        device = get_device()
        with torch.no_grad():
            inputs = tokenizer(text, return_tensors="pt").to(device)
            outputs = model(**inputs)
            waveform = outputs.waveform.cpu().numpy().flatten()

        # Write the WAV header with the checkpoint's own rate; a hard-coded
        # 22050 makes the audio play too fast when the model rate differs.
        sample_rate = getattr(model.config, "sampling_rate", 22050)

        import soundfile as sf
        audio_bytes = io.BytesIO()
        sf.write(audio_bytes, waveform, sample_rate, format="WAV")
        audio_bytes.seek(0)

        elapsed = time.time() - start_time
        logger.info(f"✅ TTS complété en {elapsed:.2f}s: {len(waveform)} samples")

        return send_file(
            audio_bytes,
            mimetype="audio/wav",
            as_attachment=True,
            download_name=f"tts_{language}.wav"
        )

    except ValueError as e:
        logger.error(f"❌ Erreur TTS (valeur): {e}")
        return jsonify({"error": str(e)}), 400
    except Exception as e:
        logger.error(f"❌ Erreur TTS: {e}")
        return jsonify({"error": str(e)}), 500
|
| 313 |
+
|
| 314 |
+
@app.errorhandler(404)
def not_found(e):
    """Answer unknown routes with a JSON error body and status 404."""
    body = {"error": "Endpoint non trouvé"}
    return jsonify(body), 404
|
| 317 |
+
|
| 318 |
+
@app.errorhandler(500)
def server_error(e):
    """Answer unhandled server errors with a JSON error body and status 500."""
    body = {"error": "Erreur serveur interne"}
    return jsonify(body), 500
|
| 321 |
+
|
| 322 |
+
# Script entry point: log the start-up context, then run the Flask server.
if __name__ == "__main__":
    host, port = "0.0.0.0", 7860
    for line in (
        "🚀 Démarrage de l'API MMS",
        f"📊 Device: {get_device()}",
        f"🌐 Démarrage sur {host}:{port}",
    ):
        logger.info(line)
    app.run(host=host, port=port, debug=False, threaded=True)
|
client.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Client pour tester l'API MMS ASR/TTS"""
|
| 2 |
+
|
| 3 |
+
import requests
|
| 4 |
+
import io
|
| 5 |
+
import json
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
class MMSClient:
    """Small HTTP client for the MMS ASR/TTS API.

    Args:
        base_url: Root URL of the API server.
        timeout: Optional timeout in seconds passed to every HTTP request;
            ``None`` (the default) waits indefinitely.
    """

    def __init__(self, base_url="http://localhost:7860", timeout=None):
        self.base_url = base_url
        self.timeout = timeout

    def health(self):
        """Return the decoded JSON body of ``GET /health``."""
        response = requests.get(f"{self.base_url}/health", timeout=self.timeout)
        return response.json()

    def get_supported_languages(self):
        """Return the decoded JSON body of ``GET /supported-languages``."""
        response = requests.get(
            f"{self.base_url}/supported-languages", timeout=self.timeout
        )
        return response.json()

    def asr(self, audio_path, language="eng"):
        """Send an audio file to ``POST /asr`` and return the decoded JSON.

        Args:
            audio_path: Path of the audio file to upload.
            language: ISO 639-3 language code.
        """
        with open(audio_path, "rb") as f:
            response = requests.post(
                f"{self.base_url}/asr",
                files={"audio": f},
                data={"language": language},
                timeout=self.timeout,
            )
        return response.json()

    def tts(self, text, language="eng"):
        """Send text to ``POST /tts`` and return the synthesised audio.

        Args:
            text: Text to synthesise.
            language: Language code (beh, bba, ddn, ewe, gej, tbz, yor, eng).

        Returns:
            ``(audio_bytes, info)``. On success ``audio_bytes`` holds the
            audio data and ``info`` describes the response; on failure
            ``audio_bytes`` is ``None`` and ``info`` is the error payload.
        """
        response = requests.post(
            f"{self.base_url}/tts",
            json={"text": text, "language": language},
            timeout=self.timeout,
        )
        return self._parse_tts_response(
            response.status_code,
            response.headers.get("Content-Type", ""),
            response.content,
        )

    @staticmethod
    def _parse_tts_response(status_code, content_type, body):
        """Turn a raw ``/tts`` response into ``(audio_bytes, info)``.

        Handles both a binary audio body (what the Flask ``/tts`` endpoint
        sends via ``send_file``) and a JSON body carrying hex-encoded audio
        under ``"audio"``.
        """
        is_json = content_type.lower().startswith("application/json")

        # Binary success response: the body itself is the audio file
        if status_code == 200 and not is_json:
            return body, {"content_type": content_type, "size": len(body)}

        try:
            data = json.loads(body)
        except ValueError:  # covers JSON and UTF-8 decoding failures
            data = None
        if not isinstance(data, dict):
            data = {"error": f"Réponse inattendue (HTTP {status_code})"}

        if status_code == 200 and "audio" in data:
            return bytes.fromhex(data["audio"]), data
        return None, data

    def save_audio(self, audio_bytes, output_path):
        """Write ``audio_bytes`` to ``output_path`` and print a confirmation."""
        with open(output_path, "wb") as f:
            f.write(audio_bytes)
        print(f"Audio sauvegardé: {output_path}")
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
# Manual smoke test against a running server.
if __name__ == "__main__":
    client = MMSClient()

    # Test 1: service health
    print("=== Test 1: Santé ===")
    print(client.health())

    # Test 2: supported languages
    print("\n=== Test 2: Langues supportées ===")
    languages = client.get_supported_languages()
    print(json.dumps(languages, indent=2, ensure_ascii=False))

    # Tests 3 and 4: TTS in Yoruba, then English
    tts_cases = (
        (3, "Yoruba", "Àbọ̀ wa", "yor", "output_yoruba.wav"),
        (4, "English", "Hello world", "eng", "output_english.wav"),
    )
    for number, label, sample_text, code, out_file in tts_cases:
        print(f"\n=== Test {number}: TTS - {label} ===")
        audio, data = client.tts(sample_text, language=code)
        if not audio:
            print("Erreur:", data)
            continue
        client.save_audio(audio, out_file)
        print(f"Audio généré: {len(audio)} bytes")
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: '3.8'
|
| 2 |
+
|
| 3 |
+
services:
|
| 4 |
+
mms-api:
|
| 5 |
+
build: .
|
| 6 |
+
ports:
|
| 7 |
+
- "7860:7860"
|
| 8 |
+
volumes:
|
| 9 |
+
- .:/app
|
| 10 |
+
- model_cache:/root/.cache/huggingface
|
| 11 |
+
environment:
|
| 12 |
+
- CUDA_VISIBLE_DEVICES=0
|
| 13 |
+
shm_size: 4gb
|
| 14 |
+
deploy:
|
| 15 |
+
resources:
|
| 16 |
+
reservations:
|
| 17 |
+
devices:
|
| 18 |
+
- driver: nvidia
|
| 19 |
+
count: 1
|
| 20 |
+
capabilities: [ gpu ]
|
| 21 |
+
|
| 22 |
+
volumes:
|
| 23 |
+
model_cache:
|
examples.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Exemples d'utilisation de l'API MMS ASR/TTS"""
|
| 2 |
+
|
| 3 |
+
import requests
|
| 4 |
+
import json
|
| 5 |
+
|
| 6 |
+
BASE_URL = "http://localhost:7860"
|
| 7 |
+
|
| 8 |
+
# Exemples de textes dans différentes langues
|
| 9 |
+
EXAMPLES = {
|
| 10 |
+
"beh": {
|
| 11 |
+
"text": "Àbọ̀ wa",
|
| 12 |
+
"translation": "Hello"
|
| 13 |
+
},
|
| 14 |
+
"bba": {
|
| 15 |
+
"text": "A gbà kú",
|
| 16 |
+
"translation": "Good morning"
|
| 17 |
+
},
|
| 18 |
+
"ddn": {
|
| 19 |
+
"text": "Sàlaam alaikum",
|
| 20 |
+
"translation": "Peace be upon you"
|
| 21 |
+
},
|
| 22 |
+
"ewe": {
|
| 23 |
+
"text": "Woé gbé o",
|
| 24 |
+
"translation": "Hello"
|
| 25 |
+
},
|
| 26 |
+
"gej": {
|
| 27 |
+
"text": "A-kúma",
|
| 28 |
+
"translation": "Good morning"
|
| 29 |
+
},
|
| 30 |
+
"tbz": {
|
| 31 |
+
"text": "Salaam",
|
| 32 |
+
"translation": "Hello"
|
| 33 |
+
},
|
| 34 |
+
"yor": {
|
| 35 |
+
"text": "Àbọ̀ wa",
|
| 36 |
+
"translation": "Hello"
|
| 37 |
+
},
|
| 38 |
+
"eng": {
|
| 39 |
+
"text": "Hello world",
|
| 40 |
+
"translation": "Hello world"
|
| 41 |
+
}
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
def test_tts():
    """Run TTS for every entry of ``EXAMPLES`` and save each result to disk.

    Accepts both response shapes: a binary WAV body (what the Flask ``/tts``
    endpoint sends) and a JSON body with hex-encoded audio under ``"audio"``.
    Each result is written to ``output_<lang>.wav``.
    """
    import io
    import wave

    print("=" * 60)
    print("TEST TTS (Text-to-Speech)")
    print("=" * 60)

    for lang, data in EXAMPLES.items():
        print(f"\n📢 Langue: {lang} ({data['translation']})")
        print(f" Texte: {data['text']}")

        try:
            response = requests.post(
                f"{BASE_URL}/tts",
                json={"text": data["text"], "language": lang},
                timeout=60
            )

            if response.status_code != 200:
                # response.text also works when the error body is not JSON
                print(f" ❌ Erreur {response.status_code}: {response.text}")
                continue

            content_type = response.headers.get("Content-Type", "")
            if content_type.lower().startswith("application/json"):
                result = response.json()
                audio = bytes.fromhex(result["audio"])
                sample_rate = result.get("sample_rate")
            else:
                audio = response.content
                # Read the rate from the WAV header when it can be parsed
                try:
                    with wave.open(io.BytesIO(audio)) as wav:
                        sample_rate = wav.getframerate()
                except (wave.Error, EOFError):
                    sample_rate = None

            print(f" ✅ Audio généré: {len(audio)} bytes")
            if sample_rate:
                print(f" Sample rate: {sample_rate} Hz")

            # Save the audio
            with open(f"output_{lang}.wav", "wb") as f:
                f.write(audio)
            print(f" Fichier: output_{lang}.wav")
        except Exception as e:
            print(f" ❌ Exception: {e}")
|
| 75 |
+
|
| 76 |
+
def test_health():
    """Query ``/health`` and print the reported status and device."""
    rule = "=" * 60
    print(rule, "TEST SANTÉ DU SERVICE", rule, sep="\n")

    try:
        payload = requests.get(f"{BASE_URL}/health").json()
        status_line = f"✅ Status: {payload['status']}"
        device_line = f" Device: {payload['device']}"
        print(status_line)
        print(device_line)
    except Exception as e:
        print(f"❌ Erreur: {e}")
|
| 89 |
+
|
| 90 |
+
def test_supported_languages():
    """Query ``/supported-languages`` and print the ASR/TTS language info.

    Understands two response shapes: ``tts`` as a dict with a ``languages``
    table (what the Flask endpoint returns), and ``tts`` as a flat list with
    a separate top-level ``language_codes`` table.
    """
    print("=" * 60)
    print("LANGUES SUPPORTÉES")
    print("=" * 60)

    try:
        response = requests.get(f"{BASE_URL}/supported-languages")
        data = response.json()

        tts = data["tts"]
        if isinstance(tts, dict):
            # {"languages": {code: model_id}, "count": ..., "sample_rate": ...}
            codes = tts.get("languages", {})
            tts_names = list(codes)
        else:
            # Flat list of languages plus a separate code table
            # NOTE(review): presumably the shape of an earlier server
            # version — confirm whether it still needs supporting.
            tts_names = list(tts)
            codes = data.get("language_codes", {})

        print(f"\n🎤 ASR: {data['asr']}")
        print(f"\n📢 TTS: {', '.join(tts_names)}")
        print(f"\n📝 Codes de langue:")
        for code, name in codes.items():
            print(f" {code:5} -> {name}")
    except Exception as e:
        print(f"❌ Erreur: {e}")
|
| 106 |
+
|
| 107 |
+
def test_asr_sample():
    """Send ``sample_audio.wav`` to ``/asr`` and print the transcription.

    Prints a hint instead of failing when the sample file does not exist.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TEST ASR (Automatic Speech Recognition)")
    print(rule)

    audio_path = "sample_audio.wav"
    try:
        with open(audio_path, "rb") as audio_handle:
            response = requests.post(
                f"{BASE_URL}/asr",
                files={"audio": audio_handle},
                data={"language": "eng"},
                timeout=60,
            )

        if response.status_code != 200:
            print(f"❌ Erreur {response.status_code}: {response.json()}")
        else:
            result = response.json()
            print(f"✅ Transcription: {result['transcription']}")
            print(f" Langue: {result['language']}")
            print(f" Durée: {result['audio_length']:.2f}s")
    except FileNotFoundError:
        print(f"⚠️ Fichier audio introuvable: {audio_path}")
        print(" Pour tester l'ASR, fournir un fichier audio valide")
    except Exception as e:
        print(f"❌ Erreur: {e}")
|
| 136 |
+
|
| 137 |
+
# Script entry point: run every example in order against the local server.
if __name__ == "__main__":
    print("\n🚀 MMS ASR/TTS API - Exemples d'utilisation\n")

    # Tests
    for step in (test_health, test_supported_languages, test_tts, test_asr_sample):
        step()

    closing_rule = "=" * 60
    print("\n" + closing_rule)
    print("Tests terminés!")
    print(closing_rule)
|
requirements-dev.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dépendances pour le développement
|
| 2 |
+
-r requirements.txt
|
| 3 |
+
|
| 4 |
+
# Testing
|
| 5 |
+
pytest==7.4.3
|
| 6 |
+
pytest-cov==4.1.0
|
| 7 |
+
requests-mock==1.11.0
|
| 8 |
+
|
| 9 |
+
# Code quality
|
| 10 |
+
black==23.12.0
|
| 11 |
+
flake8==6.1.0
|
| 12 |
+
pylint==3.0.3
|
| 13 |
+
isort==5.13.2
|
| 14 |
+
|
| 15 |
+
# Debugging
|
| 16 |
+
ipython==8.18.1
|
| 17 |
+
ipdb==0.13.13
|
| 18 |
+
|
| 19 |
+
# Documentation
|
| 20 |
+
sphinx==7.2.6
|
| 21 |
+
sphinx-rtd-theme==2.0.0
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==4.26.0
|
| 2 |
+
transformers==4.36.2
|
| 3 |
+
torch==2.1.1
|
| 4 |
+
torchaudio==2.1.1
|
| 5 |
+
librosa==0.10.0
|
| 6 |
+
soundfile==0.12.1
|
| 7 |
+
numpy==1.24.3
|
run.sh
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
#
# Startup script for the MMS Gradio app.
#
# Steps: locate a Python interpreter, create ./venv if it does not exist,
# activate it, install requirements.txt, then launch app.py.

# Stop at the first failing step so the app is never started on top of a
# broken virtual environment or a failed dependency install.
set -e

# Work from the script's own directory so that venv/, requirements.txt and
# app.py resolve no matter where the script is invoked from.
cd -- "$(dirname -- "$0")"

echo "🚀 Démarrage de Meta MMS Speech AI (Gradio)"
echo "==========================================="

# Check that Python is available. `python` stays the first choice; fall back
# to `python3` for systems that do not provide an unversioned `python`.
if command -v python > /dev/null 2>&1; then
    PYTHON_BIN=python
elif command -v python3 > /dev/null 2>&1; then
    PYTHON_BIN=python3
else
    echo "❌ Python n'est pas installé"
    exit 1
fi

# Create the virtual environment if it does not exist yet.
if [ ! -d "venv" ]; then
    echo "📦 Création de l'environnement virtuel..."
    "$PYTHON_BIN" -m venv venv
fi

# Activate the virtual environment.
echo "🔧 Activation de l'environnement virtuel..."
source venv/bin/activate

# Install the dependencies.
echo "📚 Installation des dépendances..."
pip install -q -r requirements.txt

# Start the application.
echo ""
echo "✅ Démarrage de Gradio sur http://localhost:7860"
echo ""
echo "Interface Gradio disponible avec:"
echo " 🔊 ASR (Audio → Texte)"
echo " 📢 TTS (Texte → Audio)"
echo " ℹ️ À propos"
echo ""

python app.py
|
| 39 |
+
|
setup_project.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Script de vérification et configuration du projet MMS ASR/TTS
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
def check_files(required_files=None, base_dir=None):
    """Check that every required project file is present.

    Args:
        required_files: Iterable of file names to look for. Defaults to the
            project's core files (``app.py``, ``requirements.txt``,
            ``Dockerfile``, ``README.md``, ``DEPLOYMENT.md``).
        base_dir: Directory the names are resolved against. Defaults to the
            current working directory.

    Returns:
        True when no file is missing, False otherwise.
    """
    if required_files is None:
        required_files = [
            "app.py",
            "requirements.txt",
            "Dockerfile",
            "README.md",
            "DEPLOYMENT.md",
        ]
    root = Path(".") if base_dir is None else Path(base_dir)

    print("📋 Vérification des fichiers...")
    missing = []

    for name in required_files:
        # is_file() rather than exists(): a directory that merely shares the
        # name of a required file must not count as present.
        if (root / name).is_file():
            print(f" ✅ {name}")
        else:
            print(f" ❌ {name} (manquant)")
            missing.append(name)

    return not missing
|
| 31 |
+
|
| 32 |
+
def check_dependencies(required_packages=None):
    """Report which required Python packages can be found.

    Packages are located with ``importlib.util.find_spec`` instead of being
    imported, so the check does not load heavy libraries just to test for
    their presence.

    Args:
        required_packages: Mapping of importable top-level package name to a
            human-readable description. Defaults to the project's core
            dependencies (flask, torch, transformers, librosa).

    Returns:
        True when every package was found, False otherwise.
    """
    # Local import: keeps this function self-contained.
    from importlib.util import find_spec

    if required_packages is None:
        required_packages = {
            "flask": "Flask (API web)",
            "torch": "PyTorch (ML)",
            "transformers": "Hugging Face Transformers",
            "librosa": "Audio processing",
        }

    print("\n📦 Vérification des dépendances...")

    all_found = True
    for package, description in required_packages.items():
        try:
            found = find_spec(package) is not None
        except (ImportError, ValueError):
            # find_spec raises for a missing parent package or for a module
            # whose __spec__ is unset; treat both as "not available".
            found = False

        if found:
            print(f" ✅ {package:15} - {description}")
        else:
            print(f" ❌ {package:15} - {description} (non installé)")
            all_found = False

    return all_found
|
| 49 |
+
|
| 50 |
+
def show_quick_start():
    """Print the quick-start guide to stdout.

    The guide is a static text block covering installation, launching the
    API, sample requests, project layout, development commands, available
    endpoints, TTS languages and usage tips. Takes no arguments and returns
    None.
    """
    # Title banner framed by two rules of 60 '=' characters.
    print("\n" + "="*60)
    print("🚀 GUIDE DE DÉMARRAGE RAPIDE")
    print("="*60)

    # NOTE(review): the text below describes app.py as a Flask API, while
    # run.sh launches app.py as a Gradio app — confirm which is current.
    # Backslashes are doubled so the curl example prints a single trailing
    # "\" line continuation.
    print("""
1️⃣ Installation des dépendances:
pip install -r requirements.txt

2️⃣ Lancement de l'API:
python app_v2.py
# Ou: bash run.sh

3️⃣ Test des endpoints (dans un autre terminal):
python examples.py

# Ou avec curl:
curl http://localhost:7860/health
curl http://localhost:7860/supported-languages

curl -X POST -H "Content-Type: application/json" \\
-d '{"text": "Hello", "language": "eng"}' \\
http://localhost:7860/tts --output audio.wav

4️⃣ Déploiement sur Hugging Face Spaces:
- Voir DEPLOYMENT.md pour les instructions détaillées

📂 Structure du projet:
.
├── app.py # API Flask principale (v1)
├── app_v2.py # API Flask optimisée (v2) ⭐ RECOMMANDÉ
├── requirements.txt # Dépendances
├── Dockerfile # Conteneur Docker
├── docker-compose.yml # Orchestration Docker
├── client.py # Client Python pour tester
├── examples.py # Exemples d'utilisation
├── test_api.py # Tests unitaires
├── run.sh # Script de lancement
├── README.md # Documentation générale
└── DEPLOYMENT.md # Guide de déploiement

🔧 Développement:
pip install -r requirements-dev.txt
pytest test_api.py -v
black .
flake8 .

📚 Documentation:
- App v2 est la version recommandée (meilleure gestion d'erreurs)
- Supporte 100+ langues pour l'ASR
- Supporte 8 langues pour le TTS
- Compatible GPU/CPU
- Cache des modèles pour performance

🌐 Endpoints disponibles:
GET / → Documentation
GET /health → État du service
GET /models-info → Infos détaillées
GET /supported-languages → Langues supportées
POST /asr → Audio → Texte
POST /tts → Texte → Audio (retourne WAV)

📊 Langues TTS:
- beh (Biali)
- bba (Baatombu)
- ddn (Dendi)
- ewe (Éwé)
- gej (Mina)
- tbz (Ditammari)
- yor (Yoruba)
- eng (English)

💡 Tips:
- La première requête peut être lente (chargement des modèles)
- Utilise GPU pour de meilleures performances
- Cache les modèles automatiquement après le chargement
- Limite: 30s pour audio ASR, 1000 chars pour TTS
""")
|
| 129 |
+
|
| 130 |
+
def main():
    """Run the project checks, print the quick-start guide and report status.

    Returns:
        0 when every required file was found, 1 otherwise. The value is used
        as the process exit code by the ``__main__`` guard below.
    """
    print("\n🎯 Configuration du projet MMS ASR/TTS\n")

    # Required files decide the final status.
    files_ok = check_files()
    # The dependency report is informational; it does not affect the status.
    check_dependencies()
    show_quick_start()

    # Final status, framed by 60-character rules.
    rule = "="*60
    print("\n" + rule)
    if not files_ok:
        print("⚠️ Certains fichiers manquent. Vérifie l'installation.")
        print(rule)
        return 1

    print("✅ Configuration complète! Prêt à démarrer!")
    print(rule)
    print("\n👉 Prochaine étape: python app_v2.py\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
test_api.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests unitaires pour l'API MMS ASR/TTS
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import pytest
|
| 6 |
+
import json
|
| 7 |
+
import io
|
| 8 |
+
from flask import Flask
|
| 9 |
+
import sys
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
# Importe l'app
|
| 13 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 14 |
+
|
| 15 |
+
# Note: Pour les tests complets, il faudrait utiliser une version mockée
|
| 16 |
+
# des modèles ou des fixtures avec des modèles minimalistes
|
| 17 |
+
|
| 18 |
+
@pytest.fixture
def client():
    """Yield a Flask test client for the ``app_v2`` application."""
    # Imported inside the fixture, so app_v2 is only loaded when a test
    # actually requests the client.
    from app_v2 import app

    app.config['TESTING'] = True
    with app.test_client() as test_client:
        yield test_client
|
| 25 |
+
|
| 26 |
+
class TestAPI:
    """Endpoint tests driven through the Flask test client fixture."""

    def test_health(self, client):
        """GET /health answers 200 and reports a 'healthy' status."""
        resp = client.get('/health')
        assert resp.status_code == 200
        payload = resp.get_json()
        assert 'status' in payload
        assert payload['status'] == 'healthy'

    def test_index(self, client):
        """GET / answers 200 and exposes 'name' and 'endpoints'."""
        resp = client.get('/')
        assert resp.status_code == 200
        payload = resp.get_json()
        assert 'name' in payload
        assert 'endpoints' in payload

    def test_supported_languages(self, client):
        """GET /supported-languages lists ASR and TTS, with 'eng' for TTS."""
        resp = client.get('/supported-languages')
        assert resp.status_code == 200
        payload = resp.get_json()
        assert 'asr' in payload
        assert 'tts' in payload
        assert 'eng' in payload['tts']['languages']

    def test_models_info(self, client):
        """GET /models-info answers 200 with 'asr' and 'tts' sections."""
        resp = client.get('/models-info')
        assert resp.status_code == 200
        payload = resp.get_json()
        assert 'asr' in payload
        assert 'tts' in payload

    def test_tts_missing_text(self, client):
        """POST /tts with an empty JSON object is rejected with 400."""
        body = json.dumps({})
        resp = client.post('/tts', data=body, content_type='application/json')
        assert resp.status_code == 400
        payload = resp.get_json()
        assert 'error' in payload

    def test_tts_empty_text(self, client):
        """POST /tts with whitespace-only text is rejected with 400."""
        body = json.dumps({'text': ' '})
        resp = client.post('/tts', data=body, content_type='application/json')
        assert resp.status_code == 400

    def test_asr_missing_file(self, client):
        """POST /asr without an uploaded file is rejected with 400."""
        resp = client.post('/asr')
        assert resp.status_code == 400
        payload = resp.get_json()
        assert 'error' in payload

    def test_404(self, client):
        """An unknown route answers 404."""
        resp = client.get('/nonexistent')
        assert resp.status_code == 404
|
| 91 |
+
|
| 92 |
+
class TestLanguageMapping:
    """Checks on the ``LANGUAGE_MAPPING`` table exported by ``app_v2``."""

    def test_supported_languages(self):
        """Every documented language code has an entry in the mapping."""
        from app_v2 import LANGUAGE_MAPPING

        documented_codes = ('beh', 'bba', 'ddn', 'ewe', 'gej', 'tbz', 'yor', 'eng')
        for lang in documented_codes:
            assert lang in LANGUAGE_MAPPING, f"Langue {lang} manquante"

    def test_language_mapping_format(self):
        """Keys and values are strings; values name a facebook/mms-tts model."""
        from app_v2 import LANGUAGE_MAPPING

        for code, model_name in LANGUAGE_MAPPING.items():
            assert isinstance(code, str)
            assert isinstance(model_name, str)
            assert model_name.startswith('facebook/mms-tts-')
|
| 111 |
+
|
| 112 |
+
class TestConfig:
    """Sanity checks on the configuration constants exported by ``app_v2``."""

    def test_sample_rate(self):
        """SAMPLE_RATE is exactly 16000."""
        from app_v2 import SAMPLE_RATE as sample_rate

        assert sample_rate == 16000

    def test_max_audio_length(self):
        """MAX_AUDIO_LENGTH lies between 10 and 120 inclusive."""
        from app_v2 import MAX_AUDIO_LENGTH as max_audio

        assert max_audio >= 10 and max_audio <= 120

    def test_max_text_length(self):
        """MAX_TEXT_LENGTH lies between 100 and 5000 inclusive."""
        from app_v2 import MAX_TEXT_LENGTH as max_text

        assert max_text >= 100 and max_text <= 5000
|
| 129 |
+
|
| 130 |
+
if __name__ == '__main__':
    # pytest.main() returns the session's exit code; propagate it so that
    # running this file directly exits non-zero when a test fails.
    sys.exit(pytest.main([__file__, '-v']))
|