Update app.py
app.py CHANGED
@@ -4,6 +4,7 @@ import torch
 import os
 from pydub import AudioSegment
 from transformers import pipeline
+from faster_whisper import WhisperModel  # Import faster-whisper
 
 # Mapping of model names to Whisper model sizes
 MODELS = {
@@ -11,7 +12,8 @@ MODELS = {
     "Base (Faster)": "base",
     "Small (Balanced)": "small",
     "Medium (Accurate)": "medium",
-    "Large (Most Accurate)": "large"
+    "Large (Most Accurate)": "large",
+    "Systran Faster Whisper Large v3": "Systran/faster-whisper-large-v3"  # Add the new model
 }
 
 # Fine-tuned models for specific languages
@@ -180,19 +182,28 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
         detected_language = language
     else:
         # Use the selected Whisper model
-        model = whisper.load_model(MODELS[model_size])
-
-        # Transcribe the audio
-        if language == "Auto Detect":
-            result = model.transcribe(processed_audio_path, fp16=False)  # Auto-detect language
-            detected_language_code = result.get("language", "unknown")
+        if model_size == "Systran/faster-whisper-large-v3":
+            # Use faster-whisper for the Systran model
+            model = WhisperModel(model_size, device="cuda" if torch.cuda.is_available() else "cpu")
+            segments, info = model.transcribe(processed_audio_path, beam_size=5)
+            transcription = " ".join([segment.text for segment in segments])
+            detected_language_code = info.language
             detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")
         else:
-            language_code = LANGUAGE_NAME_TO_CODE.get(language, "en")  # Default to English if not found
-            result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
-            detected_language = language
-
-        transcription = result["text"]
+            # Use the standard Whisper model
+            model = whisper.load_model(MODELS[model_size])
+
+            # Transcribe the audio
+            if language == "Auto Detect":
+                result = model.transcribe(processed_audio_path, fp16=False)  # Auto-detect language
+                detected_language_code = result.get("language", "unknown")
+                detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")
+            else:
+                language_code = LANGUAGE_NAME_TO_CODE.get(language, "en")  # Default to English if not found
+                result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
+                detected_language = language
+
+            transcription = result["text"]
 
     # Clean up processed audio file
     os.remove(processed_audio_path)
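
One caveat worth flagging on this change: model_size holds the display key used in MODELS (the function signature defaults to a display label like "Base (Faster)"), while the new branch compares it against the repo id "Systran/faster-whisper-large-v3". As written, the faster-whisper path is likely never taken, and if it were, WhisperModel(model_size, ...) would receive the display label rather than a model id. A minimal follow-up sketch, assuming model_size is always one of the MODELS keys (model_id is a new local name introduced here for illustration):

-        if model_size == "Systran/faster-whisper-large-v3":
+        # Resolve the dropdown label to its underlying model id, then dispatch on that
+        model_id = MODELS.get(model_size, "base")
+        if model_id == "Systran/faster-whisper-large-v3":
             # Use faster-whisper for the Systran model
-            model = WhisperModel(model_size, device="cuda" if torch.cuda.is_available() else "cpu")
+            model = WhisperModel(model_id, device="cuda" if torch.cuda.is_available() else "cpu")

Comparing against the display key directly (model_size == "Systran Faster Whisper Large v3") would have the same effect; either way, the string in the condition needs to match the value model_size actually carries.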
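
Separately, since app.py now imports from faster_whisper, the Space will only build if that package is installed; the Space's requirements.txt presumably needs a matching entry (the PyPI package name is faster-whisper):

    faster-whisper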