Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -149,31 +149,28 @@ def detect_language(audio_file):
|
|
| 149 |
return "Error: No audio file uploaded."
|
| 150 |
|
| 151 |
try:
|
| 152 |
-
# Convert audio to WAV format
|
| 153 |
-
wav_path = convert_to_wav(audio_file)
|
| 154 |
-
logger.info(f"Audio file converted to WAV: {wav_path}")
|
| 155 |
-
|
| 156 |
# Define device and compute type for faster-whisper
|
| 157 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 158 |
compute_type = "float32" if device == "cuda" else "int8"
|
| 159 |
-
logger.info(f"Using device: {device}, compute_type: {compute_type}")
|
| 160 |
|
| 161 |
# Load the faster-whisper model for language detection
|
| 162 |
model = WhisperModel(MODELS["Faster Whisper Large v3"], device=device, compute_type=compute_type)
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
# Detect the language using faster-whisper
|
| 166 |
-
segments, info = model.transcribe(wav_path, task="translate", language=None)
|
| 167 |
detected_language_code = info.language
|
| 168 |
-
logger.info(f"Detected language code: {detected_language_code}")
|
| 169 |
|
| 170 |
# Get the full language name from the code
|
| 171 |
detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")
|
| 172 |
-
logger.info(f"Detected language: {detected_language}")
|
| 173 |
|
| 174 |
-
# Clean up
|
| 175 |
-
os.remove(wav_path)
|
| 176 |
-
logger.info("Temporary WAV file removed.")
|
| 177 |
|
| 178 |
return f"Detected Language: {detected_language}"
|
| 179 |
except Exception as e:
|
|
@@ -317,15 +314,11 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whis
|
|
| 317 |
return "Error: No audio file uploaded."
|
| 318 |
|
| 319 |
try:
|
| 320 |
-
# Convert audio to WAV format
|
| 321 |
-
wav_path = convert_to_wav(audio_file)
|
| 322 |
-
|
| 323 |
# Convert audio to 16kHz mono for better compatibility
|
| 324 |
-
audio = AudioSegment.from_file(wav_path)
|
| 325 |
audio = audio.set_frame_rate(16000).set_channels(1)
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
audio.export(processed_audio_path, format="wav")
|
| 329 |
|
| 330 |
# Load the appropriate model
|
| 331 |
if model_size == "Faster Whisper Large v3":
|
|
@@ -363,7 +356,6 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whis
|
|
| 363 |
|
| 364 |
# Clean up processed audio file
|
| 365 |
os.remove(processed_audio_path)
|
| 366 |
-
os.remove(wav_path)
|
| 367 |
|
| 368 |
# Return transcription and detected language
|
| 369 |
return f"Detected Language: {detected_language}\n\nTranscription:\n{transcription}"
|
|
|
|
| 149 |
return "Error: No audio file uploaded."
|
| 150 |
|
| 151 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
# Define device and compute type for faster-whisper
|
| 153 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 154 |
compute_type = "float32" if device == "cuda" else "int8"
|
|
|
|
| 155 |
|
| 156 |
# Load the faster-whisper model for language detection
|
| 157 |
model = WhisperModel(MODELS["Faster Whisper Large v3"], device=device, compute_type=compute_type)
|
| 158 |
+
|
| 159 |
+
# Convert audio to 16kHz mono for better compatibility
|
| 160 |
+
audio = AudioSegment.from_file(audio_file)
|
| 161 |
+
audio = audio.set_frame_rate(16000).set_channels(1)
|
| 162 |
+
processed_audio_path = "processed_audio.wav"
|
| 163 |
+
audio.export(processed_audio_path, format="wav")
|
| 164 |
|
| 165 |
# Detect the language using faster-whisper
|
| 166 |
+
segments, info = model.transcribe(processed_audio_path, task="translate", language=None)
|
| 167 |
detected_language_code = info.language
|
|
|
|
| 168 |
|
| 169 |
# Get the full language name from the code
|
| 170 |
detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")
|
|
|
|
| 171 |
|
| 172 |
+
# Clean up processed audio file
|
| 173 |
+
os.remove(processed_audio_path)
|
|
|
|
| 174 |
|
| 175 |
return f"Detected Language: {detected_language}"
|
| 176 |
except Exception as e:
|
|
|
|
| 314 |
return "Error: No audio file uploaded."
|
| 315 |
|
| 316 |
try:
|
|
|
|
|
|
|
|
|
|
| 317 |
# Convert audio to 16kHz mono for better compatibility
|
| 318 |
+
audio = AudioSegment.from_file(audio_file)
|
| 319 |
audio = audio.set_frame_rate(16000).set_channels(1)
|
| 320 |
+
processed_audio_path = "processed_audio.wav"
|
| 321 |
+
audio.export(processed_audio_path, format="wav")
|
|
|
|
| 322 |
|
| 323 |
# Load the appropriate model
|
| 324 |
if model_size == "Faster Whisper Large v3":
|
|
|
|
| 356 |
|
| 357 |
# Clean up processed audio file
|
| 358 |
os.remove(processed_audio_path)
|
|
|
|
| 359 |
|
| 360 |
# Return transcription and detected language
|
| 361 |
return f"Detected Language: {detected_language}\n\nTranscription:\n{transcription}"
|