Update app.py
Browse files
app.py
CHANGED
|
@@ -10,11 +10,11 @@ from transformers import logging
|
|
| 10 |
import math
|
| 11 |
import json
|
| 12 |
|
| 13 |
-
#
|
| 14 |
warnings.filterwarnings("ignore")
|
| 15 |
logging.set_verbosity_error()
|
| 16 |
|
| 17 |
-
#
|
| 18 |
MODELS = {
|
| 19 |
"es": [
|
| 20 |
"openai/whisper-large-v3",
|
|
@@ -29,13 +29,13 @@ MODELS = {
|
|
| 29 |
"pt": [
|
| 30 |
"facebook/wav2vec2-large-xlsr-53-portuguese",
|
| 31 |
"openai/whisper-medium",
|
| 32 |
-
"jonatasgrosman/wav2vec2-
|
| 33 |
]
|
| 34 |
}
|
| 35 |
|
| 36 |
def convert_audio_to_wav(audio_path):
|
| 37 |
if os.path.isdir(audio_path):
|
| 38 |
-
raise ValueError(f"
|
| 39 |
wav_path = "converted_audio.wav"
|
| 40 |
command = ["ffmpeg", "-i", audio_path, "-ac", "1", "-ar", "16000", wav_path]
|
| 41 |
subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
|
|
@@ -45,7 +45,7 @@ def detect_language(audio_path):
|
|
| 45 |
try:
|
| 46 |
speech, _ = librosa.load(audio_path, sr=16000, duration=30)
|
| 47 |
except Exception as e:
|
| 48 |
-
raise ValueError(f"Error
|
| 49 |
|
| 50 |
processor = WhisperProcessor.from_pretrained("openai/whisper-base")
|
| 51 |
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
|
|
@@ -75,7 +75,7 @@ def transcribe_audio_stream(audio, model_name):
|
|
| 75 |
processor = WhisperProcessor.from_pretrained(model_name)
|
| 76 |
model = WhisperForConditionalGeneration.from_pretrained(model_name)
|
| 77 |
|
| 78 |
-
chunk_duration = 30 #
|
| 79 |
|
| 80 |
for i in range(0, int(duration), chunk_duration):
|
| 81 |
end = min(i + chunk_duration, duration)
|
|
@@ -95,7 +95,7 @@ def transcribe_audio_stream(audio, model_name):
|
|
| 95 |
else:
|
| 96 |
transcriber = pipeline("automatic-speech-recognition", model=model_name)
|
| 97 |
|
| 98 |
-
chunk_duration = 10 #
|
| 99 |
|
| 100 |
for i in range(0, int(duration), chunk_duration):
|
| 101 |
end = min(i + chunk_duration, duration)
|
|
@@ -133,7 +133,8 @@ def combined_interface(audio, file_format):
|
|
| 133 |
language, model_options = detect_and_select_model(audio)
|
| 134 |
selected_model = model_options[0]
|
| 135 |
|
| 136 |
-
yield
|
|
|
|
| 137 |
|
| 138 |
transcriptions = []
|
| 139 |
for partial_transcriptions, progress in transcribe_audio_stream(audio, selected_model):
|
|
@@ -141,17 +142,20 @@ def combined_interface(audio, file_format):
|
|
| 141 |
full_transcription = " ".join([t["text"] for t in transcriptions])
|
| 142 |
progress_int = math.floor(progress)
|
| 143 |
status = f"Transcribing... {progress_int}% complete"
|
| 144 |
-
|
|
|
|
| 145 |
|
| 146 |
-
#
|
| 147 |
file_path = save_transcription(transcriptions, file_format)
|
| 148 |
|
| 149 |
-
#
|
| 150 |
os.remove("converted_audio.wav")
|
| 151 |
|
|
|
|
| 152 |
yield language, model_options, selected_model, full_transcription.strip(), 100, f"Transcription complete! Download {file_path}", file_path
|
| 153 |
|
| 154 |
except Exception as e:
|
|
|
|
| 155 |
yield str(e), [], "", "An error occurred during processing.", 0, "Error", ""
|
| 156 |
|
| 157 |
iface = gr.Interface(
|
|
@@ -175,4 +179,4 @@ iface = gr.Interface(
|
|
| 175 |
)
|
| 176 |
|
| 177 |
if __name__ == "__main__":
|
| 178 |
-
iface.queue().launch()
|
|
|
|
| 10 |
import math
|
| 11 |
import json
|
| 12 |
|
| 13 |
+
# Suprimir advertencias
|
| 14 |
warnings.filterwarnings("ignore")
|
| 15 |
logging.set_verbosity_error()
|
| 16 |
|
| 17 |
+
# Modelos actualizados por idioma
|
| 18 |
MODELS = {
|
| 19 |
"es": [
|
| 20 |
"openai/whisper-large-v3",
|
|
|
|
| 29 |
"pt": [
|
| 30 |
"facebook/wav2vec2-large-xlsr-53-portuguese",
|
| 31 |
"openai/whisper-medium",
|
| 32 |
+
"jonatasgrosman/wav2vec2-xlsr-53-portuguese"
|
| 33 |
]
|
| 34 |
}
|
| 35 |
|
| 36 |
def convert_audio_to_wav(audio_path):
|
| 37 |
if os.path.isdir(audio_path):
|
| 38 |
+
raise ValueError(f"La ruta proporcionada es un directorio, no un archivo: {audio_path}")
|
| 39 |
wav_path = "converted_audio.wav"
|
| 40 |
command = ["ffmpeg", "-i", audio_path, "-ac", "1", "-ar", "16000", wav_path]
|
| 41 |
subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
|
|
|
|
| 45 |
try:
|
| 46 |
speech, _ = librosa.load(audio_path, sr=16000, duration=30)
|
| 47 |
except Exception as e:
|
| 48 |
+
raise ValueError(f"Error al cargar el archivo de audio con librosa: {e}")
|
| 49 |
|
| 50 |
processor = WhisperProcessor.from_pretrained("openai/whisper-base")
|
| 51 |
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
|
|
|
|
| 75 |
processor = WhisperProcessor.from_pretrained(model_name)
|
| 76 |
model = WhisperForConditionalGeneration.from_pretrained(model_name)
|
| 77 |
|
| 78 |
+
chunk_duration = 30 # segundos
|
| 79 |
|
| 80 |
for i in range(0, int(duration), chunk_duration):
|
| 81 |
end = min(i + chunk_duration, duration)
|
|
|
|
| 95 |
else:
|
| 96 |
transcriber = pipeline("automatic-speech-recognition", model=model_name)
|
| 97 |
|
| 98 |
+
chunk_duration = 10 # segundos
|
| 99 |
|
| 100 |
for i in range(0, int(duration), chunk_duration):
|
| 101 |
end = min(i + chunk_duration, duration)
|
|
|
|
| 133 |
language, model_options = detect_and_select_model(audio)
|
| 134 |
selected_model = model_options[0]
|
| 135 |
|
| 136 |
+
# Primer yield: Añadir None para la séptima salida (Archivo de Descarga)
|
| 137 |
+
yield language, model_options, selected_model, "", 0, "Initializing...", None
|
| 138 |
|
| 139 |
transcriptions = []
|
| 140 |
for partial_transcriptions, progress in transcribe_audio_stream(audio, selected_model):
|
|
|
|
| 142 |
full_transcription = " ".join([t["text"] for t in transcriptions])
|
| 143 |
progress_int = math.floor(progress)
|
| 144 |
status = f"Transcribing... {progress_int}% complete"
|
| 145 |
+
# Yield con None para el archivo de descarga hasta que esté completo
|
| 146 |
+
yield language, model_options, selected_model, full_transcription.strip(), progress_int, status, None
|
| 147 |
|
| 148 |
+
# Guardar transcripción
|
| 149 |
file_path = save_transcription(transcriptions, file_format)
|
| 150 |
|
| 151 |
+
# Limpiar archivos temporales
|
| 152 |
os.remove("converted_audio.wav")
|
| 153 |
|
| 154 |
+
# Yield final con el archivo de descarga
|
| 155 |
yield language, model_options, selected_model, full_transcription.strip(), 100, f"Transcription complete! Download {file_path}", file_path
|
| 156 |
|
| 157 |
except Exception as e:
|
| 158 |
+
# Asegurarse de que el yield de error también devuelva 7 valores
|
| 159 |
yield str(e), [], "", "An error occurred during processing.", 0, "Error", ""
|
| 160 |
|
| 161 |
iface = gr.Interface(
|
|
|
|
| 179 |
)
|
| 180 |
|
| 181 |
if __name__ == "__main__":
|
| 182 |
+
iface.queue().launch()
|