|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import pipeline |
|
|
from pydub import AudioSegment, effects, silence |
|
|
import os |
|
|
from langdetect import detect |
|
|
from langdetect.lang_detect_exception import LangDetectException |
|
|
|
|
|
|
|
|
|
|
|
# Whisper checkpoint used for the quick language-detection pass.
LANG_MODEL_NAME = "openai/whisper-base"

# Use the first CUDA device in half precision when a GPU is available;
# otherwise run on the CPU in full precision.
if torch.cuda.is_available():
    device = 0
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32

# Speech-recognition pipeline, created once at import time and shared by
# every request.
lang_pipe = pipeline(
    "automatic-speech-recognition",
    model=LANG_MODEL_NAME,
    device=device,
    torch_dtype=torch_dtype,
)
|
|
|
|
|
|
|
|
|
|
|
def convert_to_wav(audio_path):
    """Convert an audio file to a mono, 16 kHz WAV file.

    The WAV is written next to the input, reusing the input's base name with
    a ``.wav`` extension (so an input that already ends in ``.wav`` is
    overwritten in place).

    Args:
        audio_path: Path of the source audio file, in any format that
            ``AudioSegment.from_file`` can decode.

    Returns:
        The path of the WAV file, or ``None`` if decoding or export failed.
    """
    try:
        audio = AudioSegment.from_file(audio_path)
        audio = audio.set_channels(1).set_frame_rate(16000)
        wav_path = os.path.splitext(audio_path)[0] + ".wav"
        # export() returns the still-open output file handle; close it
        # explicitly instead of leaving it to the garbage collector.
        audio.export(wav_path, format="wav").close()
        return wav_path
    except Exception as e:
        # Best-effort conversion: report the problem and let the caller
        # handle the missing result.
        print(f"Erro ao converter para WAV: {e}")
        return None
|
|
|
|
|
def make_speech_head_wav(input_wav_path, max_seconds=7):
    """Write a short clip taken from the start of a WAV file.

    Leading silence is skipped (bounded, so the search cannot run away on
    long or silent files) and the next ``max_seconds`` seconds are exported
    to ``<input base>_head_<max_seconds>s.wav``.

    Args:
        input_wav_path: Path of the WAV file to read.
        max_seconds: Length of the clip to keep, in seconds.

    Returns:
        The path of the exported clip. ``input_wav_path`` itself is returned
        when the audio is not longer than ``max_seconds`` or when anything
        goes wrong.
    """
    try:
        audio = AudioSegment.from_wav(input_wav_path)

        # Already short enough: use the file as-is.
        if len(audio) <= max_seconds * 1000:
            return input_wav_path

        normalized = effects.normalize(audio)

        try:
            # Anything 20 dB below the average level counts as silence.
            silence_thresh = normalized.dBFS - 20

            # Only scan the first 30 s for leading silence.
            search_audio = normalized[:30000]
            start_trim = silence.detect_leading_silence(
                search_audio,
                silence_threshold=silence_thresh,
                chunk_size=100,
            )

            # Never skip more than 15 s, whatever the detector reports.
            start_trim = min(start_trim, 15000)
            trimmed = normalized[start_trim:]
        except Exception as e:
            # Silence trimming is best-effort: keep the untrimmed audio, but
            # do not swallow KeyboardInterrupt/SystemExit like a bare except.
            print(f"Erro ao detectar o silêncio inicial: {e}")
            trimmed = normalized

        # Less than 2 s left after trimming: fall back to the full audio.
        if len(trimmed) < 2000:
            trimmed = normalized

        clip = trimmed[:max_seconds * 1000]

        short_path = os.path.splitext(input_wav_path)[0] + f"_head_{max_seconds}s.wav"
        # export() returns the still-open output file handle; close it
        # explicitly.
        clip.export(short_path, format="wav").close()
        return short_path

    except Exception as e:
        print(f"Erro ao criar o trecho: {e}")
        # Fall back to the original file so the caller can still proceed.
        return input_wav_path
|
|
|
|
|
def detect_language_on_upload(filepath):
    """Guess the spoken language of an uploaded audio file.

    The file is converted to WAV, a short head clip (7 s) is transcribed with
    ``lang_pipe``, and the language is taken from the pipeline output when it
    reports one, otherwise from ``langdetect`` run on the transcribed text.

    Args:
        filepath: Path of the uploaded audio file, or ``None`` when the input
            was cleared.

    Returns:
        A language code offered by the language dropdown ("fr", "en", ...),
        or "auto" when no file was given, the text is too short, the language
        is not supported, or any step fails.
    """
    if filepath is None:
        return "auto"

    # langdetect code -> code used by the language dropdown. langdetect
    # reports Chinese as "zh-cn" or "zh-tw"; both map to "zh".
    lang_mapping = {
        'fr': 'fr', 'en': 'en', 'es': 'es', 'de': 'de', 'it': 'it',
        'pt': 'pt', 'nl': 'nl', 'pl': 'pl', 'ru': 'ru', 'ja': 'ja',
        'ko': 'ko', 'zh-cn': 'zh', 'zh-tw': 'zh', 'zh': 'zh',
    }
    supported_langs = set(lang_mapping.values())

    wav_filepath = None
    short_wav = None
    try:
        print(f"Début détection langue pour: {filepath}")

        wav_filepath = convert_to_wav(filepath)
        if not wav_filepath:
            print("Échec conversion WAV")
            return "auto"

        short_wav = make_speech_head_wav(wav_filepath, max_seconds=7)
        if not short_wav:
            short_wav = wav_filepath

        print(f"Analyse du fichier: {short_wav}")

        outputs = lang_pipe(
            short_wav,
            chunk_length_s=5,
            return_timestamps=False,
            generate_kwargs={"max_new_tokens": 50},
        )

        transcribed_text = (outputs.get("text") or "").strip()
        print(f"Texte transcrit: {transcribed_text[:100]}...")

        # NOTE(review): the pipeline output may not carry a top-level
        # "language" key at all -- confirm against the installed
        # transformers version. When it does, only accept a value the
        # dropdown can actually display.
        whisper_lang = outputs.get("language")
        if isinstance(whisper_lang, str):
            whisper_lang = whisper_lang.strip().lower()
            if whisper_lang in supported_langs:
                print(f"Langue Whisper détectée: {whisper_lang}")
                return whisper_lang

        # Too little text for a reliable text-based guess.
        if len(transcribed_text) < 10:
            print("Texte trop court, retour auto")
            return "auto"

        detected_lang = detect(transcribed_text)
        print(f"Langue LangDetect: {detected_lang}")

        result = lang_mapping.get(detected_lang, "auto")
        print(f"Résultat final: {result}")
        return result

    except Exception as e:
        # Detection is a convenience feature: never let it break the upload.
        print(f"Erreur détection langue: {e}")
        return "auto"

    finally:
        # Remove the temporary head clip (never the converted WAV itself).
        if short_wav and short_wav != wav_filepath:
            try:
                os.remove(short_wav)
            except OSError:
                # Best-effort cleanup; a leftover clip is harmless.
                pass
|
|
|
|
|
def ensure_mp3_same_name_as_input(input_path, source_wav_path):
    """Export a WAV file as an MP3 named after the original input file.

    The MP3 keeps the base name of ``input_path`` (directory and extension
    stripped) and is written, at 192 kbit/s, as a relative path, i.e. into
    the current working directory.

    Args:
        input_path: Path of the file originally provided; only its base name
            is used.
        source_wav_path: Path of the WAV file holding the audio to encode.

    Returns:
        The path of the MP3 file, or ``None`` if reading or encoding failed.
    """
    try:
        base, _ = os.path.splitext(os.path.basename(input_path))
        mp3_path = f"{base}.mp3"
        audio = AudioSegment.from_wav(source_wav_path)
        # export() returns the still-open output file handle; close it
        # explicitly instead of leaving it to the garbage collector.
        audio.export(mp3_path, format="mp3", bitrate="192k").close()
        return mp3_path
    except Exception as e:
        # Best-effort export: report the problem and signal failure with None.
        print(f"Erro ao exportar MP3: {e}")
        return None
|
|
|
|
|
|
|
|
|
|
|
def make_output_mp3(filepath, language_choice):
    """Convert the uploaded audio to MP3 and describe the chosen language.

    Args:
        filepath: Path of the uploaded audio file, or ``None``.
        language_choice: Value of the language dropdown ("auto" or a code).

    Returns:
        A ``(download_path, playback_path, language_info)`` tuple. Both paths
        are the same MP3 path. ``(None, None, "")`` is returned when no file
        was given or the WAV conversion failed.
    """
    nothing = (None, None, "")
    if filepath is None:
        return nothing

    wav_filepath = convert_to_wav(filepath)
    if not wav_filepath:
        return nothing

    language_info = (
        "Langue détectée automatiquement"
        if language_choice == "auto"
        else f"Langue détectée: {language_choice}"
    )

    # The same MP3 feeds both the download widget and the player.
    mp3_path = ensure_mp3_same_name_as_input(filepath, wav_filepath)
    return mp3_path, mp3_path, language_info
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: upload on the left, results (language info, download, player)
# on the right.
with gr.Blocks() as demo:
    gr.HTML("<div style='text-align:center;'><h1>Conversion audio vers format MP3</h1></div>")
    gr.Markdown("Uploadez un fichier audio. La sortie sera toujours un .mp3 avec le même nom de base, écoutable en ligne et téléchargeable.")

    # NOTE(review): the text below advertises timeout protection, but no
    # timeout is implemented in the visible code -- confirm or reword.
    gr.Markdown("""
    ## ⚡ **Version optimisée**
    - **Détection rapide** : Analyse les 7 premières secondes (hors silence initial)
    - **Robuste** : Fonctionne avec tous types de fichiers
    - **Timeout protection** : Évite les blocages
    """)

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="Envoyer un fichier audio")
            language_dropdown = gr.Dropdown(
                choices=["auto", "fr", "en", "es", "de", "it", "pt", "nl", "pl", "ru", "ja", "ko", "zh"],
                value="auto",
                label="Langue (auto = détection automatique)",
                info="Détection automatique rapide après upload",
            )
            submit_btn = gr.Button("Générer MP3", variant="primary")
            reset_btn = gr.Button("Reset", variant="secondary")
        with gr.Column():
            language_info_output = gr.Textbox(label="Information sur la langue", lines=1)
            mp3_download = gr.File(label="Télécharger la sortie (.mp3)")
            mp3_playback = gr.Audio(label="Écouter la sortie (.mp3)", type="filepath")

    # Pre-select the language as soon as a file is uploaded (or cleared).
    audio_input.change(
        fn=detect_language_on_upload,
        inputs=audio_input,
        outputs=language_dropdown,
    )

    submit_btn.click(
        fn=make_output_mp3,
        inputs=[audio_input, language_dropdown],
        outputs=[mp3_download, mp3_playback, language_info_output],
    )

    def reset_fields():
        """Return cleared values for every widget the Reset button resets."""
        # Order matches the `outputs` list below: input, download, player,
        # dropdown, info text.
        return None, None, None, "auto", ""

    # Also clear the player, so a previous result cannot linger after Reset.
    reset_btn.click(
        fn=reset_fields,
        inputs=[],
        outputs=[audio_input, mp3_download, mp3_playback, language_dropdown, language_info_output],
    )

demo.launch(share=True)