Spaces:
Sleeping
Sleeping
import gradio as gr
import os
import re
from groq import Groq
from faster_whisper import WhisperModel
from transformers import pipeline

# =========================
# CONFIG
# =========================
# Groq API key is read from the environment; on HuggingFace Spaces this is
# injected via the repository's "secrets" settings. If unset, Groq calls will
# fail at request time and the OPUS-MT fallback below is used instead.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # set in HuggingFace secrets
groq_client = Groq(api_key=GROQ_API_KEY)

# Whisper ASR model (faster-whisper "medium" checkpoint, downloaded on first use).
# Loaded once at import time so every request reuses the same model instance.
whisper_model = WhisperModel("medium")

# Hugging Face fallback translation models (local Helsinki-NLP OPUS-MT
# pipelines, one per direction) used when the Groq API call raises.
translator_en2es = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")
translator_es2en = pipeline("translation", model="Helsinki-NLP/opus-mt-es-en")
# =========================
# TEXT CLEANING FUNCTION
# =========================
def clean_text(text):
    """Tidy a raw ASR transcript: drop filler words, collapse whitespace,
    and capitalize the first character."""
    # Strip common hesitation fillers (um, uh, erm, hmm — any repetition).
    without_fillers = re.sub(
        r"\b(um+|uh+|erm+|hmm+)\b", "", text, flags=re.IGNORECASE
    )
    # split()/join collapses every whitespace run to a single space and
    # trims the ends in one pass.
    tidy = " ".join(without_fillers.split())
    # Sentence-case the result if it starts with a lowercase letter.
    if tidy and not tidy[0].isupper():
        tidy = tidy[0].upper() + tidy[1:]
    return tidy
# =========================
# TRANSLATION FUNCTION
# =========================
def mistral_translate(text, source_lang, target_lang):
    """Translate *text* from *source_lang* to *target_lang*.

    Tries the Groq chat-completions API first; on any failure it falls back
    to the local OPUS-MT pipelines (direction chosen from *source_lang*).

    Args:
        text: The text to translate.
        source_lang: Human-readable source language name (e.g. "English").
        target_lang: Human-readable target language name (e.g. "Spanish").

    Returns:
        The translated text as a stripped string.
    """
    system_prompt = """
You are an expert bilingual translator (English β Spanish).
Translate text accurately while preserving meaning, idioms, and emotional tags (<happy>, <angry>, <calm>).
Output only the translated text.
"""
    user_prompt = f"""
Translate the following text:
Source Language: {source_lang}
Target Language: {target_lang}
Text: "{text}"
"""
    try:
        response = groq_client.chat.completions.create(
            # NOTE(review): "mistral-7b-instruct" does not look like a valid
            # Groq model ID (Groq serves e.g. "mixtral-8x7b-32768"); if the
            # API rejects it, every call silently takes the OPUS-MT fallback
            # below — confirm against Groq's current model list.
            model="mistral-7b-instruct",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.3,
        )
        # Fix: the Groq SDK returns message *objects*, not dicts —
        # message["content"] raised TypeError (swallowed below), so the Groq
        # result was never actually used. Attribute access is correct.
        return response.choices[0].message.content.strip()
    except Exception as e:
        # Best-effort fallback: log the failure and use the local models.
        print("Groq API failed, switching to OPUS-MT:", e)
        if source_lang.lower().startswith("english"):
            return translator_en2es(text)[0]["translation_text"]
        else:
            return translator_es2en(text)[0]["translation_text"]
# =========================
# MAIN PIPELINE
# =========================
def translate_speech(audio, source_lang="English", target_lang="Spanish"):
    """End-to-end pipeline: audio file path -> cleaned transcript -> translation.

    Returns a dict with the cleaned ASR text and its translation.
    """
    # Step 1: Speech β Text (faster-whisper yields an iterable of segments).
    segments, _ = whisper_model.transcribe(audio, beam_size=5)
    transcript = clean_text(" ".join(segment.text for segment in segments))

    # Step 2: Translate Text
    return {
        "original_text": transcript,
        "translated_text": mistral_translate(transcript, source_lang, target_lang),
    }
# =========================
# GRADIO UI
# =========================
# Top-level UI wiring: language selectors, a microphone input, and a JSON
# output pane bound to translate_speech via the button's click event.
with gr.Blocks() as demo:
    # NOTE(review): the emoji/arrow characters in these strings appear
    # mojibake-mangled (likely originally a mic emoji and "EN ↔ ES") — kept
    # byte-identical here; confirm intended text against the original file.
    gr.Markdown("# ποΈ AI Universal Translator (EN β ES)")
    gr.Markdown("Speak in English or Spanish, and get real-time translated speech + text.")
    with gr.Row():
        source_lang = gr.Dropdown(["English", "Spanish"], value="English", label="Source Language")
        target_lang = gr.Dropdown(["Spanish", "English"], value="Spanish", label="Target Language")
    with gr.Row():
        # type="filepath" hands translate_speech a path on disk, which is
        # what WhisperModel.transcribe consumes.
        audio_in = gr.Audio(sources=["microphone"], type="filepath", label="π€ Speak Here")
        output_text = gr.JSON(label="Translation Result")
    btn = gr.Button("Translate")
    btn.click(translate_speech, inputs=[audio_in, source_lang, target_lang], outputs=[output_text])
demo.launch()