lalaru's picture
Update app.py
030831c verified
raw
history blame
3.5 kB
import gradio as gr
import os
import re
from groq import Groq
from faster_whisper import WhisperModel
from transformers import pipeline
# =========================
# CONFIG
# =========================
# Groq API key comes from the environment (set in HuggingFace Space secrets).
# NOTE(review): if the secret is unset this is None — confirm whether Groq()
# tolerates a None key at construction or only fails at request time.
GROQ_API_KEY = os.getenv("GROQ_API_KEY") # set in HuggingFace secrets
groq_client = Groq(api_key=GROQ_API_KEY)
# Whisper ASR model (faster-whisper "medium" checkpoint, downloaded on first run)
whisper_model = WhisperModel("medium")
# Hugging Face fallback translation models, used when the Groq call fails
translator_en2es = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")
translator_es2en = pipeline("translation", model="Helsinki-NLP/opus-mt-es-en")
# =========================
# TEXT CLEANING FUNCTION
# =========================
def clean_text(text):
    """Tidy up a raw ASR transcript.

    Drops common hesitation fillers (um/uh/erm/hmm and elongations),
    collapses all whitespace runs to single spaces, trims the ends,
    and capitalizes the first character.
    """
    without_fillers = re.sub(r"\b(um+|uh+|erm+|hmm+)\b", "", text, flags=re.IGNORECASE)
    # str.split() with no args splits on any whitespace and discards empties,
    # so join(...) collapses runs and trims in one pass.
    cleaned = " ".join(without_fillers.split())
    if cleaned:
        # .upper() is a no-op on non-letters and already-capitalized text.
        cleaned = cleaned[0].upper() + cleaned[1:]
    return cleaned
# =========================
# TRANSLATION FUNCTION
# =========================
def mistral_translate(text, source_lang, target_lang):
    """Translate `text` between English and Spanish.

    Tries the Groq chat-completions API first; on any failure falls back to
    the local Helsinki-NLP OPUS-MT pipelines (best-effort, never raises).

    Args:
        text: Source text to translate.
        source_lang: Human-readable source language name (e.g. "English").
        target_lang: Human-readable target language name (e.g. "Spanish").

    Returns:
        The translated text as a plain string.
    """
    system_prompt = """
You are an expert bilingual translator (English ↔ Spanish).
Translate text accurately while preserving meaning, idioms, and emotional tags (<happy>, <angry>, <calm>).
Output only the translated text.
"""
    user_prompt = f"""
Translate the following text:
Source Language: {source_lang}
Target Language: {target_lang}
Text: "{text}"
"""
    try:
        # NOTE(review): "mistral-7b-instruct" does not look like a current Groq
        # model ID — verify against Groq's published model list; an invalid ID
        # means every request fails and only the fallback path ever runs.
        response = groq_client.chat.completions.create(
            model="mistral-7b-instruct",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.3,  # low temperature for faithful, low-variance output
        )
        # Bug fix: the Groq SDK returns a message *object*, not a dict —
        # message["content"] raised TypeError and silently forced the fallback.
        return response.choices[0].message.content.strip()
    except Exception as e:
        # Deliberate best-effort fallback: keep serving translations locally.
        print("Groq API failed, switching to OPUS-MT:", e)
        if source_lang.lower().startswith("english"):
            return translator_en2es(text)[0]["translation_text"]
        else:
            return translator_es2en(text)[0]["translation_text"]
# =========================
# MAIN PIPELINE
# =========================
def translate_speech(audio, source_lang="English", target_lang="Spanish"):
    """End-to-end pipeline: audio file path -> cleaned transcript -> translation.

    Returns a dict with keys "original_text" and "translated_text".
    """
    # Step 1: Speech -> Text. faster-whisper yields segments; stitch them together.
    segments, _info = whisper_model.transcribe(audio, beam_size=5)
    transcript = clean_text(" ".join(segment.text for segment in segments))
    # Step 2: Text -> translated text (Groq first, OPUS-MT fallback inside).
    translation = mistral_translate(transcript, source_lang, target_lang)
    return {
        "original_text": transcript,
        "translated_text": translation,
    }
# =========================
# GRADIO UI
# =========================
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ AI Universal Translator (EN ↔ ES)")
    gr.Markdown("Speak in English or Spanish, and get real-time translated speech + text.")
    # Language selectors: defaults translate English -> Spanish.
    with gr.Row():
        src_dropdown = gr.Dropdown(["English", "Spanish"], value="English", label="Source Language")
    tgt_dropdown = gr.Dropdown(["Spanish", "English"], value="Spanish", label="Target Language")
    # Microphone input (passed to the pipeline as a file path) and JSON result view.
    with gr.Row():
        mic_input = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak Here")
        result_view = gr.JSON(label="Translation Result")
    translate_btn = gr.Button("Translate")
    translate_btn.click(translate_speech, inputs=[mic_input, src_dropdown, tgt_dropdown], outputs=[result_view])
demo.launch()