Spaces:

lalaru
/

Translation-code

Sleeping

App Files Files Community

lalaru committed on Sep 10, 2025

Commit

030831c

verified ·

1 Parent(s): 545856a

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -143

app.py CHANGED Viewed

@@ -1,151 +1,103 @@
 import gradio as gr
-import requests
-import json
 import os
 from transformers import pipeline
-from huggingface_hub import login
-from groq import Groq  # Import the Groq SDK
-# Initialize the Groq client
-# The Groq SDK automatically reads the GROQ_API_KEY environment variable.
-try:
-    client = Groq()
-except Exception as e:
-    print(f"Warning: Could not initialize Groq client. Check GROQ_API_KEY. Error: {e}")
-    client = None
-# Hugging Face token for the ASR model
-HF_TOKEN = os.getenv("HF_TOKEN")
-# Log in to Hugging Face Hub for the ASR model
-if HF_TOKEN:
-    login(token=HF_TOKEN, add_to_git_credential=False)
-else:
-    print("Warning: HF_TOKEN not set. ASR model may not load.")
-# Load a speech-to-text model with authentication
-try:
-    asr_pipeline = pipeline(
-        "automatic-speech-recognition",
-        model="distil-whisper/distil-small.en",
-        use_auth_token=HF_TOKEN
-    )
-except Exception as e:
-    print(f"Warning: Could not load ASR model. Error: {e}")
-    asr_pipeline = None
-# Prompt template
-PROMPT_TEMPLATE = """
-You are an AI translation assistant for a real-time universal translator.
-Your tasks:
-1. Take the input text in either English or Spanish.
-2. Remove filler words like "um", "uh", "ehhh", "pues", "like", "you know", and stretched words ("soooo", "pizzaaaa").
-3. Correct punctuation and casing.
-4. Translate the cleaned text into the target language (English ↔ Spanish).
-5. Do not change the emotion tag.
-Return only JSON in this exact format:
-{{
-  "cleaned_text": "<cleaned input text in original language>",
-  "translated_text": "<translation in target language>",
-  "emotion": "<given emotion>"
-}}
-Input:
-{text}
-Source language: {source_lang}
-Target language: {target_lang}
-Emotion: {emotion}
-"""
-def query_groq(payload):
-    if not client:
-        return {"error": "Groq client not initialized. Check GROQ_API_KEY."}
-    # Structure messages for Groq's chat completions API
-    messages = [
-        {"role": "system", "content": "You are an AI translation assistant for a real-time universal translator that returns JSON."},
-        {"role": "user", "content": payload["inputs"]},
-    ]
     try:
-        chat_completion = client.chat.completions.create(
-            messages=messages,
-            model="llama3-8b-8192",  # Choose an appropriate Groq model
-            temperature=0.2,
-            response_format={"type": "json_object"}  # Request JSON output
         )
-        # Extract the content from the response
-        return {"generated_text": chat_completion.choices[0].message.content}
     except Exception as e:
-        return {"error": str(e)}
-def translate(text, source_lang, target_lang, emotion):
-    prompt = PROMPT_TEMPLATE.format(
-        text=text,
-        source_lang=source_lang,
-        target_lang=target_lang,
-        emotion=emotion
-    )
-    payload = {"inputs": prompt}
-    output = query_groq(payload)
-    # Debug check
-    if "error" in output:
-        return {
-            "cleaned_text": text,
-            "translated_text": "[Error: " + output["error"] + "]",
-            "emotion": emotion
-        }
-    try:
-        raw_text = output["generated_text"]
-        # The response is already in JSON due to response_format
-        parsed = json.loads(raw_text.strip())
-    except Exception as e:
-        parsed = {
-            "cleaned_text": text,
-            "translated_text": "[Parsing error: " + str(e) + "]",
-            "emotion": emotion
-        }
-    return parsed
-# Gradio UI with speech input
-def gradio_interface(audio, text, source_lang, target_lang, emotion):
-    # If audio is provided, transcribe it to text
-    if audio is not None and asr_pipeline is not None:
-        try:
-            transcribed_text = asr_pipeline(audio)["text"]
-            if text:
-                text = transcribed_text + " " + text
-            else:
-                text = transcribed_text
-        except Exception as e:
-            return json.dumps({"error": f"Speech-to-text transcription failed: {e}"}, indent=2, ensure_ascii=False)
-    elif audio is not None and asr_pipeline is None:
-        return json.dumps({"error": "ASR model could not be loaded. Check HF_TOKEN."}, indent=2, ensure_ascii=False)
-    if not text:
-        return json.dumps({"error": "No input text provided"}, indent=2, ensure_ascii=False)
-    result = translate(text, source_lang, target_lang, emotion)
-    return json.dumps(result, indent=2, ensure_ascii=False)
-iface = gr.Interface(
-    fn=gradio_interface,
-    inputs=[
-        gr.Audio(sources=["microphone"], type="filepath", label="🎙 Speech Input (or leave empty)"),
-        gr.Textbox(label="💬 Text Input"),
-        gr.Radio(["en", "es"], label="Source Language"),
-        gr.Radio(["en", "es"], label="Target Language"),
-        gr.Radio(["happy", "sad", "angry", "calm", "excited"], label="Emotion")
-    ],
-    outputs=gr.Textbox(label="Output JSON"),
-    title="AI Universal Translator - Translation Module (Groq)",
-    description="Cleans text or speech, translates EN ↔ ES, and preserves emotions using Groq."
-)
-if __name__ == "__main__":
-    iface.launch()

 import gradio as gr
 import os
+import re
+from groq import Groq
+from faster_whisper import WhisperModel
 from transformers import pipeline
+# =========================
+# CONFIG
+# =========================
+# The Groq key is read from the environment.
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # set in HuggingFace secrets
+# Building the client fails when no key is configured. Keep the module
+# importable in that case: with groq_client left as None, the try/except in
+# mistral_translate catches the resulting error and uses OPUS-MT instead.
+try:
+    groq_client = Groq(api_key=GROQ_API_KEY)
+except Exception as e:  # startup boundary: report and degrade, don't crash
+    print("Warning: could not initialize Groq client, using OPUS-MT only:", e)
+    groq_client = None
+# Whisper ASR model
+whisper_model = WhisperModel("medium")
+# Hugging Face fallback translation models
+translator_en2es = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")
+translator_es2en = pipeline("translation", model="Helsinki-NLP/opus-mt-es-en")
+# =========================
+# TEXT CLEANING FUNCTION
+# =========================
+def clean_text(text):
+    """Strip filler sounds from a transcript and tidy what is left.
+
+    Removes "um", "uh", "erm" and "hmm" (any number of trailing repeated
+    letters, case-insensitive) along with the commas that bracket them,
+    collapses whitespace, re-attaches punctuation left floating by a removal
+    and upper-cases the first character.
+
+    Args:
+        text: Raw transcript; ``None`` or an empty string is accepted.
+
+    Returns:
+        The cleaned text, or an empty string when nothing remains.
+    """
+    if not text:
+        return ""
+    # Remove filler words together with their surrounding commas, so that
+    # "I, um, think" becomes "I think" rather than "I, , think".
+    text = re.sub(r",?\s*\b(?:um+|uh+|erm+|hmm+)\b,?", " ", text, flags=re.IGNORECASE)
+    # Normalize spacing
+    text = re.sub(r"\s+", " ", text)
+    # Re-attach punctuation a removed filler left floating ("think ." -> "think.").
+    # The digit lookahead keeps values written like " .5" untouched.
+    text = re.sub(r"\s+([,.;:!?])(?!\d)", r"\1", text)
+    # Drop separators stranded at the very start by a leading filler.
+    text = text.strip().lstrip(" ,.;:!?")
+    # Capitalize first letter
+    if text and not text[0].isupper():
+        text = text[0].upper() + text[1:]
+    return text
+# =========================
+# TRANSLATION FUNCTION
+# =========================
+def mistral_translate(text, source_lang, target_lang):
+    """Translate ``text`` between English and Spanish.
+
+    The Groq chat-completions API is tried first. If the request or the
+    handling of its response raises, the local Helsinki-NLP OPUS-MT
+    pipelines are used instead.
+
+    Args:
+        text: Text to translate.
+        source_lang: Source language name. A value starting with "english"
+            (case-insensitive) selects the en->es fallback model; anything
+            else selects the es->en one.
+        target_lang: Target language name; only forwarded to the LLM prompt.
+
+    Returns:
+        The translated text as a string.
+    """
+    system_prompt = """
+    You are an expert bilingual translator (English ↔ Spanish).
+    Translate text accurately while preserving meaning, idioms, and emotional tags (<happy>, <angry>, <calm>).
+    Output only the translated text.
+    """
+    user_prompt = f"""
+    Translate the following text:
+    Source Language: {source_lang}
+    Target Language: {target_lang}
+    Text: "{text}"
+    """
     try:
+        response = groq_client.chat.completions.create(
+            # NOTE(review): confirm "mistral-7b-instruct" is a model id Groq serves.
+            model="mistral-7b-instruct",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+            temperature=0.3,
         )
+        # The SDK returns a message object, not a dict, so use attribute
+        # access. Subscripting raised on every call and forced the fallback.
+        return response.choices[0].message.content.strip()
     except Exception as e:
+        print("Groq API failed, switching to OPUS-MT:", e)
+        if source_lang.lower().startswith("english"):
+            return translator_en2es(text)[0]["translation_text"]
+        else:
+            return translator_es2en(text)[0]["translation_text"]
+# =========================
+# MAIN PIPELINE
+# =========================
+def translate_speech(audio, source_lang="English", target_lang="Spanish"):
+    """Transcribe a recording and translate the cleaned transcript.
+
+    Args:
+        audio: Path of the recorded audio file, or ``None`` when the handler
+            is invoked without a recording.
+        source_lang: Language name passed on to ``mistral_translate``.
+        target_lang: Language name passed on to ``mistral_translate``.
+
+    Returns:
+        A dict with ``"original_text"`` (cleaned transcript) and
+        ``"translated_text"``. Both are empty strings when the transcript is
+        empty after cleaning.
+
+    Raises:
+        gr.Error: If ``audio`` is ``None``.
+    """
+    # Fail with a readable UI message instead of an opaque Whisper error.
+    if audio is None:
+        raise gr.Error("No audio received. Please record something first.")
+    # Step 1: Speech → Text
+    segments, _ = whisper_model.transcribe(audio, beam_size=5)
+    asr_text = clean_text(" ".join(seg.text for seg in segments))
+    # Nothing usable was transcribed: skip the translation request entirely.
+    if not asr_text:
+        return {"original_text": "", "translated_text": ""}
+    # Step 2: Translate Text
+    translated_text = mistral_translate(asr_text, source_lang, target_lang)
+    return {
+        "original_text": asr_text,
+        "translated_text": translated_text
+    }
+# =========================
+# GRADIO UI
+# =========================
+# Layout: language selectors on the first row, microphone input and JSON
+# result on the second, and a button that runs translate_speech.
+with gr.Blocks() as demo:
+    gr.Markdown("# 🎙️ AI Universal Translator (EN ↔ ES)")
+    gr.Markdown("Speak in English or Spanish, and get real-time translated speech + text.")
+    with gr.Row():
+        source_lang = gr.Dropdown(["English", "Spanish"], value="English", label="Source Language")
+        target_lang = gr.Dropdown(["Spanish", "English"], value="Spanish", label="Target Language")
+    with gr.Row():
+        # type="filepath" hands the handler a path string rather than raw samples.
+        audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak Here")
+        output_text = gr.JSON(label="Translation Result")
+    btn = gr.Button("Translate")
+    btn.click(translate_speech, inputs=[audio_in, source_lang, target_lang], outputs=[output_text])
+# Start the server only when run as a script, so importing this module
+# (e.g. from a test) does not launch the app.
+if __name__ == "__main__":
+    demo.launch()