lalaru committed on
Commit
030831c
·
verified ·
1 Parent(s): 545856a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -143
app.py CHANGED
@@ -1,151 +1,103 @@
1
  import gradio as gr
2
- import requests
3
- import json
4
  import os
 
 
 
5
  from transformers import pipeline
6
- from huggingface_hub import login
7
- from groq import Groq # Import the Groq SDK
8
 
9
- # Initialize the Groq client
10
- # The Groq SDK automatically reads the GROQ_API_KEY environment variable.
11
- try:
12
- client = Groq()
13
- except Exception as e:
14
- print(f"Warning: Could not initialize Groq client. Check GROQ_API_KEY. Error: {e}")
15
- client = None
16
-
17
- # Hugging Face token for the ASR model
18
- HF_TOKEN = os.getenv("HF_TOKEN")
19
-
20
- # Log in to Hugging Face Hub for the ASR model
21
- if HF_TOKEN:
22
- login(token=HF_TOKEN, add_to_git_credential=False)
23
- else:
24
- print("Warning: HF_TOKEN not set. ASR model may not load.")
25
-
26
- # Load a speech-to-text model with authentication
27
- try:
28
- asr_pipeline = pipeline(
29
- "automatic-speech-recognition",
30
- model="distil-whisper/distil-small.en",
31
- use_auth_token=HF_TOKEN
32
- )
33
- except Exception as e:
34
- print(f"Warning: Could not load ASR model. Error: {e}")
35
- asr_pipeline = None
36
-
37
- # Prompt template
38
- PROMPT_TEMPLATE = """
39
- You are an AI translation assistant for a real-time universal translator.
40
- Your tasks:
41
- 1. Take the input text in either English or Spanish.
42
- 2. Remove filler words like "um", "uh", "ehhh", "pues", "like", "you know", and stretched words ("soooo", "pizzaaaa").
43
- 3. Correct punctuation and casing.
44
- 4. Translate the cleaned text into the target language (English ↔ Spanish).
45
- 5. Do not change the emotion tag.
46
-
47
- Return only JSON in this exact format:
48
-
49
- {{
50
- "cleaned_text": "<cleaned input text in original language>",
51
- "translated_text": "<translation in target language>",
52
- "emotion": "<given emotion>"
53
- }}
54
-
55
- Input:
56
- {text}
57
- Source language: {source_lang}
58
- Target language: {target_lang}
59
- Emotion: {emotion}
60
- """
61
-
62
- def query_groq(payload):
63
- if not client:
64
- return {"error": "Groq client not initialized. Check GROQ_API_KEY."}
65
-
66
- # Structure messages for Groq's chat completions API
67
- messages = [
68
- {"role": "system", "content": "You are an AI translation assistant for a real-time universal translator that returns JSON."},
69
- {"role": "user", "content": payload["inputs"]},
70
- ]
71
 
72
  try:
73
- chat_completion = client.chat.completions.create(
74
- messages=messages,
75
- model="llama3-8b-8192", # Choose an appropriate Groq model
76
- temperature=0.2,
77
- response_format={"type": "json_object"} # Request JSON output
 
 
78
  )
79
- # Extract the content from the response
80
- return {"generated_text": chat_completion.choices[0].message.content}
81
  except Exception as e:
82
- return {"error": str(e)}
83
-
84
- def translate(text, source_lang, target_lang, emotion):
85
- prompt = PROMPT_TEMPLATE.format(
86
- text=text,
87
- source_lang=source_lang,
88
- target_lang=target_lang,
89
- emotion=emotion
90
- )
91
- payload = {"inputs": prompt}
92
- output = query_groq(payload)
93
-
94
- # Debug check
95
- if "error" in output:
96
- return {
97
- "cleaned_text": text,
98
- "translated_text": "[Error: " + output["error"] + "]",
99
- "emotion": emotion
100
- }
101
-
102
- try:
103
- raw_text = output["generated_text"]
104
- # The response is already in JSON due to response_format
105
- parsed = json.loads(raw_text.strip())
106
- except Exception as e:
107
- parsed = {
108
- "cleaned_text": text,
109
- "translated_text": "[Parsing error: " + str(e) + "]",
110
- "emotion": emotion
111
- }
112
- return parsed
113
-
114
- # Gradio UI with speech input
115
- def gradio_interface(audio, text, source_lang, target_lang, emotion):
116
- # If audio is provided, transcribe it to text
117
- if audio is not None and asr_pipeline is not None:
118
- try:
119
- transcribed_text = asr_pipeline(audio)["text"]
120
- if text:
121
- text = transcribed_text + " " + text
122
- else:
123
- text = transcribed_text
124
- except Exception as e:
125
- return json.dumps({"error": f"Speech-to-text transcription failed: {e}"}, indent=2, ensure_ascii=False)
126
- elif audio is not None and asr_pipeline is None:
127
- return json.dumps({"error": "ASR model could not be loaded. Check HF_TOKEN."}, indent=2, ensure_ascii=False)
128
-
129
- if not text:
130
- return json.dumps({"error": "No input text provided"}, indent=2, ensure_ascii=False)
131
-
132
- result = translate(text, source_lang, target_lang, emotion)
133
- return json.dumps(result, indent=2, ensure_ascii=False)
134
-
135
- iface = gr.Interface(
136
- fn=gradio_interface,
137
- inputs=[
138
- gr.Audio(sources=["microphone"], type="filepath", label="πŸŽ™ Speech Input (or leave empty)"),
139
- gr.Textbox(label="πŸ’¬ Text Input"),
140
- gr.Radio(["en", "es"], label="Source Language"),
141
- gr.Radio(["en", "es"], label="Target Language"),
142
- gr.Radio(["happy", "sad", "angry", "calm", "excited"], label="Emotion")
143
- ],
144
- outputs=gr.Textbox(label="Output JSON"),
145
- title="AI Universal Translator - Translation Module (Groq)",
146
- description="Cleans text or speech, translates EN ↔ ES, and preserves emotions using Groq."
147
- )
148
-
149
- if __name__ == "__main__":
150
- iface.launch()
151
-
 
1
  import gradio as gr
 
 
2
  import os
3
+ import re
4
+ from groq import Groq
5
+ from faster_whisper import WhisperModel
6
  from transformers import pipeline
 
 
7
 
8
+ # =========================
9
+ # CONFIG
10
+ # =========================
11
# Groq credentials are read from the environment (set in HuggingFace secrets).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Constructing the client without an API key raises. Since the app has a local
# OPUS-MT fallback, a missing/invalid key must not prevent the app from
# starting: keep going with ``groq_client = None`` and let the translation
# step fall back to the local models.
try:
    groq_client = Groq(api_key=GROQ_API_KEY)
except Exception as e:
    print("Warning: could not initialize Groq client, OPUS-MT will be used:", e)
    groq_client = None

# Whisper ASR model
whisper_model = WhisperModel("medium")

# Hugging Face fallback translation models
translator_en2es = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")
translator_es2en = pipeline("translation", model="Helsinki-NLP/opus-mt-es-en")
20
+
21
+ # =========================
22
+ # TEXT CLEANING FUNCTION
23
+ # =========================
24
def clean_text(text):
    """Remove spoken filler words from *text* and tidy spacing and casing.

    Fillers (``um``, ``uh``, ``erm``, ``hmm`` and their stretched forms such
    as ``ummm``) are matched case-insensitively as whole words, so words that
    merely start with those letters (``umbrella``) are left alone.

    Args:
        text: Raw transcription. ``None`` or an empty string is accepted.

    Returns:
        The cleaned text with its first character upper-cased, or ``""`` when
        nothing is left.
    """
    if not text:
        return ""

    # Remove filler words together with a comma/semicolon attached to them,
    # otherwise "Um, hello" would turn into ", hello".
    text = re.sub(
        r"\b(?:um+|uh+|erm+|hmm+)\b[,;]?", "", text, flags=re.IGNORECASE
    )
    # Normalize spacing
    text = re.sub(r"\s+", " ", text).strip()
    # A removed filler can strand a space before punctuation ("so uh." ->
    # "so ."). Only touch punctuation that ends a token so ".5" stays intact.
    text = re.sub(r"\s+([,.;:!?])(?=\s|$)", r"\1", text)
    # ...and a comma left directly before sentence-ending punctuation.
    text = re.sub(r"[,;]+(?=[.!?])", "", text)
    # Capitalize first letter
    if text and not text[0].isupper():
        text = text[0].upper() + text[1:]
    return text
33
+
34
+ # =========================
35
+ # TRANSLATION FUNCTION
36
+ # =========================
37
def mistral_translate(text, source_lang, target_lang):
    """Translate *text* with the Groq chat API, falling back to OPUS-MT.

    Args:
        text: Text to translate.
        source_lang: Source language name or code (e.g. ``"English"``/``"en"``).
        target_lang: Target language name or code.

    Returns:
        The translated text, or ``""`` when *text* is empty/blank.

    Any failure of the Groq call (including an uninitialized client or an
    empty completion) is logged and the local OPUS-MT pipelines are used
    instead; the direction is chosen from *source_lang*.
    """
    # Nothing to translate: avoid a pointless API round-trip.
    if not text or not text.strip():
        return ""

    system_prompt = """
    You are an expert bilingual translator (English ↔ Spanish).
    Translate text accurately while preserving meaning, idioms, and emotional tags (<happy>, <angry>, <calm>).
    Output only the translated text.
    """

    user_prompt = f"""
    Translate the following text:
    Source Language: {source_lang}
    Target Language: {target_lang}
    Text: "{text}"
    """

    try:
        # NOTE(review): confirm "mistral-7b-instruct" is a model id that Groq
        # currently serves; an unknown id makes every call take the fallback.
        response = groq_client.chat.completions.create(
            model="mistral-7b-instruct",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.3,
        )
        # The SDK returns message objects, not dicts: use attribute access.
        # Subscripting (message["content"]) raises and silently forced the
        # fallback on every successful call.
        content = response.choices[0].message.content
        if content and content.strip():
            return content.strip()
        print("Groq API returned an empty translation, switching to OPUS-MT")
    except Exception as e:
        print("Groq API failed, switching to OPUS-MT:", e)

    # Accept both full names ("English") and ISO codes ("en").
    if source_lang.lower().startswith("en"):
        return translator_en2es(text)[0]["translation_text"]
    return translator_es2en(text)[0]["translation_text"]
67
+
68
+ # =========================
69
+ # MAIN PIPELINE
70
+ # =========================
71
def translate_speech(audio, source_lang="English", target_lang="Spanish"):
    """Transcribe recorded speech, clean it, and translate it.

    Args:
        audio: Path to the recorded audio file, or ``None`` when the user
            triggered the action without recording anything.
        source_lang: Language spoken in the recording.
        target_lang: Language to translate into.

    Returns:
        A dict with ``"original_text"`` (cleaned transcription) and
        ``"translated_text"``. When there is nothing to process, both are
        empty strings and an ``"error"`` key explains why.
    """
    # The button can be clicked before anything is recorded; passing None to
    # the ASR model would raise instead of giving the user a usable message.
    if audio is None:
        return {
            "original_text": "",
            "translated_text": "",
            "error": "No audio input provided",
        }

    # Step 1: Speech → Text
    segments, _ = whisper_model.transcribe(audio, beam_size=5)
    asr_text = clean_text(" ".join(seg.text for seg in segments))

    # Silence or filler-only speech leaves nothing to translate.
    if not asr_text:
        return {
            "original_text": "",
            "translated_text": "",
            "error": "No speech detected in the audio",
        }

    # Step 2: Translate Text
    translated_text = mistral_translate(asr_text, source_lang, target_lang)

    return {
        "original_text": asr_text,
        "translated_text": translated_text,
    }
84
+
85
+ # =========================
86
+ # GRADIO UI
87
+ # =========================
88
# Build the UI at import time so ``demo`` is available to tooling, but only
# start the server when this file is executed as a script.
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ AI Universal Translator (EN ↔ ES)")
    # NOTE(review): the text below promises translated *speech*, but the only
    # output wired up is the JSON text result — confirm the intended wording.
    gr.Markdown("Speak in English or Spanish, and get real-time translated speech + text.")

    with gr.Row():
        source_lang = gr.Dropdown(["English", "Spanish"], value="English", label="Source Language")
        target_lang = gr.Dropdown(["Spanish", "English"], value="Spanish", label="Target Language")

    with gr.Row():
        audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak Here")
        output_text = gr.JSON(label="Translation Result")

    btn = gr.Button("Translate")
    btn.click(translate_speech, inputs=[audio_in, source_lang, target_lang], outputs=[output_text])

if __name__ == "__main__":
    demo.launch()