lalaru committed on
Commit
ba67b2a
·
verified ·
1 Parent(s): fd505dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -72
app.py CHANGED
@@ -5,18 +5,21 @@ import requests
5
  import gradio as gr
6
  from langdetect import detect, LangDetectException
7
 
8
- # Groq SDK (install in requirements.txt)
 
 
 
9
  try:
10
  from groq import Groq
11
  except Exception:
12
  Groq = None
13
 
14
- # Config (set these in your Space secrets)
15
- GROQ_API_KEY = os.getenv("GROQ_API_KEY") # your Groq key
16
  GROQ_MODEL = os.getenv("GROQ_MODEL", "mixtral-8x7b-32768")
17
- HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN") # hf_... token (optional but recommended)
18
 
19
- # Init clients (if keys provided)
20
  groq_client = None
21
  if GROQ_API_KEY and Groq is not None:
22
  try:
@@ -24,95 +27,62 @@ if GROQ_API_KEY and Groq is not None:
24
  except Exception as e:
25
  print("Groq client init failed:", repr(e))
26
 
27
- # Simple mapping for UI -> code
 
 
 
 
 
28
  LANG_UI_TO_CODE = {"English": "en", "Spanish": "es", "French": "fr"}
29
 
30
  SYSTEM_PROMPT = """
31
  You are a multilingual translation assistant.
32
- Supported languages: English, Spanish, French.
33
-
34
  Task:
35
  1. Detect the input language automatically.
36
- 2. Translate the input into the exact target language requested by the user.
37
  3. Preserve meaning, tone, and formatting.
38
- 4. Keep numbers, symbols, names and special characters unchanged.
39
  5. If the input is already in the target language, return it unchanged.
40
- 6. Output ONLY the translated text, no commentary.
41
  """
42
 
43
  def call_groq(user_text, target_lang_ui):
44
  if not groq_client:
45
  raise RuntimeError("Groq client not configured")
46
- # Put the target explicitly for determinism
47
  messages = [
48
  {"role": "system", "content": SYSTEM_PROMPT},
49
  {"role": "user", "content": f"Target language: {target_lang_ui}\n\n{user_text}"},
50
  ]
51
- # Best-effort: extract content from different response shapes
52
  chat = groq_client.chat.completions.create(
53
  model=GROQ_MODEL,
54
  messages=messages,
55
  temperature=0,
56
  max_tokens=2048,
57
  )
58
- # SDK usually returns .choices[0].message.content
59
  try:
60
  return chat.choices[0].message.content.strip()
61
  except Exception:
62
- # fallback for dict-like response
63
  try:
64
  return chat["choices"][0]["message"]["content"].strip()
65
  except Exception as e:
66
- print("Unexpected Groq response shape:", repr(e))
67
  raise
68
 
69
- def call_hf_opus(user_text, src_code, tgt_code):
70
- # map most common pairs to explicit model ids
71
- model_map = {
72
- ("en","es"): "Helsinki-NLP/opus-mt-en-es",
73
- ("es","en"): "Helsinki-NLP/opus-mt-es-en",
74
- ("en","fr"): "Helsinki-NLP/opus-mt-en-fr",
75
- ("fr","en"): "Helsinki-NLP/opus-mt-fr-en",
76
- ("es","fr"): "Helsinki-NLP/opus-mt-es-fr",
77
- ("fr","es"): "Helsinki-NLP/opus-mt-fr-es",
78
- }
79
- model_id = model_map.get((src_code, tgt_code)) or f"Helsinki-NLP/opus-mt-{src_code}-{tgt_code}"
80
-
81
- url = f"https://api-inference.huggingface.co/models/{model_id}"
82
- headers = {"Authorization": f"Bearer {HUGGINGFACE_TOKEN}"} if HUGGINGFACE_TOKEN else {}
83
- payload = {"inputs": user_text}
84
-
85
- resp = requests.post(url, headers=headers, json=payload, timeout=30)
86
- if resp.status_code != 200:
87
- raise RuntimeError(f"Hugging Face fallback failed: {resp.status_code} {resp.text}")
88
-
89
- data = resp.json()
90
- # handle common response shapes:
91
- if isinstance(data, list) and len(data) > 0:
92
- first = data[0]
93
- if isinstance(first, dict):
94
- # prefer common keys
95
- for k in ("translation_text", "generated_text", "text"):
96
- if k in first:
97
- return first[k]
98
- # otherwise return first value
99
- return next(iter(first.values()))
100
- else:
101
- return str(first)
102
- if isinstance(data, dict):
103
- for k in ("translation_text", "generated_text", "text"):
104
- if k in data:
105
- return data[k]
106
- return json.dumps(data)
107
- return str(data)
108
-
109
- def detect_lang_code(text):
110
  try:
111
- code = detect(text)
112
- # keep only en/es/fr; if another, default to 'en' for fallback routing
113
- return code if code in ("en","es","fr") else "en"
114
  except LangDetectException:
115
- return "en"
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  def translate_text(user_text, target_lang_ui):
118
  user_text = (user_text or "").strip()
@@ -120,7 +90,7 @@ def translate_text(user_text, target_lang_ui):
120
  return "⚠️ Please enter some text to translate."
121
  target_code = LANG_UI_TO_CODE.get(target_lang_ui, "en")
122
 
123
- # First: try Groq (preferred)
124
  try:
125
  if groq_client:
126
  out = call_groq(user_text, target_lang_ui)
@@ -129,21 +99,16 @@ def translate_text(user_text, target_lang_ui):
129
  except Exception as e:
130
  print("Groq call failed:", repr(e))
131
 
132
- # Fallback: detect source and call HF OPUS-MT models
133
  try:
134
- src_code = detect_lang_code(user_text)
135
- if src_code == target_code:
136
- # already same language
137
- return user_text
138
- out = call_hf_opus(user_text, src_code, target_code)
139
- return out.strip()
140
  except Exception as e:
141
- print("Hugging Face fallback failed:", repr(e))
142
- return f"Error: translation failed. Check Space logs for details."
143
 
144
  # ----------------- Gradio UI -----------------
145
  with gr.Blocks() as demo:
146
- gr.Markdown("## 🌐 Hackathon Translator (EN/ES/FR)")
147
 
148
  with gr.Row():
149
  txt = gr.Textbox(label="Enter your text", lines=6, placeholder="Type or paste text here...")
 
5
  import gradio as gr
6
  from langdetect import detect, LangDetectException
7
 
8
+ # Hugging Face Transformers
9
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
10
+
11
+ # Groq SDK
12
  try:
13
  from groq import Groq
14
  except Exception:
15
  Groq = None
16
 
17
+ # Config
18
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
19
  GROQ_MODEL = os.getenv("GROQ_MODEL", "mixtral-8x7b-32768")
20
+ HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
21
 
22
+ # Init Groq
23
  groq_client = None
24
  if GROQ_API_KEY and Groq is not None:
25
  try:
 
27
  except Exception as e:
28
  print("Groq client init failed:", repr(e))
29
 
30
+ # Universal translation model
31
+ m2m_model_name = "facebook/m2m100_418M"
32
+ m2m_tokenizer = M2M100Tokenizer.from_pretrained(m2m_model_name)
33
+ m2m_model = M2M100ForConditionalGeneration.from_pretrained(m2m_model_name)
34
+
35
+ # UI mapping
36
  LANG_UI_TO_CODE = {"English": "en", "Spanish": "es", "French": "fr"}
37
 
38
  SYSTEM_PROMPT = """
39
  You are a multilingual translation assistant.
 
 
40
  Task:
41
  1. Detect the input language automatically.
42
+ 2. Translate into the requested target language.
43
  3. Preserve meaning, tone, and formatting.
44
+ 4. Keep numbers, symbols, names, and special characters unchanged.
45
  5. If the input is already in the target language, return it unchanged.
 
46
  """
47
 
48
  def call_groq(user_text, target_lang_ui):
49
  if not groq_client:
50
  raise RuntimeError("Groq client not configured")
 
51
  messages = [
52
  {"role": "system", "content": SYSTEM_PROMPT},
53
  {"role": "user", "content": f"Target language: {target_lang_ui}\n\n{user_text}"},
54
  ]
 
55
  chat = groq_client.chat.completions.create(
56
  model=GROQ_MODEL,
57
  messages=messages,
58
  temperature=0,
59
  max_tokens=2048,
60
  )
 
61
  try:
62
  return chat.choices[0].message.content.strip()
63
  except Exception:
 
64
  try:
65
  return chat["choices"][0]["message"]["content"].strip()
66
  except Exception as e:
67
+ print("Unexpected Groq response:", repr(e))
68
  raise
69
 
70
def call_m2m(user_text, target_code):
    """Translate ``user_text`` into ``target_code`` with the local M2M100 model.

    The source language is guessed with ``langdetect``. The guess is reduced
    to its base code and checked against the tokenizer before use, so that a
    region-tagged or unsupported detection does not abort the translation.

    Args:
        user_text: Text to translate.
        target_code: Language code of the desired output (e.g. "en", "es",
            "fr"), passed to ``m2m_tokenizer.get_lang_id``.

    Returns:
        The decoded translation, or ``user_text`` unchanged when the detected
        source language equals ``target_code``.
    """
    try:
        src_code = detect(user_text)
    except LangDetectException:
        src_code = "en"  # fallback

    # langdetect reports some languages with a region suffix ("zh-cn",
    # "zh-tw"); keep only the base code before comparing or tokenizing.
    src_code = src_code.split("-")[0]

    # if already target language → return as-is
    if src_code == target_code:
        return user_text

    # Probe the tokenizer with the same lookup used for the target below;
    # an unknown source code falls back to English instead of raising.
    try:
        m2m_tokenizer.get_lang_id(src_code)
    except KeyError:
        src_code = "en"

    m2m_tokenizer.src_lang = src_code
    encoded = m2m_tokenizer(user_text, return_tensors="pt")
    generated = m2m_model.generate(
        **encoded,
        forced_bos_token_id=m2m_tokenizer.get_lang_id(target_code),
    )
    return m2m_tokenizer.decode(generated[0], skip_special_tokens=True)
86
 
87
  def translate_text(user_text, target_lang_ui):
88
  user_text = (user_text or "").strip()
 
90
  return "⚠️ Please enter some text to translate."
91
  target_code = LANG_UI_TO_CODE.get(target_lang_ui, "en")
92
 
93
+ # Try Groq first
94
  try:
95
  if groq_client:
96
  out = call_groq(user_text, target_lang_ui)
 
99
  except Exception as e:
100
  print("Groq call failed:", repr(e))
101
 
102
+ # Fallback → M2M100 universal translator
103
  try:
104
+ return call_m2m(user_text, target_code)
 
 
 
 
 
105
  except Exception as e:
106
+ print("M2M100 translation failed:", repr(e))
107
+ return "❌ Translation failed. Check logs."
108
 
109
  # ----------------- Gradio UI -----------------
110
  with gr.Blocks() as demo:
111
+ gr.Markdown("## 🌐 Hackathon Translator (Universal)")
112
 
113
  with gr.Row():
114
  txt = gr.Textbox(label="Enter your text", lines=6, placeholder="Type or paste text here...")