Dehmuller committed on
Commit
ab0b9ba
·
verified ·
1 Parent(s): 87b3281

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -73
app.py CHANGED
@@ -1,93 +1,64 @@
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
- import requests
4
- from bs4 import BeautifulSoup
5
  from langdetect import detect
6
 
7
- MODEL_NAME = "facebook/mbart-large-50-many-to-many-mmt"
 
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
9
  model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
10
 
11
- def translate_text(word):
12
- options = []
13
-
14
- # ---- Step 1: Machine Translation with context ----
15
- sentence = f"The meaning of '{word}' is:"
16
- tokenizer.src_lang = "en_XX"
17
- inputs = tokenizer(sentence, return_tensors="pt")
18
- generated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id["pt_XX"])
19
- mt_translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0].strip()
20
-
21
- if mt_translation and detect(mt_translation) == "pt":
22
- options.append(("Machine Translation", mt_translation, "neutral"))
23
 
24
- # ---- Step 2: Linguee ----
25
- linguee_opts = lookup_linguee(word)
26
- options.extend(linguee_opts)
 
27
 
28
- # ---- Step 3: Dicio enrichment (for each PT candidate) ----
29
- enriched = []
30
- for src, trans, reg in options:
31
- defs = lookup_dicio(trans)
32
- if defs:
33
- for d in defs:
34
- enriched.append(("Dicio", f"{trans} → {d}", "formal"))
35
- options.extend(enriched)
 
 
 
 
 
 
 
36
 
37
- # ---- Step 4: Fill with synonyms if < 5 ----
38
- if len(options) < 5:
39
- syns = lookup_sinonimos(options[0][1]) if options else []
40
- for s in syns:
41
- options.append(("Sinonimos", s, "varies"))
42
- if len(options) >= 5:
43
- break
44
-
45
- # ---- Format ----
46
- if not options:
47
- return "❌ No translations found. Try with more context."
48
-
49
- formatted = f"🔎 English: {word}\n\n"
50
- for i, (src, trans, reg) in enumerate(options, start=1):
51
- formatted += f"Option {i}: {trans} (source: {src}, register: {reg})\n"
52
- return formatted
53
 
54
- def lookup_linguee(word):
55
- url = f"https://www.linguee.com/english-portuguese/search?source=english&query={word}"
56
- r = requests.get(url)
57
- if r.status_code != 200:
58
- return []
59
- soup = BeautifulSoup(r.text, "html.parser")
60
- results = []
61
- for hit in soup.select(".dictLink"):
62
- text = hit.get_text().strip()
63
- if text and text.isalpha() and len(text) > 2:
64
- results.append(("Linguee", text, "varies"))
65
- return results[:3]
66
 
67
- def lookup_dicio(word_pt):
68
- url = f"https://www.dicio.com.br/{word_pt.lower().replace(' ', '-')}/"
69
- r = requests.get(url)
70
- if r.status_code != 200:
71
- return []
72
- soup = BeautifulSoup(r.text, "html.parser")
73
- defs = soup.select("p.significado")
74
- return [d.get_text().strip() for d in defs[:2]]
75
 
76
- def lookup_sinonimos(word_pt):
77
- url = f"https://www.sinonimos.com.br/{word_pt.lower().replace(' ', '-')}/"
78
- r = requests.get(url)
79
- if r.status_code != 200:
80
- return []
81
- soup = BeautifulSoup(r.text, "html.parser")
82
- syns = soup.select("a.sinonimo")
83
- return [s.get_text().strip() for s in syns[:3]]
84
 
85
  demo = gr.Interface(
86
- fn=translate_text,
87
  inputs="text",
88
  outputs="text",
89
- title="English → Brazilian Portuguese Translator",
90
- description="Provides at least 5 PT-BR equivalents (MT + Linguee + Dicio + Synonyms)."
91
  )
92
 
93
  if __name__ == "__main__":
 
1
+ import os
2
  import gradio as gr
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+ from openai import OpenAI
 
5
  from langdetect import detect
6
 
7
+ # --- Hugging Face MT model ---
8
+ MODEL_NAME = "Helsinki-NLP/opus-mt-en-pt"
9
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
10
  model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
11
 
12
+ # --- OpenAI client ---
13
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 
 
 
 
 
 
 
 
 
14
 
15
+ def literal_translation(text):
16
+ inputs = tokenizer(text, return_tensors="pt")
17
+ outputs = model.generate(**inputs, max_length=100)
18
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
19
 
20
+ def gpt_explanation(text):
21
+ prompt = f"""
22
+ You are a bilingual translation assistant.
23
+ For the English expression "{text}":
24
+ 1. Provide a literal Brazilian Portuguese translation.
25
+ 2. Provide 2–3 idiomatic or natural Brazilian Portuguese equivalents that capture the real meaning.
26
+ 3. Explain in simple terms what the expression means in context.
27
+ Answer clearly, structured as options.
28
+ """
29
+ response = client.chat.completions.create(
30
+ model="gpt-4o-mini",
31
+ messages=[{"role": "user", "content": prompt}],
32
+ temperature=0.7
33
+ )
34
+ return response.choices[0].message.content
35
 
36
+ def translate_expression(expr):
37
+ options = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
+ # 1. Hugging Face literal MT
40
+ try:
41
+ lit = literal_translation(expr)
42
+ if lit and detect(lit) == "pt":
43
+ options.append(f"Literal (Hugging Face MT): {lit}")
44
+ except Exception as e:
45
+ options.append(f"(MT failed: {e})")
 
 
 
 
 
46
 
47
+ # 2. GPT-4o-mini idiomatic + explanation
48
+ try:
49
+ gpt_out = gpt_explanation(expr)
50
+ options.append("GPT-4o-mini analysis:\n" + gpt_out)
51
+ except Exception as e:
52
+ options.append(f"(GPT failed: {e})")
 
 
53
 
54
+ return "\n\n".join(options)
 
 
 
 
 
 
 
55
 
56
  demo = gr.Interface(
57
+ fn=translate_expression,
58
  inputs="text",
59
  outputs="text",
60
+ title="English → Brazilian Portuguese Translator (Hybrid)",
61
+ description="Gives literal + idiomatic PT-BR equivalents and explanations."
62
  )
63
 
64
  if __name__ == "__main__":