TiberiuCristianLeon committed on
Commit
24eee5b
·
verified ·
1 Parent(s): e7a8f80

Update translators.py

Browse files
Files changed (1) hide show
  1. translators.py +12 -13
translators.py CHANGED
@@ -13,7 +13,7 @@ class Translators:
13
  return response.json()[0][0][0]
14
 
15
  def riva4B11(self):
16
- set language_pairs = {
17
  'en-zh-cn': {'source': 'English', 'target': 'Simplified Chinese'},
18
  'en-zh': {'source': 'English', 'target': 'Simplified Chinese'},
19
  'en-zh-tw': {'source': 'English', 'target': 'Traditional Chinese'},
@@ -43,30 +43,29 @@ class Translators:
43
  'pt-en': {'source': 'Brazilian Portuguese', 'target': 'English'},
44
  'pt-br-en': {'source': 'Brazilian Portuguese', 'target': 'English'},
45
  }
46
- tokenizer = AutoTokenizer.from_pretrained(self.model)
47
- model = AutoModelForCausalLM.from_pretrained(self.model,
48
  torch_dtype="auto", device_map="auto")
49
  # Use the prompt template (along with chat template)
50
- messages = [
51
- {
52
  "role": "system",
53
  "content": f"{self.sl}-{self.tl}",
54
  },
55
- {"role": "user", "content": self.input,
56
- ]
57
  tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)
58
- outputs = model.generate(tokenized_chat, max_new_tokens=128, pad_token_id=tokenizer.eos_token_id)
59
- return tokenizer.decode(outputs[0]))
60
 
61
  def translategemma(self):
62
  from huggingface_hub import login
63
- login(token=os.environ.get("HF_TOKEN"))
 
64
  pipe = pipeline(
65
  "image-text-to-text",
66
  model = "google/translategemma-4b-it",
67
  device = self.device,
68
- dtype = torch.bfloat16
69
- )
70
  # ---- Text Translation ----
71
  messages = [
72
  {
@@ -555,4 +554,4 @@ class Translators:
555
  message_text = f"Translated from {sl} to {tl} with Bergamot {model_name}."
556
  except Exception as error:
557
  response = error
558
- return translated_text, message_text
 
13
  return response.json()[0][0][0]
14
 
15
  def riva4B11(self):
16
+ language_pairs = {
17
  'en-zh-cn': {'source': 'English', 'target': 'Simplified Chinese'},
18
  'en-zh': {'source': 'English', 'target': 'Simplified Chinese'},
19
  'en-zh-tw': {'source': 'English', 'target': 'Traditional Chinese'},
 
43
  'pt-en': {'source': 'Brazilian Portuguese', 'target': 'English'},
44
  'pt-br-en': {'source': 'Brazilian Portuguese', 'target': 'English'},
45
  }
46
+ tokenizer = AutoTokenizer.from_pretrained(self.model_name)
47
+ model = AutoModelForCausalLM.from_pretrained(self.model_name,
48
  torch_dtype="auto", device_map="auto")
49
  # Use the prompt template (along with chat template)
50
+ messages = [{
 
51
  "role": "system",
52
  "content": f"{self.sl}-{self.tl}",
53
  },
54
+ {"role": "user", "content": self.input_text}]
 
55
  tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)
56
+ outputs = model.generate(tokenized_chat, max_new_tokens=self.max_new_tokens, pad_token_id=tokenizer.eos_token_id)
57
+ return tokenizer.decode(outputs[0]).split('<s>Assistant')[1].strip().removesuffix('</s>')
58
 
59
  def translategemma(self):
60
  from huggingface_hub import login
61
+ hftoken=os.environ.get("HF_TOKEN")
62
+ login(token=hftoken)
63
  pipe = pipeline(
64
  "image-text-to-text",
65
  model = "google/translategemma-4b-it",
66
  device = self.device,
67
+ dtype = torch.bfloat16,
68
+ token=hftoken)
69
  # ---- Text Translation ----
70
  messages = [
71
  {
 
554
  message_text = f"Translated from {sl} to {tl} with Bergamot {model_name}."
555
  except Exception as error:
556
  response = error
557
+ return translated_text, message_text