NoeMartinezSanchez committed on
Commit
8d7fb25
·
1 Parent(s): 4fa1e6b

Se agrega la lectura del token HF_TOKEN desde las variables de entorno para usar Gemma 2B

Browse files
Files changed (1) hide show
  1. models/gemma_wrapper.py +9 -0
models/gemma_wrapper.py CHANGED
@@ -5,6 +5,7 @@ responses in a RAG architecture, optimized for CPU-only inference without quanti
5
  """
6
 
7
  import gc
 
8
  import time
9
  from typing import Optional
10
 
@@ -55,10 +56,17 @@ class GemmaWrapper:
55
  logger.info(f"Initializing Gemma model: {self.model_name}")
56
  logger.info(f"Loading on CPU with float32 (no quantization)")
57
 
 
 
 
 
 
 
58
  logger.info("Loading tokenizer...")
59
  self.tokenizer = AutoTokenizer.from_pretrained(
60
  self.model_name,
61
  cache_dir=self.cache_dir,
 
62
  )
63
 
64
  if self.tokenizer.pad_token is None:
@@ -71,6 +79,7 @@ class GemmaWrapper:
71
  device_map="cpu",
72
  torch_dtype=torch.float32,
73
  cache_dir=self.cache_dir,
 
74
  )
75
 
76
  self.model.eval()
 
5
  """
6
 
7
  import gc
8
+ import os
9
  import time
10
  from typing import Optional
11
 
 
56
  logger.info(f"Initializing Gemma model: {self.model_name}")
57
  logger.info(f"Loading on CPU with float32 (no quantization)")
58
 
59
+ hf_token = os.getenv("HF_TOKEN")
60
+ if hf_token:
61
+ logger.info("HF_TOKEN found in environment variables")
62
+ else:
63
+ logger.warning("HF_TOKEN not found in environment variables")
64
+
65
  logger.info("Loading tokenizer...")
66
  self.tokenizer = AutoTokenizer.from_pretrained(
67
  self.model_name,
68
  cache_dir=self.cache_dir,
69
+ token=hf_token,
70
  )
71
 
72
  if self.tokenizer.pad_token is None:
 
79
  device_map="cpu",
80
  torch_dtype=torch.float32,
81
  cache_dir=self.cache_dir,
82
+ token=hf_token,
83
  )
84
 
85
  self.model.eval()