Spaces:
Running
Running
NoeMartinezSanchez committed on
Commit ·
8d7fb25
1
Parent(s): 4fa1e6b
Se agrega la lectura del token para usar gemma 2b
Browse files
- models/gemma_wrapper.py +9 -0
models/gemma_wrapper.py
CHANGED
|
@@ -5,6 +5,7 @@ responses in a RAG architecture, optimized for CPU-only inference without quanti
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import gc
|
|
|
|
| 8 |
import time
|
| 9 |
from typing import Optional
|
| 10 |
|
|
@@ -55,10 +56,17 @@ class GemmaWrapper:
|
|
| 55 |
logger.info(f"Initializing Gemma model: {self.model_name}")
|
| 56 |
logger.info(f"Loading on CPU with float32 (no quantization)")
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
logger.info("Loading tokenizer...")
|
| 59 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
| 60 |
self.model_name,
|
| 61 |
cache_dir=self.cache_dir,
|
|
|
|
| 62 |
)
|
| 63 |
|
| 64 |
if self.tokenizer.pad_token is None:
|
|
@@ -71,6 +79,7 @@ class GemmaWrapper:
|
|
| 71 |
device_map="cpu",
|
| 72 |
torch_dtype=torch.float32,
|
| 73 |
cache_dir=self.cache_dir,
|
|
|
|
| 74 |
)
|
| 75 |
|
| 76 |
self.model.eval()
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import gc
|
| 8 |
+
import os
|
| 9 |
import time
|
| 10 |
from typing import Optional
|
| 11 |
|
|
|
|
| 56 |
logger.info(f"Initializing Gemma model: {self.model_name}")
|
| 57 |
logger.info(f"Loading on CPU with float32 (no quantization)")
|
| 58 |
|
| 59 |
+
hf_token = os.getenv("HF_TOKEN")
|
| 60 |
+
if hf_token:
|
| 61 |
+
logger.info("HF_TOKEN found in environment variables")
|
| 62 |
+
else:
|
| 63 |
+
logger.warning("HF_TOKEN not found in environment variables")
|
| 64 |
+
|
| 65 |
logger.info("Loading tokenizer...")
|
| 66 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
| 67 |
self.model_name,
|
| 68 |
cache_dir=self.cache_dir,
|
| 69 |
+
token=hf_token,
|
| 70 |
)
|
| 71 |
|
| 72 |
if self.tokenizer.pad_token is None:
|
|
|
|
| 79 |
device_map="cpu",
|
| 80 |
torch_dtype=torch.float32,
|
| 81 |
cache_dir=self.cache_dir,
|
| 82 |
+
token=hf_token,
|
| 83 |
)
|
| 84 |
|
| 85 |
self.model.eval()
|