Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -127,37 +127,23 @@ def image_generator(prompt: str) -> str:
|
|
| 127 |
return f"Image saved at {output_path}"
|
| 128 |
|
| 129 |
# -------------------- Local LLM (Replaces HfApiModel) --------------------
|
| 130 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 131 |
import torch
|
| 132 |
|
| 133 |
class LocalModel:
|
| 134 |
-
"""
|
| 135 |
-
Minimal local model interface compatible with smolagents CodeAgent.
|
| 136 |
-
"""
|
| 137 |
def __init__(self):
|
| 138 |
model_name = "openlm-research/open_llama_3b"
|
| 139 |
-
|
| 140 |
-
# Load tokenizer with use_fast=False to avoid SentencePiece conversion error
|
| 141 |
self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
| 142 |
-
|
| 143 |
-
# Load model with appropriate dtype and device map
|
| 144 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 145 |
model_name,
|
| 146 |
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
| 147 |
device_map="auto" if torch.cuda.is_available() else None,
|
| 148 |
)
|
| 149 |
|
| 150 |
-
|
| 151 |
-
self.
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
tokenizer=self.tokenizer,
|
| 155 |
-
device=0 if torch.cuda.is_available() else -1,
|
| 156 |
-
)
|
| 157 |
-
|
| 158 |
-
def generate(self, prompt, **kwargs):
|
| 159 |
-
result = self.pipeline(prompt, max_new_tokens=500, do_sample=True, **kwargs)
|
| 160 |
-
return result[0]['generated_text']
|
| 161 |
|
| 162 |
def __call__(self, prompt, **kwargs):
|
| 163 |
return self.generate(prompt, **kwargs)
|
|
|
|
| 127 |
return f"Image saved at {output_path}"
|
| 128 |
|
| 129 |
# -------------------- Local LLM (Replaces HfApiModel) --------------------
|
| 130 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 131 |
import torch
|
| 132 |
|
| 133 |
class LocalModel:
    """Minimal local causal-LM wrapper compatible with smolagents CodeAgent.

    Loads ``openlm-research/open_llama_3b`` once at construction and exposes
    ``generate`` / ``__call__``, both returning the decoded completion string
    (prompt included, as produced by ``tokenizer.decode``).
    """

    def __init__(self):
        model_name = "openlm-research/open_llama_3b"
        # use_fast=False avoids the SentencePiece fast-tokenizer conversion
        # error seen with OpenLLaMA checkpoints.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
        # fp16 + device_map="auto" when a GPU is available; plain fp32 on CPU.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto" if torch.cuda.is_available() else None,
        )

    def generate(self, prompt, max_new_tokens=500, **gen_kwargs):
        """Generate a completion for ``prompt``.

        Args:
            prompt: Input text to complete.
            max_new_tokens: Cap on newly generated tokens (default 500).
            **gen_kwargs: Extra options forwarded to ``model.generate``
                (e.g. ``do_sample``, ``temperature``). Previously any such
                keyword raised TypeError even though ``__call__`` forwards
                its kwargs here.

        Returns:
            The decoded output string with special tokens stripped.
        """
        # Move token tensors to wherever the (possibly sharded) model lives.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        output = self.model.generate(
            **inputs, max_new_tokens=max_new_tokens, **gen_kwargs
        )
        return self.tokenizer.decode(output[0], skip_special_tokens=True)

    def __call__(self, prompt, **kwargs):
        # Make the instance directly callable, as smolagents expects of a model.
        return self.generate(prompt, **kwargs)