Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -223,9 +223,7 @@ class OnnxBgeEmbeddings(Embeddings):
|
|
| 223 |
# ---------------------------------------------------------
|
| 224 |
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
|
| 225 |
# ---------------------------------------------------------
|
| 226 |
-
|
| 227 |
-
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
|
| 228 |
-
# ---------------------------------------------------------
|
| 229 |
class LLMEvaluator:
|
| 230 |
def __init__(self):
|
| 231 |
self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
|
|
@@ -233,11 +231,8 @@ class LLMEvaluator:
|
|
| 233 |
|
| 234 |
print(f"π Preparing LLM: {self.repo_id}...")
|
| 235 |
|
| 236 |
-
# [
|
| 237 |
-
|
| 238 |
-
# and ignore the huge standard and quantized files.
|
| 239 |
-
print(f"📥 Downloading ONLY the FP16 version to {self.local_dir}...")
|
| 240 |
-
|
| 241 |
snapshot_download(
|
| 242 |
repo_id=self.repo_id,
|
| 243 |
local_dir=self.local_dir,
|
|
@@ -248,18 +243,19 @@ class LLMEvaluator:
|
|
| 248 |
"tokenizer*",
|
| 249 |
"special_tokens_map.json",
|
| 250 |
"*.jinja",
|
| 251 |
-
"onnx/model_fp16.onnx" #
|
| 252 |
]
|
| 253 |
)
|
| 254 |
-
print("✅ Download complete
|
| 255 |
|
| 256 |
self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
|
| 257 |
|
| 258 |
-
#
|
| 259 |
-
#
|
| 260 |
self.model = ORTModelForCausalLM.from_pretrained(
|
| 261 |
self.local_dir,
|
| 262 |
-
|
|
|
|
| 263 |
use_cache=True,
|
| 264 |
use_io_binding=False
|
| 265 |
)
|
|
@@ -310,6 +306,7 @@ class LLMEvaluator:
|
|
| 310 |
skip_special_tokens=True
|
| 311 |
)
|
| 312 |
return response
|
|
|
|
| 313 |
# ---------------------------------------------------------
|
| 314 |
# 3. Main Application Logic
|
| 315 |
# ---------------------------------------------------------
|
|
|
|
| 223 |
# ---------------------------------------------------------
|
| 224 |
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
|
| 225 |
# ---------------------------------------------------------
|
| 226 |
+
|
|
|
|
|
|
|
| 227 |
class LLMEvaluator:
|
| 228 |
def __init__(self):
|
| 229 |
self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
|
|
|
|
| 231 |
|
| 232 |
print(f"π Preparing LLM: {self.repo_id}...")
|
| 233 |
|
| 234 |
+
# [FIXED DOWNLOADER]
|
| 235 |
+
print(f"📥 Downloading FP16 model + data to {self.local_dir}...")
|
|
|
|
|
|
|
|
|
|
| 236 |
snapshot_download(
|
| 237 |
repo_id=self.repo_id,
|
| 238 |
local_dir=self.local_dir,
|
|
|
|
| 243 |
"tokenizer*",
|
| 244 |
"special_tokens_map.json",
|
| 245 |
"*.jinja",
|
| 246 |
+
"onnx/model_fp16.onnx*" # WILDCARD '*' ensures we get .onnx AND .onnx_data
|
| 247 |
]
|
| 248 |
)
|
| 249 |
+
print("✅ Download complete.")
|
| 250 |
|
| 251 |
self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
|
| 252 |
|
| 253 |
+
# [CRITICAL FIX]
|
| 254 |
+
# Separating 'subfolder' and 'file_name' is required by Optimum
|
| 255 |
self.model = ORTModelForCausalLM.from_pretrained(
|
| 256 |
self.local_dir,
|
| 257 |
+
subfolder="onnx", # Point to the subfolder
|
| 258 |
+
file_name="model_fp16.onnx", # Just the filename
|
| 259 |
use_cache=True,
|
| 260 |
use_io_binding=False
|
| 261 |
)
|
|
|
|
| 306 |
skip_special_tokens=True
|
| 307 |
)
|
| 308 |
return response
|
| 309 |
+
|
| 310 |
# ---------------------------------------------------------
|
| 311 |
# 3. Main Application Logic
|
| 312 |
# ---------------------------------------------------------
|