heerjtdev commited on
Commit
05fc0ae
·
verified ·
1 Parent(s): 7970482

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -13
app.py CHANGED
@@ -223,9 +223,7 @@ class OnnxBgeEmbeddings(Embeddings):
223
  # ---------------------------------------------------------
224
  # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
225
  # ---------------------------------------------------------
226
- # ---------------------------------------------------------
227
- # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
228
- # ---------------------------------------------------------
229
  class LLMEvaluator:
230
  def __init__(self):
231
  self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
@@ -233,11 +231,8 @@ class LLMEvaluator:
233
 
234
  print(f"🔄 Preparing LLM: {self.repo_id}...")
235
 
236
- # [CRITICAL FIX]
237
- # We use 'allow_patterns' to download ONLY the specific FP16 model (~2GB)
238
- # and ignore the huge standard and quantized files.
239
- print(f"📥 Downloading ONLY the FP16 version to {self.local_dir}...")
240
-
241
  snapshot_download(
242
  repo_id=self.repo_id,
243
  local_dir=self.local_dir,
@@ -248,18 +243,19 @@ class LLMEvaluator:
248
  "tokenizer*",
249
  "special_tokens_map.json",
250
  "*.jinja",
251
- "onnx/model_fp16.onnx" # <--- CHANGED to FP16 model
252
  ]
253
  )
254
- print("✅ Download complete (Filtered to ~2GB).")
255
 
256
  self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
257
 
258
- # Load the specific FP16 ONNX file
259
- # We point strictly to the file we downloaded
260
  self.model = ORTModelForCausalLM.from_pretrained(
261
  self.local_dir,
262
- file_name="onnx/model_fp16.onnx", # <--- CHANGED to load FP16 file
 
263
  use_cache=True,
264
  use_io_binding=False
265
  )
@@ -310,6 +306,7 @@ class LLMEvaluator:
310
  skip_special_tokens=True
311
  )
312
  return response
 
313
  # ---------------------------------------------------------
314
  # 3. Main Application Logic
315
  # ---------------------------------------------------------
 
223
  # ---------------------------------------------------------
224
  # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
225
  # ---------------------------------------------------------
226
+
 
 
227
  class LLMEvaluator:
228
  def __init__(self):
229
  self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
 
231
 
232
  print(f"🔄 Preparing LLM: {self.repo_id}...")
233
 
234
+ # [FIXED DOWNLOADER]
235
+ print(f"📥 Downloading FP16 model + data to {self.local_dir}...")
 
 
 
236
  snapshot_download(
237
  repo_id=self.repo_id,
238
  local_dir=self.local_dir,
 
243
  "tokenizer*",
244
  "special_tokens_map.json",
245
  "*.jinja",
246
+ "onnx/model_fp16.onnx*" # WILDCARD '*' ensures we get .onnx AND .onnx_data
247
  ]
248
  )
249
+ print("✅ Download complete.")
250
 
251
  self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
252
 
253
+ # [CRITICAL FIX]
254
+ # Separating 'subfolder' and 'file_name' is required by Optimum
255
  self.model = ORTModelForCausalLM.from_pretrained(
256
  self.local_dir,
257
+ subfolder="onnx", # Point to the subfolder
258
+ file_name="model_fp16.onnx", # Just the filename
259
  use_cache=True,
260
  use_io_binding=False
261
  )
 
306
  skip_special_tokens=True
307
  )
308
  return response
309
+
310
  # ---------------------------------------------------------
311
  # 3. Main Application Logic
312
  # ---------------------------------------------------------