heerjtdev commited on
Commit
7970482
·
verified ·
1 Parent(s): 229e510

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -10
app.py CHANGED
@@ -220,29 +220,46 @@ class OnnxBgeEmbeddings(Embeddings):
220
  def embed_query(self, text):
221
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
222
 
 
 
 
223
  # ---------------------------------------------------------
224
  # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
225
  # ---------------------------------------------------------
226
  class LLMEvaluator:
227
  def __init__(self):
228
  self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
 
 
229
  print(f"πŸ”„ Preparing LLM: {self.repo_id}...")
230
 
231
  # [CRITICAL FIX]
232
- # Download model to a specific LOCAL directory to avoid cache symlink errors
233
- print("πŸ“₯ Downloading model to local directory (this fixes the filesystem error)...")
234
- local_model_path = snapshot_download(
 
 
235
  repo_id=self.repo_id,
236
- local_dir="onnx_llama_local", # Downloads to ./onnx_llama_local/
237
- local_dir_use_symlinks=False # Forces real files, not symlinks
 
 
 
 
 
 
 
 
238
  )
239
- print("βœ… Download complete.")
240
 
241
- self.tokenizer = AutoTokenizer.from_pretrained(local_model_path)
242
 
243
- # Load from the local folder
 
244
  self.model = ORTModelForCausalLM.from_pretrained(
245
- local_model_path,
 
246
  use_cache=True,
247
  use_io_binding=False
248
  )
@@ -293,7 +310,6 @@ class LLMEvaluator:
293
  skip_special_tokens=True
294
  )
295
  return response
296
-
297
  # ---------------------------------------------------------
298
  # 3. Main Application Logic
299
  # ---------------------------------------------------------
 
220
  def embed_query(self, text):
221
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
222
 
223
+ # ---------------------------------------------------------
224
+ # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
225
+ # ---------------------------------------------------------
226
  # ---------------------------------------------------------
227
  # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
228
  # ---------------------------------------------------------
229
  class LLMEvaluator:
230
  def __init__(self):
231
  self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
232
+ self.local_dir = "onnx_llama_local"
233
+
234
  print(f"πŸ”„ Preparing LLM: {self.repo_id}...")
235
 
236
  # [CRITICAL FIX]
237
+ # We use 'allow_patterns' to download ONLY the specific FP16 model (~2GB)
238
+ # and ignore the huge standard and quantized files.
239
+ print(f"πŸ“₯ Downloading ONLY the FP16 version to {self.local_dir}...")
240
+
241
+ snapshot_download(
242
  repo_id=self.repo_id,
243
+ local_dir=self.local_dir,
244
+ local_dir_use_symlinks=False,
245
+ allow_patterns=[
246
+ "config.json",
247
+ "generation_config.json",
248
+ "tokenizer*",
249
+ "special_tokens_map.json",
250
+ "*.jinja",
251
+ "onnx/model_fp16.onnx" # <--- CHANGED to FP16 model
252
+ ]
253
  )
254
+ print("βœ… Download complete (Filtered to ~2GB).")
255
 
256
+ self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
257
 
258
+ # Load the specific FP16 ONNX file
259
+ # We point strictly to the file we downloaded
260
  self.model = ORTModelForCausalLM.from_pretrained(
261
+ self.local_dir,
262
+ file_name="onnx/model_fp16.onnx", # <--- CHANGED to load FP16 file
263
  use_cache=True,
264
  use_io_binding=False
265
  )
 
310
  skip_special_tokens=True
311
  )
312
  return response
 
313
  # ---------------------------------------------------------
314
  # 3. Main Application Logic
315
  # ---------------------------------------------------------