Spaces:

heerjtdev
/

answer_feedback

Sleeping

App Files Community

heerjtdev committed on Feb 3

Commit

7970482

verified ·

1 Parent(s): 229e510

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -10

app.py CHANGED Viewed

@@ -220,29 +220,46 @@ class OnnxBgeEmbeddings(Embeddings):
     def embed_query(self, text):
         return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
 # ---------------------------------------------------------
 # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
 # ---------------------------------------------------------
 class LLMEvaluator:
     def __init__(self):
         self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
         print(f"🔄 Preparing LLM: {self.repo_id}...")
         # [CRITICAL FIX]
-        # Download model to a specific LOCAL directory to avoid cache symlink errors
-        print("📥 Downloading model to local directory (this fixes the filesystem error)...")
-        local_model_path = snapshot_download(
             repo_id=self.repo_id,
-            local_dir="onnx_llama_local",  # Downloads to ./onnx_llama_local/
-            local_dir_use_symlinks=False   # Forces real files, not symlinks
         )
-        print("✅ Download complete.")
-        self.tokenizer = AutoTokenizer.from_pretrained(local_model_path)
-        # Load from the local folder
         self.model = ORTModelForCausalLM.from_pretrained(
-            local_model_path,
             use_cache=True,
             use_io_binding=False
         )
@@ -293,7 +310,6 @@ class LLMEvaluator:
             skip_special_tokens=True
         )
         return response
 # ---------------------------------------------------------
 # 3. Main Application Logic
 # ---------------------------------------------------------

     def embed_query(self, text):
         return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
+# ---------------------------------------------------------
+# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
+# ---------------------------------------------------------
 # ---------------------------------------------------------
 # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
 # ---------------------------------------------------------
 class LLMEvaluator:
     def __init__(self):
         self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
+        self.local_dir = "onnx_llama_local"
         print(f"🔄 Preparing LLM: {self.repo_id}...")
         # [CRITICAL FIX]
+        # We use 'allow_patterns' to download ONLY the specific FP16 model (~2GB)
+        # and ignore the huge standard and quantized files.
+        print(f"📥 Downloading ONLY the FP16 version to {self.local_dir}...")
+        snapshot_download(
             repo_id=self.repo_id,
+            local_dir=self.local_dir,
+            local_dir_use_symlinks=False,
+            allow_patterns=[
+                "config.json",
+                "generation_config.json",
+                "tokenizer*",
+                "special_tokens_map.json",
+                "*.jinja",
+                "onnx/model_fp16.onnx"  # <--- CHANGED to FP16 model
+            ]
         )
+        print("✅ Download complete (Filtered to ~2GB).")
+        self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
+        # Load the specific FP16 ONNX file
+        # We point strictly to the file we downloaded
         self.model = ORTModelForCausalLM.from_pretrained(
+            self.local_dir,
+            file_name="onnx/model_fp16.onnx", # <--- CHANGED to load FP16 file
             use_cache=True,
             use_io_binding=False
         )
             skip_special_tokens=True
         )
         return response
 # ---------------------------------------------------------
 # 3. Main Application Logic
 # ---------------------------------------------------------