heerjtdev commited on
Commit
05fc0ae
·
verified ·
1 Parent(s): 7970482

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -13
app.py CHANGED
@@ -223,9 +223,7 @@ class OnnxBgeEmbeddings(Embeddings):
223
  # ---------------------------------------------------------
224
  # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
225
  # ---------------------------------------------------------
226
- # ---------------------------------------------------------
227
- # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
228
- # ---------------------------------------------------------
229
  class LLMEvaluator:
230
  def __init__(self):
231
  self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
@@ -233,11 +231,8 @@ class LLMEvaluator:
233
 
234
  print(f"🔄 Preparing LLM: {self.repo_id}...")
235
 
236
- # [CRITICAL FIX]
237
- # We use 'allow_patterns' to download ONLY the specific FP16 model (~2GB)
238
- # and ignore the huge standard and quantized files.
239
- print(f"📥 Downloading ONLY the FP16 version to {self.local_dir}...")
240
-
241
  snapshot_download(
242
  repo_id=self.repo_id,
243
  local_dir=self.local_dir,
@@ -248,18 +243,19 @@ class LLMEvaluator:
248
  "tokenizer*",
249
  "special_tokens_map.json",
250
  "*.jinja",
251
- "onnx/model_fp16.onnx" # <--- CHANGED to FP16 model
252
  ]
253
  )
254
- print("✅ Download complete (Filtered to ~2GB).")
255
 
256
  self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
257
 
258
- # Load the specific FP16 ONNX file
259
- # We point strictly to the file we downloaded
260
  self.model = ORTModelForCausalLM.from_pretrained(
261
  self.local_dir,
262
- file_name="onnx/model_fp16.onnx", # <--- CHANGED to load FP16 file
 
263
  use_cache=True,
264
  use_io_binding=False
265
  )
@@ -310,6 +306,7 @@ class LLMEvaluator:
310
  skip_special_tokens=True
311
  )
312
  return response
 
313
  # ---------------------------------------------------------
314
  # 3. Main Application Logic
315
  # ---------------------------------------------------------
 
223
  # ---------------------------------------------------------
224
  # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
225
  # ---------------------------------------------------------
226
+
 
 
227
  class LLMEvaluator:
228
  def __init__(self):
229
  self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
 
231
 
232
  print(f"🔄 Preparing LLM: {self.repo_id}...")
233
 
234
+ # [FIXED DOWNLOADER]
235
+ print(f"📥 Downloading FP16 model + data to {self.local_dir}...")
 
 
 
236
  snapshot_download(
237
  repo_id=self.repo_id,
238
  local_dir=self.local_dir,
 
243
  "tokenizer*",
244
  "special_tokens_map.json",
245
  "*.jinja",
246
+ "onnx/model_fp16.onnx*" # WILDCARD '*' ensures we get .onnx AND .onnx_data
247
  ]
248
  )
249
+ print("✅ Download complete.")
250
 
251
  self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
252
 
253
+ # [CRITICAL FIX]
254
+ # Separating 'subfolder' and 'file_name' is required by Optimum
255
  self.model = ORTModelForCausalLM.from_pretrained(
256
  self.local_dir,
257
+ subfolder="onnx", # Point to the subfolder
258
+ file_name="model_fp16.onnx", # Just the filename
259
  use_cache=True,
260
  use_io_binding=False
261
  )
 
306
  skip_special_tokens=True
307
  )
308
  return response
309
+
310
  # ---------------------------------------------------------
311
  # 3. Main Application Logic
312
  # ---------------------------------------------------------