Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -179,7 +179,6 @@
|
|
| 179 |
|
| 180 |
|
| 181 |
|
| 182 |
-
|
| 183 |
import gradio as gr
|
| 184 |
import fitz # PyMuPDF
|
| 185 |
import torch
|
|
@@ -191,16 +190,19 @@ from langchain_community.vectorstores import FAISS
|
|
| 191 |
from langchain_core.embeddings import Embeddings
|
| 192 |
|
| 193 |
# --- ONNX & MODEL IMPORTS ---
|
| 194 |
-
from transformers import AutoTokenizer
|
| 195 |
from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM
|
|
|
|
| 196 |
|
| 197 |
# ---------------------------------------------------------
|
| 198 |
# 1. Custom ONNX Embedding Class (BGE-Large)
|
| 199 |
# ---------------------------------------------------------
|
| 200 |
class OnnxBgeEmbeddings(Embeddings):
|
| 201 |
-
def __init__(self, model_name="BAAI/bge-large-en-v1.5"
|
| 202 |
print(f"🚀 Loading Embeddings: {model_name}...")
|
| 203 |
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
|
|
|
|
| 204 |
self.model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
|
| 205 |
|
| 206 |
def _process_batch(self, texts):
|
|
@@ -218,26 +220,31 @@ class OnnxBgeEmbeddings(Embeddings):
|
|
| 218 |
def embed_query(self, text):
|
| 219 |
return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
|
| 220 |
|
| 221 |
-
# ---------------------------------------------------------
|
| 222 |
-
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
|
| 223 |
-
# ---------------------------------------------------------
|
| 224 |
# ---------------------------------------------------------
|
| 225 |
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
|
| 226 |
# ---------------------------------------------------------
|
| 227 |
class LLMEvaluator:
|
| 228 |
def __init__(self):
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
print(f"π Loading LLM: {self.model_id}...")
|
| 232 |
|
| 233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
-
#
|
| 236 |
-
# The library now automatically finds 'model.onnx' in the repo
|
| 237 |
self.model = ORTModelForCausalLM.from_pretrained(
|
| 238 |
-
|
| 239 |
use_cache=True,
|
| 240 |
-
use_io_binding=False
|
| 241 |
)
|
| 242 |
|
| 243 |
def evaluate(self, context, question, student_answer):
|
|
@@ -286,6 +293,7 @@ class LLMEvaluator:
|
|
| 286 |
skip_special_tokens=True
|
| 287 |
)
|
| 288 |
return response
|
|
|
|
| 289 |
# ---------------------------------------------------------
|
| 290 |
# 3. Main Application Logic
|
| 291 |
# ---------------------------------------------------------
|
|
|
|
| 179 |
|
| 180 |
|
| 181 |
|
|
|
|
| 182 |
import gradio as gr
|
| 183 |
import fitz # PyMuPDF
|
| 184 |
import torch
|
|
|
|
| 190 |
from langchain_core.embeddings import Embeddings
|
| 191 |
|
| 192 |
# --- ONNX & MODEL IMPORTS ---
|
| 193 |
+
from transformers import AutoTokenizer
|
| 194 |
from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM
|
| 195 |
+
from huggingface_hub import snapshot_download
|
| 196 |
|
| 197 |
# ---------------------------------------------------------
|
| 198 |
# 1. Custom ONNX Embedding Class (BGE-Large)
|
| 199 |
# ---------------------------------------------------------
|
| 200 |
class OnnxBgeEmbeddings(Embeddings):
|
| 201 |
+
def __init__(self, model_name="BAAI/bge-large-en-v1.5"):
|
| 202 |
print(f"🚀 Loading Embeddings: {model_name}...")
|
| 203 |
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 204 |
+
# Note: export=True will re-convert on every restart.
|
| 205 |
+
# For production, you'd want to save this permanently, but this works for now.
|
| 206 |
self.model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
|
| 207 |
|
| 208 |
def _process_batch(self, texts):
|
|
|
|
| 220 |
def embed_query(self, text):
|
| 221 |
return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
|
| 222 |
|
|
|
|
|
|
|
|
|
|
| 223 |
# ---------------------------------------------------------
|
| 224 |
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
|
| 225 |
# ---------------------------------------------------------
|
| 226 |
class LLMEvaluator:
|
| 227 |
def __init__(self):
|
| 228 |
+
self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
|
| 229 |
+
print(f"🚀 Preparing LLM: {self.repo_id}...")
|
|
|
|
| 230 |
|
| 231 |
+
# [CRITICAL FIX]
|
| 232 |
+
# Download model to a specific LOCAL directory to avoid cache symlink errors
|
| 233 |
+
print("📥 Downloading model to local directory (this fixes the filesystem error)...")
|
| 234 |
+
local_model_path = snapshot_download(
|
| 235 |
+
repo_id=self.repo_id,
|
| 236 |
+
local_dir="onnx_llama_local", # Downloads to ./onnx_llama_local/
|
| 237 |
+
local_dir_use_symlinks=False # Forces real files, not symlinks
|
| 238 |
+
)
|
| 239 |
+
print("✅ Download complete.")
|
| 240 |
+
|
| 241 |
+
self.tokenizer = AutoTokenizer.from_pretrained(local_model_path)
|
| 242 |
|
| 243 |
+
# Load from the local folder
|
|
|
|
| 244 |
self.model = ORTModelForCausalLM.from_pretrained(
|
| 245 |
+
local_model_path,
|
| 246 |
use_cache=True,
|
| 247 |
+
use_io_binding=False
|
| 248 |
)
|
| 249 |
|
| 250 |
def evaluate(self, context, question, student_answer):
|
|
|
|
| 293 |
skip_special_tokens=True
|
| 294 |
)
|
| 295 |
return response
|
| 296 |
+
|
| 297 |
# ---------------------------------------------------------
|
| 298 |
# 3. Main Application Logic
|
| 299 |
# ---------------------------------------------------------
|