Spaces:

heerjtdev
/

answer_feedback

Sleeping

App Files Community

heerjtdev committed on Feb 3

Commit

d5b0c87

verified ·

1 Parent(s): 49671b5

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -13

app.py CHANGED Viewed

@@ -218,29 +218,32 @@ class OnnxBgeEmbeddings(Embeddings):
     def embed_query(self, text):
         return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
 # ---------------------------------------------------------
 # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
 # ---------------------------------------------------------
 class LLMEvaluator:
     def __init__(self):
-        # Using the ONNX Community version of Llama 3.2 1B (Optimized for CPU)
         self.model_id = "onnx-community/Llama-3.2-1B-Instruct"
         print(f"🔄 Loading LLM: {self.model_id}...")
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        # Load the ONNX model for text generation
         self.model = ORTModelForCausalLM.from_pretrained(
             self.model_id,
-            decoder_file_name="model.onnx", # Standard ONNX filename
             use_cache=True,
-            use_io_binding=False # Safer for CPU spaces
         )
     def evaluate(self, context, question, student_answer):
         # Prompt Engineering for Llama 3
         messages = [
-            {"role": "system", "content": "You are a strict but helpful academic grader. You will be given a context, a question, and a student's answer. Your job is to grade the answer based ONLY on the provided context."},
             {"role": "user", "content": f"""
             ### CONTEXT:
             {context}
@@ -252,14 +255,19 @@ class LLMEvaluator:
             {student_answer}
             ### INSTRUCTIONS:
-            1. Determine if the student answer is correct based on the context.
-            2. Give a score out of 10.
-            3. Provide a brief explanation.
             """}
         ]
         # Format input using the chat template
-        input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = self.tokenizer(input_text, return_tensors="pt")
         # Generate response
@@ -267,15 +275,17 @@ class LLMEvaluator:
             outputs = self.model.generate(
                 **inputs,
                 max_new_tokens=256,
-                temperature=0.3, # Low temp for factual grading
                 do_sample=True,
                 top_p=0.9
             )
-        # Decode and strip the prompt
-        response = self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
         return response
 # ---------------------------------------------------------
 # 3. Main Application Logic
 # ---------------------------------------------------------

     def embed_query(self, text):
         return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
+# ---------------------------------------------------------
+# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
+# ---------------------------------------------------------
 # ---------------------------------------------------------
 # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
 # ---------------------------------------------------------
 class LLMEvaluator:
     def __init__(self):
+        # Using the ONNX Community version of Llama 3.2 1B
         self.model_id = "onnx-community/Llama-3.2-1B-Instruct"
         print(f"🔄 Loading LLM: {self.model_id}...")
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
+        # FIX: Removed 'decoder_file_name' argument
+        # The library now automatically finds 'model.onnx' in the repo
         self.model = ORTModelForCausalLM.from_pretrained(
             self.model_id,
             use_cache=True,
+            use_io_binding=False # Safe for CPU
         )
     def evaluate(self, context, question, student_answer):
         # Prompt Engineering for Llama 3
         messages = [
+            {"role": "system", "content": "You are a helpful academic grader. Grade the student answer based ONLY on the provided context."},
             {"role": "user", "content": f"""
             ### CONTEXT:
             {context}
             {student_answer}
             ### INSTRUCTIONS:
+            1. Is the answer correct?
+            2. Score out of 10.
+            3. Explanation.
             """}
         ]
         # Format input using the chat template
+        input_text = self.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
         inputs = self.tokenizer(input_text, return_tensors="pt")
         # Generate response
             outputs = self.model.generate(
                 **inputs,
                 max_new_tokens=256,
+                temperature=0.3,
                 do_sample=True,
                 top_p=0.9
             )
+        # Decode response
+        response = self.tokenizer.decode(
+            outputs[0][inputs.input_ids.shape[1]:],
+            skip_special_tokens=True
+        )
         return response
 # ---------------------------------------------------------
 # 3. Main Application Logic
 # ---------------------------------------------------------