Spaces:

heerjtdev
/

try_answer

Running

App Files Files Community

heerjtdev committed on Feb 3

Commit

c87fdf0

verified ·

1 Parent(s): 196a2a0

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -17

app.py CHANGED Viewed

@@ -37,7 +37,6 @@ class OnnxBgeEmbeddings(Embeddings):
     def _process_batch(self, texts):
         inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
-        # On CPU, we don't need to manually move to device, but it's good practice
         with torch.no_grad():
             outputs = self.model(**inputs)
         embeddings = outputs.last_hidden_state[:, 0]
@@ -51,7 +50,7 @@ class OnnxBgeEmbeddings(Embeddings):
         return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
 # ---------------------------------------------------------
-# 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - CPU VERSION
 # ---------------------------------------------------------
 class LLMEvaluator:
     def __init__(self):
@@ -71,7 +70,6 @@ class LLMEvaluator:
         self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
-        # CRITICAL: Disable Graph Optimizations to prevent crash
         sess_options = SessionOptions()
         sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
@@ -80,24 +78,42 @@ class LLMEvaluator:
             subfolder="onnx",
             file_name="model_fp16.onnx",
             use_cache=True,
-            use_io_binding=False, # DISABLED FOR CPU
             provider=PROVIDERS[0],
             session_options=sess_options
         )
     def evaluate(self, context, question, student_answer, max_marks):
         messages = [
-            {"role": "system", "content": "You are a strict academic grader. Verify the student answer against the context. Be harsh. Do not hallucinate."},
             {"role": "user", "content": f"""
-            CONTEXT: {context}
-            QUESTION: {question}
-            ANSWER: {student_answer}
-            TASK: Grade out of {max_marks}.
-            RULES:
-            1. If wrong, 0 marks.
-            2. Be strict.
-            3. Format: 'Score: X/{max_marks} \n Feedback: ...'
             """}
         ]
@@ -107,12 +123,12 @@ class LLMEvaluator:
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
-                max_new_tokens=75,
-                temperature=0.1,
-                do_sample=False
             )
-        # FIX: Access input_ids correctly
         input_length = inputs['input_ids'].shape[1]
         response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
         return response

     def _process_batch(self, texts):
         inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
         with torch.no_grad():
             outputs = self.model(**inputs)
         embeddings = outputs.last_hidden_state[:, 0]
         return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
 # ---------------------------------------------------------
+# 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
 # ---------------------------------------------------------
 class LLMEvaluator:
     def __init__(self):
         self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
         sess_options = SessionOptions()
         sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
             subfolder="onnx",
             file_name="model_fp16.onnx",
             use_cache=True,
+            use_io_binding=False,
             provider=PROVIDERS[0],
             session_options=sess_options
         )
     def evaluate(self, context, question, student_answer, max_marks):
+        # OPTIMIZED PROMPT FOR SMALL MODELS (0.5B)
         messages = [
+            {"role": "system", "content": "You are a strictest, literal academic grader. You ONLY grade based on the provided text. You DO NOT use outside knowledge."},
             {"role": "user", "content": f"""
+            Task: Grade the student answer based ONLY on the Reference Text.
+            REFERENCE TEXT:
+            {context}
+            QUESTION:
+            {question}
+            STUDENT ANSWER:
+            {student_answer}
+            -----------------------------
+            GRADING LOGIC:
+            1. READ the Reference Text. What does it actually say about the Question?
+            2. COMPARE it to the Student Answer.
+            3 START with 0 marks and IF the answers line up to the reference text in a meaningful way, then add marks porportionally. ONLY GIVE MARKS FOR CORRECT STATEMENT STRICTLY BASED ON THE REFERENCE TEXT AND NOTHING ELSE IN THIS WORLD.
+            4. IF the Student Answer claims things not found in the text , he is incorrect and HALLUCINATING. Do not give marks for that statment/phrase
+            5. IF the Student Answer contradicts the text (e.g., Text says "hide personality" but Student says "show personality"), Do not give marks for that statment/phrase
+            VERDICT:
+            - If wrong: 0/{max_marks}
+            - If correct: {max_marks}/{max_marks}
+            OUTPUT FORMAT:
+            Score: [X]/{max_marks}
+            Feedback: [Brief explanation citing the text]
             """}
         ]
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
+                max_new_tokens=100,
+                temperature=0.05,    # near 0 = logic only, no creativity; only applies when do_sample=True
+                do_sample=False,
+                repetition_penalty=1.2
             )
         input_length = inputs['input_ids'].shape[1]
         response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
         return response