Spaces:

heerjtdev
/

answer_feedback

Sleeping

App Files Community

heerjtdev committed 11 days ago

Commit

76cfdea

verified ·

1 Parent(s): feb61bd

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -21

app.py CHANGED Viewed

@@ -478,34 +478,27 @@ class LLMEvaluator:
         )
     def evaluate(self, context, question, student_answer):
-        # STRICT PROMPT (As requested)
-        system_prompt = """You are a strict academic grader.
         RULES:
-        1. BASE YOUR SCORE ONLY ON THE CONTEXT PROVIDED.
-        2. If the student answer contradicts the context, give a score of 0-2.
-        3. If the context says 'A is B', and the student says 'A is C', the student is WRONG.
-        4. Be harsh. Do not give credit for vague or hallucinatory answers."""
         user_prompt = f"""
-        ### SOURCE MATERIAL (CONTEXT):
         {context}
-        ### EXAM QUESTION:
         {question}
         ### STUDENT ANSWER:
         {student_answer}
-        ### INSTRUCTIONS:
-        Compare the Student Answer to the Source Material.
-        - Does the student explicitly mention the key points found in the text?
-        - If the student describes something NOT in the text (e.g., "looking in" vs "looking out"), mark it wrong.
-        OUTPUT FORMAT:
-        Score: [0-10]
-        Verdict: [Correct/Incorrect/Partially Correct]
-        Explanation: [1-2 sentences explaining why, citing the text]
         """
         messages = [
@@ -521,13 +514,18 @@ class LLMEvaluator:
         inputs = self.tokenizer(input_text, return_tensors="pt")
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
                 max_new_tokens=200,
-                temperature=0.1,
-                do_sample=True,
-                top_p=0.9
             )
         response = self.tokenizer.decode(
@@ -535,7 +533,6 @@ class LLMEvaluator:
             skip_special_tokens=True
         )
         return response
 # ---------------------------------------------------------
 # 3. Main Application Logic
 # ---------------------------------------------------------

         )
     def evaluate(self, context, question, student_answer):
+        # 3. STRICT PROMPT
+        system_prompt = """You are a strict academic grader.
+        Your goal is to check if the student's answer is supported by the context.
         RULES:
+        1. If the answer contradicts the context, score it 0-3.
+        2. If the answer describes things NOT in the text, mark it wrong.
+        3. Be direct. Do not repeat yourself."""
         user_prompt = f"""
+        ### CONTEXT:
         {context}
+        ### QUESTION:
         {question}
         ### STUDENT ANSWER:
         {student_answer}
+        ### TASK:
+        Grade the answer (0-10) and verify if it matches the text provided.
         """
         messages = [
         inputs = self.tokenizer(input_text, return_tensors="pt")
+        # 4. FIXED GENERATION PARAMETERS
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
                 max_new_tokens=200,
+                # [CRITICAL FIXES]
+                do_sample=False,          # Greedy Search (Faster, more deterministic)
+                repetition_penalty=1.2,   # Kills the "####. ####." loops
+                min_length=10,            # Forces it to write at least something
+                # Removed 'temperature' and 'top_p' because do_sample=False ignores them
             )
         response = self.tokenizer.decode(
             skip_special_tokens=True
         )
         return response
 # ---------------------------------------------------------
 # 3. Main Application Logic
 # ---------------------------------------------------------