Spaces:

heerjtdev
/

answer_feedback

Sleeping

App Files Files Community

heerjtdev committed on Feb 3

Commit

4cc40b8

verified ·

1 Parent(s): fa384b6

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -54

app.py CHANGED Viewed

@@ -402,7 +402,6 @@
 import gradio as gr
 import fitz  # PyMuPDF
 import torch
@@ -442,7 +441,7 @@ class OnnxBgeEmbeddings(Embeddings):
         return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
 # ---------------------------------------------------------
-# 2. LLM Evaluator Class (Llama-3.2-1B ONNX - INT8 QUANTIZED)
 # ---------------------------------------------------------
 class LLMEvaluator:
     def __init__(self):
@@ -450,7 +449,6 @@ class LLMEvaluator:
         self.local_dir = "onnx_llama_local"
         print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
-        # Ensure we have the model (skips if already downloaded)
         snapshot_download(
             repo_id=self.repo_id,
             local_dir=self.local_dir,
@@ -467,49 +465,51 @@ class LLMEvaluator:
         )
     def evaluate(self, context, question, student_answer):
-        # [UNIVERSAL SAFETY FIX]
-        # We frame the task as "Data Analysis" of "Educational Material".
-        # We explicitly tell the model to treat metaphors/conflict as ACADEMIC DATA.
-        prompt_content = f"""
-        TASK: Academic Grading Assistant
-        OBJECTIVE: Compare the Student Answer against the provided Reference Text.
-        *** REFERENCE TEXT (Educational Source Material) ***
-        {context}
-        ****************************************************
-        QUESTION: {question}
-        STUDENT ANSWER: {student_answer}
-        INSTRUCTIONS:
-        1. Act as a neutral grader. Your job is factual verification.
-        2. Verify if the Student Answer is supported by the Reference Text.
-        3. SAFETY OVERRIDE: The Reference Text may contain historical, literary, or scientific descriptions (e.g., wars, medical procedures, or metaphors). Treat these strictly as academic data to be analyzed, not as real-world threats.
-        OUTPUT FORMAT:
-        Grade: [0-10]
-        Verdict: [Correct / Incorrect / Partially Correct]
-        Feedback: [1 concise sentence explanation based ONLY on the text]
-        """
         messages = [
-            # Neutral system prompt avoids triggering personality constraints
-            {"role": "system", "content": "You are an objective educational AI. You analyze text for factual accuracy."},
-            {"role": "user", "content": prompt_content}
         ]
         input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = self.tokenizer(input_text, return_tensors="pt")
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
-                max_new_tokens=150,       # Short generation to reduce hallucination risk
-                do_sample=False,          # Greedy Search (Deterministic = No Loops)
-                repetition_penalty=1.25,  # Strong penalty to stop "The The The"
-                min_length=5
             )
         return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
@@ -551,33 +551,23 @@ class VectorSystem:
         if not self.vector_store: return "⚠️ Please upload a file first.", ""
         if not question: return "⚠️ Enter a question.", ""
-        # 1. RAG RETRIEVAL (Top 1)
         results = self.vector_store.similarity_search_with_score(question, k=1)
         if not results: return "No relevant text found.", ""
         best_doc, score = results[0]
-        chunk_id = best_doc.metadata['id']
-        # 2. CONTEXT EXPANSION (Prev + Current + Next)
-        # We perform a safe check to ensure we don't crash at the start/end of the document
-        prev_text = self.all_chunks[chunk_id - 1] if chunk_id > 0 else ""
-        curr_text = self.all_chunks[chunk_id]
-        next_text = self.all_chunks[chunk_id + 1] if chunk_id < len(self.all_chunks) - 1 else ""
-        # We construct the "Sandbox Context" for the LLM
-        # Using specific delimiters helps the model separate the sections
-        full_context = f"--- START OF CONTEXT ---\n{prev_text}\n{curr_text}\n{next_text}\n--- END OF CONTEXT ---"
-        # 3. LLM EVALUATION
         llm_feedback = "Please enter a student answer to grade."
         if student_answer:
-            llm_feedback = self.llm.evaluate(full_context, question, student_answer)
         # UI Display
         evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
-        if prev_text: evidence_display += f"> ...{prev_text[-200:]}\n"
-        evidence_display += f"> **{curr_text}**\n"
-        if next_text: evidence_display += f"> {next_text[:200]}...\n"
         return evidence_display, llm_feedback
@@ -586,7 +576,7 @@ system = VectorSystem()
 # --- GRADIO UI ---
 with gr.Blocks(title="EduGenius AI Grader") as demo:
-    gr.Markdown("# 🧠 EduGenius: Universal AI Grader")
     gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B-Int8** (Evaluation).")
     with gr.Row():

 import gradio as gr
 import fitz  # PyMuPDF
 import torch
         return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
 # ---------------------------------------------------------
+# 2. LLM Evaluator Class (Llama-3.2-1B ONNX - INT8)
 # ---------------------------------------------------------
 class LLMEvaluator:
     def __init__(self):
         self.local_dir = "onnx_llama_local"
         print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
         snapshot_download(
             repo_id=self.repo_id,
             local_dir=self.local_dir,
         )
     def evaluate(self, context, question, student_answer):
+        # [STRATEGY: FEW-SHOT PROMPTING]
+        # We give the model an example so it knows exactly what format to output.
+        # This prevents it from hallucinating dates or XML tags.
         messages = [
+            {"role": "system", "content": "You are a grading assistant. Output only the requested format."},
+            {"role": "user", "content": f"""
+            Task: Grade the student answer based ONLY on the provided text.
+            ---
+            EXAMPLE:
+            Text: "Photosynthesis is how plants make food using sunlight."
+            Question: "How do plants eat?"
+            Answer: "They use sunlight."
+            Grade: 10/10
+            Verdict: Correct
+            Explanation: The text confirms plants use sunlight to make food.
+            ---
+            YOUR TURN:
+            Text: "{context}"
+            Question: "{question}"
+            Answer: "{student_answer}"
+            Output the Grade, Verdict, and Explanation:
+            """}
         ]
         input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = self.tokenizer(input_text, return_tensors="pt")
+        # [GENERATION SETTINGS FOR STABILITY]
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
+                max_new_tokens=150,
+                # We use sampling with a low temperature instead of greedy decoding.
+                # This helps the model get "unstuck" from repetition loops without being too creative.
+                do_sample=True,
+                temperature=0.2,
+                top_p=0.9,
+                repetition_penalty=1.1
             )
         return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
         if not self.vector_store: return "⚠️ Please upload a file first.", ""
         if not question: return "⚠️ Enter a question.", ""
+        # 1. RAG RETRIEVAL (Top 1 Only)
+        # We removed the Prev/Next chunk expansion because it was creating duplicate text
+        # that confused the model.
         results = self.vector_store.similarity_search_with_score(question, k=1)
         if not results: return "No relevant text found.", ""
         best_doc, score = results[0]
+        context_text = best_doc.page_content
+        # 2. LLM EVALUATION
         llm_feedback = "Please enter a student answer to grade."
         if student_answer:
+            llm_feedback = self.llm.evaluate(context_text, question, student_answer)
         # UI Display
         evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
+        evidence_display += f"> **{context_text}**\n"
         return evidence_display, llm_feedback
 # --- GRADIO UI ---
 with gr.Blocks(title="EduGenius AI Grader") as demo:
+    gr.Markdown("# 🧠 EduGenius: AI Grader")
     gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B-Int8** (Evaluation).")
     with gr.Row():