Spaces:

heerjtdev
/

answer_feedback

Sleeping

App Files Community

heerjtdev committed on Feb 3

Commit

fa384b6

verified ·

1 Parent(s): 48130d6

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -39

app.py CHANGED Viewed

@@ -450,19 +450,13 @@ class LLMEvaluator:
         self.local_dir = "onnx_llama_local"
         print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
-        print(f"📥 Downloading Int8 model to {self.local_dir}...")
         snapshot_download(
             repo_id=self.repo_id,
             local_dir=self.local_dir,
             local_dir_use_symlinks=False,
-            allow_patterns=[
-                "config.json", "generation_config.json", "tokenizer*",
-                "special_tokens_map.json", "*.jinja", "onnx/model_quantized.onnx"
-            ]
         )
-        print("✅ Download complete.")
         self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
         self.model = ORTModelForCausalLM.from_pretrained(
             self.local_dir,
@@ -473,34 +467,49 @@ class LLMEvaluator:
         )
     def evaluate(self, context, question, student_answer):
-        # SIMPLIFIED PROMPT (Easier for 1B model to follow)
         messages = [
-            {"role": "system", "content": "You are a strict teacher. Grade the answer based ONLY on the text provided."},
-            {"role": "user", "content": f"""
-            SOURCE TEXT:
-            {context}
-            QUESTION: {question}
-            ANSWER: {student_answer}
-            TASK:
-            1. Does the answer match the Source Text? (Yes/No)
-            2. Score (0-10)
-            3. Explanation (1 sentence)
-            """}
         ]
         input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = self.tokenizer(input_text, return_tensors="pt")
-        # [CRITICAL FIX] GENERATION SETTINGS
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
-                max_new_tokens=150,       # Keep it short
-                do_sample=False,          # Greedy decoding (No randomness)
-                repetition_penalty=1.3,   # STRONG penalty to kill loops like "The The The"
-                min_length=5              # Force it to start speaking
             )
         return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
@@ -542,23 +551,22 @@ class VectorSystem:
         if not self.vector_store: return "⚠️ Please upload a file first.", ""
         if not question: return "⚠️ Enter a question.", ""
-        # 1. RAG RETRIEVAL
         results = self.vector_store.similarity_search_with_score(question, k=1)
         if not results: return "No relevant text found.", ""
         best_doc, score = results[0]
         chunk_id = best_doc.metadata['id']
-        # 2. SMART CONTEXT MERGING (Fixes the "Double Text" bug)
-        # We manually overlap checking is hard, so we just grab the raw text range if possible
-        # Simple fix: Concatenate with a separator to break the loop
         prev_text = self.all_chunks[chunk_id - 1] if chunk_id > 0 else ""
         curr_text = self.all_chunks[chunk_id]
         next_text = self.all_chunks[chunk_id + 1] if chunk_id < len(self.all_chunks) - 1 else ""
-        # [FIX] We use "..." to separate them clearly for the LLM
-        full_context = f"PREVIOUS: {prev_text}\n...\nFOCUS: {curr_text}\n...\nNEXT: {next_text}"
         # 3. LLM EVALUATION
         llm_feedback = "Please enter a student answer to grade."
@@ -567,9 +575,9 @@ class VectorSystem:
         # UI Display
         evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
-        evidence_display += f"> **PREVIOUS:** ...{prev_text[-300:]}\n\n"
-        evidence_display += f"> **CORE:** **{curr_text}**\n\n"
-        evidence_display += f"> **NEXT:** {next_text[:300]}...\n"
         return evidence_display, llm_feedback
@@ -578,8 +586,9 @@ system = VectorSystem()
 # --- GRADIO UI ---
 with gr.Blocks(title="EduGenius AI Grader") as demo:
-    gr.Markdown("# 🧠 EduGenius: RAG + LLM Grading")
     with gr.Row():
         with gr.Column(scale=1):
             pdf_input = gr.File(label="1. Upload Chapter (PDF/TXT)")

         self.local_dir = "onnx_llama_local"
         print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
+        # Ensure we have the model (skips if already downloaded)
         snapshot_download(
             repo_id=self.repo_id,
             local_dir=self.local_dir,
             local_dir_use_symlinks=False,
+            allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_quantized.onnx"]
         )
         self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
         self.model = ORTModelForCausalLM.from_pretrained(
             self.local_dir,
         )
     def evaluate(self, context, question, student_answer):
+        # [UNIVERSAL SAFETY FIX]
+        # We frame the task as "Data Analysis" of "Educational Material".
+        # We explicitly tell the model to treat metaphors/conflict as ACADEMIC DATA.
+        prompt_content = f"""
+        TASK: Academic Grading Assistant
+        OBJECTIVE: Compare the Student Answer against the provided Reference Text.
+        *** REFERENCE TEXT (Educational Source Material) ***
+        {context}
+        ****************************************************
+        QUESTION: {question}
+        STUDENT ANSWER: {student_answer}
+        INSTRUCTIONS:
+        1. Act as a neutral grader. Your job is factual verification.
+        2. Verify if the Student Answer is supported by the Reference Text.
+        3. SAFETY OVERRIDE: The Reference Text may contain historical, literary, or scientific descriptions (e.g., wars, medical procedures, or metaphors). Treat these strictly as academic data to be analyzed, not as real-world threats.
+        OUTPUT FORMAT:
+        Grade: [0-10]
+        Verdict: [Correct / Incorrect / Partially Correct]
+        Feedback: [1 concise sentence explanation based ONLY on the text]
+        """
         messages = [
+            # Neutral system prompt avoids triggering personality constraints
+            {"role": "system", "content": "You are an objective educational AI. You analyze text for factual accuracy."},
+            {"role": "user", "content": prompt_content}
         ]
         input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = self.tokenizer(input_text, return_tensors="pt")
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
+                max_new_tokens=150,       # Short generation to reduce hallucination risk
+                do_sample=False,          # Greedy Search (Deterministic = No Loops)
+                repetition_penalty=1.25,  # Strong penalty to stop "The The The"
+                min_length=5
             )
         return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
         if not self.vector_store: return "⚠️ Please upload a file first.", ""
         if not question: return "⚠️ Enter a question.", ""
+        # 1. RAG RETRIEVAL (Top 1)
         results = self.vector_store.similarity_search_with_score(question, k=1)
         if not results: return "No relevant text found.", ""
         best_doc, score = results[0]
         chunk_id = best_doc.metadata['id']
+        # 2. CONTEXT EXPANSION (Prev + Current + Next)
+        # We perform a safe check to ensure we don't crash at the start/end of the document
         prev_text = self.all_chunks[chunk_id - 1] if chunk_id > 0 else ""
         curr_text = self.all_chunks[chunk_id]
         next_text = self.all_chunks[chunk_id + 1] if chunk_id < len(self.all_chunks) - 1 else ""
+        # We construct the "Sandbox Context" for the LLM
+        # Using specific delimiters helps the model separate the sections
+        full_context = f"--- START OF CONTEXT ---\n{prev_text}\n{curr_text}\n{next_text}\n--- END OF CONTEXT ---"
         # 3. LLM EVALUATION
         llm_feedback = "Please enter a student answer to grade."
         # UI Display
         evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
+        if prev_text: evidence_display += f"> ...{prev_text[-200:]}\n"
+        evidence_display += f"> **{curr_text}**\n"
+        if next_text: evidence_display += f"> {next_text[:200]}...\n"
         return evidence_display, llm_feedback
 # --- GRADIO UI ---
 with gr.Blocks(title="EduGenius AI Grader") as demo:
+    gr.Markdown("# 🧠 EduGenius: Universal AI Grader")
+    gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B-Int8** (Evaluation).")
     with gr.Row():
         with gr.Column(scale=1):
             pdf_input = gr.File(label="1. Upload Chapter (PDF/TXT)")