Spaces:

heerjtdev
/

answer_feedback

Sleeping

App Files Files Community

heerjtdev committed on Feb 3

Commit

48130d6

verified ·

1 Parent(s): 76cfdea

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -87

app.py CHANGED Viewed

@@ -450,89 +450,61 @@ class LLMEvaluator:
         self.local_dir = "onnx_llama_local"
         print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
         print(f"📥 Downloading Int8 model to {self.local_dir}...")
         snapshot_download(
             repo_id=self.repo_id,
             local_dir=self.local_dir,
             local_dir_use_symlinks=False,
             allow_patterns=[
-                "config.json",
-                "generation_config.json",
-                "tokenizer*",
-                "special_tokens_map.json",
-                "*.jinja",
-                "onnx/model_quantized.onnx"
             ]
         )
         print("✅ Download complete.")
         self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
         self.model = ORTModelForCausalLM.from_pretrained(
             self.local_dir,
             subfolder="onnx",
-            file_name="model_quantized.onnx",
             use_cache=True,
             use_io_binding=False
         )
     def evaluate(self, context, question, student_answer):
-        # 3. STRICT PROMPT
-        system_prompt = """You are a strict academic grader.
-        Your goal is to check if the student's answer is supported by the context.
-        RULES:
-        1. If the answer contradicts the context, score it 0-3.
-        2. If the answer describes things NOT in the text, mark it wrong.
-        3. Be direct. Do not repeat yourself."""
-        user_prompt = f"""
-        ### CONTEXT:
-        {context}
-        ### QUESTION:
-        {question}
-        ### STUDENT ANSWER:
-        {student_answer}
-        ### TASK:
-        Grade the answer (0-10) and verify if it matches the text provided.
-        """
         messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt}
         ]
-        input_text = self.tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True
-        )
         inputs = self.tokenizer(input_text, return_tensors="pt")
-        # 4. FIXED GENERATION PARAMETERS
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
-                max_new_tokens=200,
-                # [CRITICAL FIXES]
-                do_sample=False,          # Greedy Search (Faster, more deterministic)
-                repetition_penalty=1.2,   # Kills the "####. ####." loops
-                min_length=10,            # Forces it to write at least something
-                # Removed 'temperature' and 'top_p' because do_sample=False ignores them
             )
-        response = self.tokenizer.decode(
-            outputs[0][inputs.input_ids.shape[1]:],
-            skip_special_tokens=True
-        )
-        return response
 # ---------------------------------------------------------
 # 3. Main Application Logic
 # ---------------------------------------------------------
@@ -560,7 +532,6 @@ class VectorSystem:
             if not self.all_chunks: return "File empty."
-            # We store the ID to look up neighbors later
             metadatas = [{"id": i} for i in range(len(self.all_chunks))]
             self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
             return f"✅ Indexed {len(self.all_chunks)} chunks."
@@ -571,47 +542,34 @@ class VectorSystem:
         if not self.vector_store: return "⚠️ Please upload a file first.", ""
         if not question: return "⚠️ Enter a question.", ""
-        # 1. RAG RETRIEVAL - Get ONLY the Top 1 Best Match
         results = self.vector_store.similarity_search_with_score(question, k=1)
-        if not results:
-            return "No relevant text found.", ""
-        # Get the ID of the best chunk
         best_doc, score = results[0]
         chunk_id = best_doc.metadata['id']
-        # 2. CONTEXT EXPANSION (Neighboring Chunks)
-        # We retrieve Preceding + Current + Succeeding to repair cut-off sentences.
-        # Get Preceding Chunk (if not at start)
         prev_text = self.all_chunks[chunk_id - 1] if chunk_id > 0 else ""
-        # Get Current Chunk
         curr_text = self.all_chunks[chunk_id]
-        # Get Succeeding Chunk (if not at end)
         next_text = self.all_chunks[chunk_id + 1] if chunk_id < len(self.all_chunks) - 1 else ""
-        # Join them into one solid block of text for the LLM
-        context_text = f"{prev_text}\n\n{curr_text}\n\n{next_text}"
-        # 3. UI DISPLAY
-        # We format this nicely so the user knows what part is the "Core Match"
-        evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
-        if prev_text:
-            evidence_display += f"> **PREVIOUS CONTEXT:**\n...{prev_text[-400:]}\n\n" # Show last 400 chars
-        evidence_display += f"> **CORE MATCH:**\n**{curr_text}**\n\n"
-        if next_text:
-            evidence_display += f"> **NEXT CONTEXT:**\n{next_text[:400]}...\n" # Show first 400 chars
-        # 4. LLM EVALUATION
         llm_feedback = "Please enter a student answer to grade."
         if student_answer:
-            llm_feedback = self.llm.evaluate(context_text, question, student_answer)
         return evidence_display, llm_feedback
@@ -621,8 +579,7 @@ system = VectorSystem()
 # --- GRADIO UI ---
 with gr.Blocks(title="EduGenius AI Grader") as demo:
     gr.Markdown("# 🧠 EduGenius: RAG + LLM Grading")
-    gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B-Int8** (Evaluation).")
     with gr.Row():
         with gr.Column(scale=1):
             pdf_input = gr.File(label="1. Upload Chapter (PDF/TXT)")
@@ -635,8 +592,8 @@ with gr.Blocks(title="EduGenius AI Grader") as demo:
             run_btn = gr.Button("Retrieve & Grade", variant="secondary")
             with gr.Row():
-                evidence_box = gr.Markdown(label="Context Used for Grading")
-                grade_box = gr.Markdown(label="LLM Evaluation Result")
     upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
     run_btn.click(system.process_query, inputs=[q_input, a_input], outputs=[evidence_box, grade_box])

         self.local_dir = "onnx_llama_local"
         print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
         print(f"📥 Downloading Int8 model to {self.local_dir}...")
         snapshot_download(
             repo_id=self.repo_id,
             local_dir=self.local_dir,
             local_dir_use_symlinks=False,
             allow_patterns=[
+                "config.json", "generation_config.json", "tokenizer*",
+                "special_tokens_map.json", "*.jinja", "onnx/model_quantized.onnx"
             ]
         )
         print("✅ Download complete.")
         self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
         self.model = ORTModelForCausalLM.from_pretrained(
             self.local_dir,
             subfolder="onnx",
+            file_name="model_quantized.onnx",
             use_cache=True,
             use_io_binding=False
         )
     def evaluate(self, context, question, student_answer):
+        # SIMPLIFIED PROMPT (Easier for 1B model to follow)
         messages = [
+            {"role": "system", "content": "You are a strict teacher. Grade the answer based ONLY on the text provided."},
+            {"role": "user", "content": f"""
+            SOURCE TEXT:
+            {context}
+            QUESTION: {question}
+            ANSWER: {student_answer}
+            TASK:
+            1. Does the answer match the Source Text? (Yes/No)
+            2. Score (0-10)
+            3. Explanation (1 sentence)
+            """}
         ]
+        input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = self.tokenizer(input_text, return_tensors="pt")
+        # [CRITICAL FIX] GENERATION SETTINGS
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
+                max_new_tokens=150,       # Keep it short
+                do_sample=False,          # Greedy decoding (No randomness)
+                repetition_penalty=1.3,   # STRONG penalty to kill loops like "The The The"
+                min_length=5              # Force it to start speaking
             )
+        return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
 # ---------------------------------------------------------
 # 3. Main Application Logic
 # ---------------------------------------------------------
             if not self.all_chunks: return "File empty."
             metadatas = [{"id": i} for i in range(len(self.all_chunks))]
             self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
             return f"✅ Indexed {len(self.all_chunks)} chunks."
         if not self.vector_store: return "⚠️ Please upload a file first.", ""
         if not question: return "⚠️ Enter a question.", ""
+        # 1. RAG RETRIEVAL
         results = self.vector_store.similarity_search_with_score(question, k=1)
+        if not results: return "No relevant text found.", ""
         best_doc, score = results[0]
         chunk_id = best_doc.metadata['id']
+        # 2. SMART CONTEXT MERGING (Fixes the "Double Text" bug)
+        # Checking for overlap manually is hard, so we just grab the raw text range if possible
+        # Simple fix: Concatenate with a separator to break the loop
         prev_text = self.all_chunks[chunk_id - 1] if chunk_id > 0 else ""
         curr_text = self.all_chunks[chunk_id]
         next_text = self.all_chunks[chunk_id + 1] if chunk_id < len(self.all_chunks) - 1 else ""
+        # [FIX] We use "..." to separate them clearly for the LLM
+        full_context = f"PREVIOUS: {prev_text}\n...\nFOCUS: {curr_text}\n...\nNEXT: {next_text}"
+        # 3. LLM EVALUATION
         llm_feedback = "Please enter a student answer to grade."
         if student_answer:
+            llm_feedback = self.llm.evaluate(full_context, question, student_answer)
+        # UI Display
+        evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
+        evidence_display += f"> **PREVIOUS:** ...{prev_text[-300:]}\n\n"
+        evidence_display += f"> **CORE:** **{curr_text}**\n\n"
+        evidence_display += f"> **NEXT:** {next_text[:300]}...\n"
         return evidence_display, llm_feedback
 # --- GRADIO UI ---
 with gr.Blocks(title="EduGenius AI Grader") as demo:
     gr.Markdown("# 🧠 EduGenius: RAG + LLM Grading")
     with gr.Row():
         with gr.Column(scale=1):
             pdf_input = gr.File(label="1. Upload Chapter (PDF/TXT)")
             run_btn = gr.Button("Retrieve & Grade", variant="secondary")
             with gr.Row():
+                evidence_box = gr.Markdown(label="Context Used")
+                grade_box = gr.Markdown(label="LLM Result")
     upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
     run_btn.click(system.process_query, inputs=[q_input, a_input], outputs=[evidence_box, grade_box])