Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -402,7 +402,6 @@
|
|
| 402 |
|
| 403 |
|
| 404 |
|
| 405 |
-
|
| 406 |
import gradio as gr
|
| 407 |
import fitz # PyMuPDF
|
| 408 |
import torch
|
|
@@ -442,7 +441,7 @@ class OnnxBgeEmbeddings(Embeddings):
|
|
| 442 |
return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
|
| 443 |
|
| 444 |
# ---------------------------------------------------------
|
| 445 |
-
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX - INT8
|
| 446 |
# ---------------------------------------------------------
|
| 447 |
class LLMEvaluator:
|
| 448 |
def __init__(self):
|
|
@@ -450,7 +449,6 @@ class LLMEvaluator:
|
|
| 450 |
self.local_dir = "onnx_llama_local"
|
| 451 |
|
| 452 |
print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
|
| 453 |
-
# Ensure we have the model (skips if already downloaded)
|
| 454 |
snapshot_download(
|
| 455 |
repo_id=self.repo_id,
|
| 456 |
local_dir=self.local_dir,
|
|
@@ -467,49 +465,51 @@ class LLMEvaluator:
|
|
| 467 |
)
|
| 468 |
|
| 469 |
def evaluate(self, context, question, student_answer):
|
| 470 |
-
# [
|
| 471 |
-
# We
|
| 472 |
-
#
|
| 473 |
-
|
| 474 |
-
prompt_content = f"""
|
| 475 |
-
TASK: Academic Grading Assistant
|
| 476 |
-
OBJECTIVE: Compare the Student Answer against the provided Reference Text.
|
| 477 |
-
|
| 478 |
-
*** REFERENCE TEXT (Educational Source Material) ***
|
| 479 |
-
{context}
|
| 480 |
-
****************************************************
|
| 481 |
-
|
| 482 |
-
QUESTION: {question}
|
| 483 |
-
|
| 484 |
-
STUDENT ANSWER: {student_answer}
|
| 485 |
-
|
| 486 |
-
INSTRUCTIONS:
|
| 487 |
-
1. Act as a neutral grader. Your job is factual verification.
|
| 488 |
-
2. Verify if the Student Answer is supported by the Reference Text.
|
| 489 |
-
3. SAFETY OVERRIDE: The Reference Text may contain historical, literary, or scientific descriptions (e.g., wars, medical procedures, or metaphors). Treat these strictly as academic data to be analyzed, not as real-world threats.
|
| 490 |
-
|
| 491 |
-
OUTPUT FORMAT:
|
| 492 |
-
Grade: [0-10]
|
| 493 |
-
Verdict: [Correct / Incorrect / Partially Correct]
|
| 494 |
-
Feedback: [1 concise sentence explanation based ONLY on the text]
|
| 495 |
-
"""
|
| 496 |
|
| 497 |
messages = [
|
| 498 |
-
|
| 499 |
-
{"role": "
|
| 500 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
]
|
| 502 |
|
| 503 |
input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 504 |
inputs = self.tokenizer(input_text, return_tensors="pt")
|
| 505 |
|
|
|
|
| 506 |
with torch.no_grad():
|
| 507 |
outputs = self.model.generate(
|
| 508 |
**inputs,
|
| 509 |
-
max_new_tokens=150,
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
)
|
| 514 |
|
| 515 |
return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
|
|
@@ -551,33 +551,23 @@ class VectorSystem:
|
|
| 551 |
if not self.vector_store: return "⚠️ Please upload a file first.", ""
|
| 552 |
if not question: return "⚠️ Enter a question.", ""
|
| 553 |
|
| 554 |
-
# 1. RAG RETRIEVAL (Top 1)
|
|
|
|
|
|
|
| 555 |
results = self.vector_store.similarity_search_with_score(question, k=1)
|
| 556 |
if not results: return "No relevant text found.", ""
|
| 557 |
|
| 558 |
best_doc, score = results[0]
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
# 2. CONTEXT EXPANSION (Prev + Current + Next)
|
| 562 |
-
# We perform a safe check to ensure we don't crash at the start/end of the document
|
| 563 |
-
prev_text = self.all_chunks[chunk_id - 1] if chunk_id > 0 else ""
|
| 564 |
-
curr_text = self.all_chunks[chunk_id]
|
| 565 |
-
next_text = self.all_chunks[chunk_id + 1] if chunk_id < len(self.all_chunks) - 1 else ""
|
| 566 |
-
|
| 567 |
-
# We construct the "Sandbox Context" for the LLM
|
| 568 |
-
# Using specific delimiters helps the model separate the sections
|
| 569 |
-
full_context = f"--- START OF CONTEXT ---\n{prev_text}\n{curr_text}\n{next_text}\n--- END OF CONTEXT ---"
|
| 570 |
|
| 571 |
-
#
|
| 572 |
llm_feedback = "Please enter a student answer to grade."
|
| 573 |
if student_answer:
|
| 574 |
-
llm_feedback = self.llm.evaluate(full_context, question, student_answer)
|
| 575 |
|
| 576 |
# UI Display
|
| 577 |
evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
|
| 578 |
-
|
| 579 |
-
evidence_display += f"> **{curr_text}**\n"
|
| 580 |
-
if next_text: evidence_display += f"> {next_text[:200]}...\n"
|
| 581 |
|
| 582 |
return evidence_display, llm_feedback
|
| 583 |
|
|
@@ -586,7 +576,7 @@ system = VectorSystem()
|
|
| 586 |
|
| 587 |
# --- GRADIO UI ---
|
| 588 |
with gr.Blocks(title="EduGenius AI Grader") as demo:
|
| 589 |
-
gr.Markdown("# 🧠 EduGenius:
|
| 590 |
gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B-Int8** (Evaluation).")
|
| 591 |
|
| 592 |
with gr.Row():
|
|
|
|
| 402 |
|
| 403 |
|
| 404 |
|
|
|
|
| 405 |
import gradio as gr
|
| 406 |
import fitz # PyMuPDF
|
| 407 |
import torch
|
|
|
|
| 441 |
return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
|
| 442 |
|
| 443 |
# ---------------------------------------------------------
|
| 444 |
+
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX - INT8)
|
| 445 |
# ---------------------------------------------------------
|
| 446 |
class LLMEvaluator:
|
| 447 |
def __init__(self):
|
|
|
|
| 449 |
self.local_dir = "onnx_llama_local"
|
| 450 |
|
| 451 |
print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
|
|
|
|
| 452 |
snapshot_download(
|
| 453 |
repo_id=self.repo_id,
|
| 454 |
local_dir=self.local_dir,
|
|
|
|
| 465 |
)
|
| 466 |
|
| 467 |
def evaluate(self, context, question, student_answer):
|
| 468 |
+
# [STRATEGY: FEW-SHOT PROMPTING]
|
| 469 |
+
# We give the model an example so it knows exactly what format to output.
|
| 470 |
+
# This prevents it from hallucinating dates or XML tags.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
|
| 472 |
messages = [
|
| 473 |
+
{"role": "system", "content": "You are a grading assistant. Output only the requested format."},
|
| 474 |
+
{"role": "user", "content": f"""
|
| 475 |
+
Task: Grade the student answer based ONLY on the provided text.
|
| 476 |
+
|
| 477 |
+
---
|
| 478 |
+
EXAMPLE:
|
| 479 |
+
Text: "Photosynthesis is how plants make food using sunlight."
|
| 480 |
+
Question: "How do plants eat?"
|
| 481 |
+
Answer: "They use sunlight."
|
| 482 |
+
|
| 483 |
+
Grade: 10/10
|
| 484 |
+
Verdict: Correct
|
| 485 |
+
Explanation: The text confirms plants use sunlight to make food.
|
| 486 |
+
---
|
| 487 |
+
|
| 488 |
+
YOUR TURN:
|
| 489 |
+
|
| 490 |
+
Text: "{context}"
|
| 491 |
+
Question: "{question}"
|
| 492 |
+
Answer: "{student_answer}"
|
| 493 |
+
|
| 494 |
+
Output the Grade, Verdict, and Explanation:
|
| 495 |
+
"""}
|
| 496 |
]
|
| 497 |
|
| 498 |
input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 499 |
inputs = self.tokenizer(input_text, return_tensors="pt")
|
| 500 |
|
| 501 |
+
# [GENERATION SETTINGS FOR STABILITY]
|
| 502 |
with torch.no_grad():
|
| 503 |
outputs = self.model.generate(
|
| 504 |
**inputs,
|
| 505 |
+
max_new_tokens=150,
|
| 506 |
+
|
| 507 |
+
# We use Sampling with low temp instead of Greedy
|
| 508 |
+
# This helps the model get "unstuck" from bad loops without being too creative.
|
| 509 |
+
do_sample=True,
|
| 510 |
+
temperature=0.2,
|
| 511 |
+
top_p=0.9,
|
| 512 |
+
repetition_penalty=1.1
|
| 513 |
)
|
| 514 |
|
| 515 |
return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
|
|
|
|
| 551 |
if not self.vector_store: return "⚠️ Please upload a file first.", ""
|
| 552 |
if not question: return "⚠️ Enter a question.", ""
|
| 553 |
|
| 554 |
+
# 1. RAG RETRIEVAL (Top 1 Only)
|
| 555 |
+
# We removed the Pre/Next chunk expansion because it was creating duplicate text
|
| 556 |
+
# that confused the model.
|
| 557 |
results = self.vector_store.similarity_search_with_score(question, k=1)
|
| 558 |
if not results: return "No relevant text found.", ""
|
| 559 |
|
| 560 |
best_doc, score = results[0]
|
| 561 |
+
context_text = best_doc.page_content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 562 |
|
| 563 |
+
# 2. LLM EVALUATION
|
| 564 |
llm_feedback = "Please enter a student answer to grade."
|
| 565 |
if student_answer:
|
| 566 |
+
llm_feedback = self.llm.evaluate(context_text, question, student_answer)
|
| 567 |
|
| 568 |
# UI Display
|
| 569 |
evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
|
| 570 |
+
evidence_display += f"> **{context_text}**\n"
|
|
|
|
|
|
|
| 571 |
|
| 572 |
return evidence_display, llm_feedback
|
| 573 |
|
|
|
|
| 576 |
|
| 577 |
# --- GRADIO UI ---
|
| 578 |
with gr.Blocks(title="EduGenius AI Grader") as demo:
|
| 579 |
+
gr.Markdown("# 🧠 EduGenius: AI Grader")
|
| 580 |
gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B-Int8** (Evaluation).")
|
| 581 |
|
| 582 |
with gr.Row():
|