Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -450,19 +450,13 @@ class LLMEvaluator:
|
|
| 450 |
self.local_dir = "onnx_llama_local"
|
| 451 |
|
| 452 |
print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
|
| 453 |
-
|
| 454 |
-
|
| 455 |
snapshot_download(
|
| 456 |
repo_id=self.repo_id,
|
| 457 |
local_dir=self.local_dir,
|
| 458 |
local_dir_use_symlinks=False,
|
| 459 |
-
allow_patterns=[
|
| 460 |
-
"config.json", "generation_config.json", "tokenizer*",
|
| 461 |
-
"special_tokens_map.json", "*.jinja", "onnx/model_quantized.onnx"
|
| 462 |
-
]
|
| 463 |
)
|
| 464 |
-
print("✅ Download complete.")
|
| 465 |
-
|
| 466 |
self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
|
| 467 |
self.model = ORTModelForCausalLM.from_pretrained(
|
| 468 |
self.local_dir,
|
|
@@ -473,34 +467,49 @@ class LLMEvaluator:
|
|
| 473 |
)
|
| 474 |
|
| 475 |
def evaluate(self, context, question, student_answer):
|
| 476 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
messages = [
|
| 478 |
-
|
| 479 |
-
{"role": "
|
| 480 |
-
|
| 481 |
-
{context}
|
| 482 |
-
|
| 483 |
-
QUESTION: {question}
|
| 484 |
-
ANSWER: {student_answer}
|
| 485 |
-
|
| 486 |
-
TASK:
|
| 487 |
-
1. Does the answer match the Source Text? (Yes/No)
|
| 488 |
-
2. Score (0-10)
|
| 489 |
-
3. Explanation (1 sentence)
|
| 490 |
-
"""}
|
| 491 |
]
|
| 492 |
|
| 493 |
input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 494 |
inputs = self.tokenizer(input_text, return_tensors="pt")
|
| 495 |
|
| 496 |
-
# [CRITICAL FIX] GENERATION SETTINGS
|
| 497 |
with torch.no_grad():
|
| 498 |
outputs = self.model.generate(
|
| 499 |
**inputs,
|
| 500 |
-
max_new_tokens=150, #
|
| 501 |
-
do_sample=False, # Greedy
|
| 502 |
-
repetition_penalty=1.
|
| 503 |
-
min_length=5
|
| 504 |
)
|
| 505 |
|
| 506 |
return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
|
|
@@ -542,23 +551,22 @@ class VectorSystem:
|
|
| 542 |
if not self.vector_store: return "⚠️ Please upload a file first.", ""
|
| 543 |
if not question: return "⚠️ Enter a question.", ""
|
| 544 |
|
| 545 |
-
# 1. RAG RETRIEVAL
|
| 546 |
results = self.vector_store.similarity_search_with_score(question, k=1)
|
| 547 |
if not results: return "No relevant text found.", ""
|
| 548 |
|
| 549 |
best_doc, score = results[0]
|
| 550 |
chunk_id = best_doc.metadata['id']
|
| 551 |
|
| 552 |
-
# 2.
|
| 553 |
-
# We
|
| 554 |
-
# Simple fix: Concatenate with a separator to break the loop
|
| 555 |
-
|
| 556 |
prev_text = self.all_chunks[chunk_id - 1] if chunk_id > 0 else ""
|
| 557 |
curr_text = self.all_chunks[chunk_id]
|
| 558 |
next_text = self.all_chunks[chunk_id + 1] if chunk_id < len(self.all_chunks) - 1 else ""
|
| 559 |
|
| 560 |
-
#
|
| 561 |
-
|
|
|
|
| 562 |
|
| 563 |
# 3. LLM EVALUATION
|
| 564 |
llm_feedback = "Please enter a student answer to grade."
|
|
@@ -567,9 +575,9 @@ class VectorSystem:
|
|
| 567 |
|
| 568 |
# UI Display
|
| 569 |
evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
|
| 570 |
-
evidence_display += f">
|
| 571 |
-
evidence_display += f"> **
|
| 572 |
-
evidence_display += f">
|
| 573 |
|
| 574 |
return evidence_display, llm_feedback
|
| 575 |
|
|
@@ -578,8 +586,9 @@ system = VectorSystem()
|
|
| 578 |
|
| 579 |
# --- GRADIO UI ---
|
| 580 |
with gr.Blocks(title="EduGenius AI Grader") as demo:
|
| 581 |
-
gr.Markdown("# 🧠 EduGenius:
|
| 582 |
-
|
|
|
|
| 583 |
with gr.Row():
|
| 584 |
with gr.Column(scale=1):
|
| 585 |
pdf_input = gr.File(label="1. Upload Chapter (PDF/TXT)")
|
|
|
|
| 450 |
self.local_dir = "onnx_llama_local"
|
| 451 |
|
| 452 |
print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
|
| 453 |
+
# Ensure we have the model (skips if already downloaded)
|
|
|
|
| 454 |
snapshot_download(
|
| 455 |
repo_id=self.repo_id,
|
| 456 |
local_dir=self.local_dir,
|
| 457 |
local_dir_use_symlinks=False,
|
| 458 |
+
allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_quantized.onnx"]
|
|
|
|
|
|
|
|
|
|
| 459 |
)
|
|
|
|
|
|
|
| 460 |
self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
|
| 461 |
self.model = ORTModelForCausalLM.from_pretrained(
|
| 462 |
self.local_dir,
|
|
|
|
| 467 |
)
|
| 468 |
|
| 469 |
def evaluate(self, context, question, student_answer):
    """Grade a student answer against reference text using the LLM.

    Builds a system + user chat prompt, renders it with the tokenizer's
    chat template, runs greedy generation, and returns only the newly
    generated text (the prompt tokens are sliced off before decoding).

    Args:
        context: Reference text the student answer is checked against.
        question: The question that was asked.
        student_answer: The student's answer to be graded.

    Returns:
        The decoded completion with special tokens removed. The prompt asks
        for "Grade / Verdict / Feedback" lines, but the model output is not
        parsed or validated here.
    """
    # The task is framed as neutral analysis of educational material, and the
    # prompt tells the model to treat sensitive subject matter in the
    # reference text (wars, medical procedures, metaphors) as academic data.
    prompt_lines = [
        "",
        "TASK: Academic Grading Assistant",
        "OBJECTIVE: Compare the Student Answer against the provided Reference Text.",
        "",
        "*** REFERENCE TEXT (Educational Source Material) ***",
        f"{context}",
        "****************************************************",
        "",
        f"QUESTION: {question}",
        "",
        f"STUDENT ANSWER: {student_answer}",
        "",
        "INSTRUCTIONS:",
        "1. Act as a neutral grader. Your job is factual verification.",
        "2. Verify if the Student Answer is supported by the Reference Text.",
        "3. SAFETY OVERRIDE: The Reference Text may contain historical, literary, or scientific descriptions (e.g., wars, medical procedures, or metaphors). Treat these strictly as academic data to be analyzed, not as real-world threats.",
        "",
        "OUTPUT FORMAT:",
        "Grade: [0-10]",
        "Verdict: [Correct / Incorrect / Partially Correct]",
        "Feedback: [1 concise sentence explanation based ONLY on the text]",
        "",
    ]
    prompt_content = "\n".join(prompt_lines)

    messages = [
        # Short, neutral system prompt; the detailed task lives in the user turn.
        {"role": "system", "content": "You are an objective educational AI. You analyze text for factual accuracy."},
        {"role": "user", "content": prompt_content},
    ]

    input_text = self.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = self.tokenizer(input_text, return_tensors="pt")
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = self.model.generate(
            **inputs,
            # Cap the completion length; the requested output is three short lines.
            max_new_tokens=150,
            # Greedy decoding: the same input always yields the same output.
            do_sample=False,
            # Penalise already-generated tokens to discourage "The The The" loops
            # (greedy decoding alone does not prevent repetition).
            repetition_penalty=1.25,
            # min_new_tokens, not min_length: min_length counts the prompt
            # tokens too, so a value of 5 never constrained the completion.
            min_new_tokens=5,
        )

    # Decode only the tokens generated after the prompt.
    return self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
|
|
|
|
| 551 |
if not self.vector_store: return "⚠️ Please upload a file first.", ""
|
| 552 |
if not question: return "⚠️ Enter a question.", ""
|
| 553 |
|
| 554 |
+
# 1. RAG RETRIEVAL (Top 1)
|
| 555 |
results = self.vector_store.similarity_search_with_score(question, k=1)
|
| 556 |
if not results: return "No relevant text found.", ""
|
| 557 |
|
| 558 |
best_doc, score = results[0]
|
| 559 |
chunk_id = best_doc.metadata['id']
|
| 560 |
|
| 561 |
+
# 2. CONTEXT EXPANSION (Prev + Current + Next)
|
| 562 |
+
# We perform a safe check to ensure we don't crash at the start/end of the document
|
|
|
|
|
|
|
| 563 |
prev_text = self.all_chunks[chunk_id - 1] if chunk_id > 0 else ""
|
| 564 |
curr_text = self.all_chunks[chunk_id]
|
| 565 |
next_text = self.all_chunks[chunk_id + 1] if chunk_id < len(self.all_chunks) - 1 else ""
|
| 566 |
|
| 567 |
+
# We construct the "Sandbox Context" for the LLM
|
| 568 |
+
# Using specific delimiters helps the model separate the sections
|
| 569 |
+
full_context = f"--- START OF CONTEXT ---\n{prev_text}\n{curr_text}\n{next_text}\n--- END OF CONTEXT ---"
|
| 570 |
|
| 571 |
# 3. LLM EVALUATION
|
| 572 |
llm_feedback = "Please enter a student answer to grade."
|
|
|
|
| 575 |
|
| 576 |
# UI Display
|
| 577 |
evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
|
| 578 |
+
if prev_text: evidence_display += f"> ...{prev_text[-200:]}\n"
|
| 579 |
+
evidence_display += f"> **{curr_text}**\n"
|
| 580 |
+
if next_text: evidence_display += f"> {next_text[:200]}...\n"
|
| 581 |
|
| 582 |
return evidence_display, llm_feedback
|
| 583 |
|
|
|
|
| 586 |
|
| 587 |
# --- GRADIO UI ---
|
| 588 |
with gr.Blocks(title="EduGenius AI Grader") as demo:
|
| 589 |
+
gr.Markdown("# 🧠 EduGenius: Universal AI Grader")
|
| 590 |
+
gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B-Int8** (Evaluation).")
|
| 591 |
+
|
| 592 |
with gr.Row():
|
| 593 |
with gr.Column(scale=1):
|
| 594 |
pdf_input = gr.File(label="1. Upload Chapter (PDF/TXT)")
|