Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -450,89 +450,61 @@ class LLMEvaluator:
|
|
| 450 |
self.local_dir = "onnx_llama_local"
|
| 451 |
|
| 452 |
print(f"π Preparing LLM: {self.repo_id} (Int8 Quantized)...")
|
| 453 |
-
|
| 454 |
print(f"📥 Downloading Int8 model to {self.local_dir}...")
|
|
|
|
| 455 |
snapshot_download(
|
| 456 |
repo_id=self.repo_id,
|
| 457 |
local_dir=self.local_dir,
|
| 458 |
local_dir_use_symlinks=False,
|
| 459 |
allow_patterns=[
|
| 460 |
-
"config.json",
|
| 461 |
-
"
|
| 462 |
-
"tokenizer*",
|
| 463 |
-
"special_tokens_map.json",
|
| 464 |
-
"*.jinja",
|
| 465 |
-
"onnx/model_quantized.onnx"
|
| 466 |
]
|
| 467 |
)
|
| 468 |
print("✅ Download complete.")
|
| 469 |
|
| 470 |
self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
|
| 471 |
-
|
| 472 |
self.model = ORTModelForCausalLM.from_pretrained(
|
| 473 |
self.local_dir,
|
| 474 |
subfolder="onnx",
|
| 475 |
-
file_name="model_quantized.onnx",
|
| 476 |
use_cache=True,
|
| 477 |
use_io_binding=False
|
| 478 |
)
|
| 479 |
|
| 480 |
def evaluate(self, context, question, student_answer):
|
| 481 |
-
#
|
| 482 |
-
system_prompt = """You are a strict academic grader.
|
| 483 |
-
Your goal is to check if the student's answer is supported by the context.
|
| 484 |
-
|
| 485 |
-
RULES:
|
| 486 |
-
1. If the answer contradicts the context, score it 0-3.
|
| 487 |
-
2. If the answer describes things NOT in the text, mark it wrong.
|
| 488 |
-
3. Be direct. Do not repeat yourself."""
|
| 489 |
-
|
| 490 |
-
user_prompt = f"""
|
| 491 |
-
### CONTEXT:
|
| 492 |
-
{context}
|
| 493 |
-
|
| 494 |
-
### QUESTION:
|
| 495 |
-
{question}
|
| 496 |
-
|
| 497 |
-
### STUDENT ANSWER:
|
| 498 |
-
{student_answer}
|
| 499 |
-
|
| 500 |
-
### TASK:
|
| 501 |
-
Grade the answer (0-10) and verify if it matches the text provided.
|
| 502 |
-
"""
|
| 503 |
-
|
| 504 |
messages = [
|
| 505 |
-
{"role": "system", "content":
|
| 506 |
-
{"role": "user", "content":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
]
|
| 508 |
|
| 509 |
-
input_text = self.tokenizer.apply_chat_template(
|
| 510 |
-
messages,
|
| 511 |
-
tokenize=False,
|
| 512 |
-
add_generation_prompt=True
|
| 513 |
-
)
|
| 514 |
-
|
| 515 |
inputs = self.tokenizer(input_text, return_tensors="pt")
|
| 516 |
|
| 517 |
-
#
|
| 518 |
with torch.no_grad():
|
| 519 |
outputs = self.model.generate(
|
| 520 |
**inputs,
|
| 521 |
-
max_new_tokens=
|
| 522 |
-
|
| 523 |
-
#
|
| 524 |
-
|
| 525 |
-
repetition_penalty=1.2, # Kills the "####. ####." loops
|
| 526 |
-
min_length=10, # Forces it to write at least something
|
| 527 |
-
|
| 528 |
-
# Removed 'temperature' and 'top_p' because do_sample=False ignores them
|
| 529 |
)
|
| 530 |
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
skip_special_tokens=True
|
| 534 |
-
)
|
| 535 |
-
return response
|
| 536 |
# ---------------------------------------------------------
|
| 537 |
# 3. Main Application Logic
|
| 538 |
# ---------------------------------------------------------
|
|
@@ -560,7 +532,6 @@ class VectorSystem:
|
|
| 560 |
|
| 561 |
if not self.all_chunks: return "File empty."
|
| 562 |
|
| 563 |
-
# We store the ID to look up neighbors later
|
| 564 |
metadatas = [{"id": i} for i in range(len(self.all_chunks))]
|
| 565 |
self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
|
| 566 |
return f"✅ Indexed {len(self.all_chunks)} chunks."
|
|
@@ -571,47 +542,34 @@ class VectorSystem:
|
|
| 571 |
if not self.vector_store: return "⚠️ Please upload a file first.", ""
|
| 572 |
if not question: return "⚠️ Enter a question.", ""
|
| 573 |
|
| 574 |
-
# 1. RAG RETRIEVAL
|
| 575 |
results = self.vector_store.similarity_search_with_score(question, k=1)
|
| 576 |
-
|
| 577 |
-
if not results:
|
| 578 |
-
return "No relevant text found.", ""
|
| 579 |
|
| 580 |
-
# Get the ID of the best chunk
|
| 581 |
best_doc, score = results[0]
|
| 582 |
chunk_id = best_doc.metadata['id']
|
| 583 |
|
| 584 |
-
# 2. CONTEXT
|
| 585 |
-
# We
|
|
|
|
| 586 |
|
| 587 |
-
# Get Preceding Chunk (if not at start)
|
| 588 |
prev_text = self.all_chunks[chunk_id - 1] if chunk_id > 0 else ""
|
| 589 |
-
|
| 590 |
-
# Get Current Chunk
|
| 591 |
curr_text = self.all_chunks[chunk_id]
|
| 592 |
-
|
| 593 |
-
# Get Succeeding Chunk (if not at end)
|
| 594 |
next_text = self.all_chunks[chunk_id + 1] if chunk_id < len(self.all_chunks) - 1 else ""
|
| 595 |
|
| 596 |
-
#
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
# 3. UI DISPLAY
|
| 600 |
-
# We format this nicely so the user knows what part is the "Core Match"
|
| 601 |
-
evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
|
| 602 |
-
|
| 603 |
-
if prev_text:
|
| 604 |
-
evidence_display += f"> **PREVIOUS CONTEXT:**\n...{prev_text[-400:]}\n\n" # Show last 400 chars
|
| 605 |
-
|
| 606 |
-
evidence_display += f"> **CORE MATCH:**\n**{curr_text}**\n\n"
|
| 607 |
-
|
| 608 |
-
if next_text:
|
| 609 |
-
evidence_display += f"> **NEXT CONTEXT:**\n{next_text[:400]}...\n" # Show first 400 chars
|
| 610 |
|
| 611 |
-
#
|
| 612 |
llm_feedback = "Please enter a student answer to grade."
|
| 613 |
if student_answer:
|
| 614 |
-
llm_feedback = self.llm.evaluate(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 615 |
|
| 616 |
return evidence_display, llm_feedback
|
| 617 |
|
|
@@ -621,8 +579,7 @@ system = VectorSystem()
|
|
| 621 |
# --- GRADIO UI ---
|
| 622 |
with gr.Blocks(title="EduGenius AI Grader") as demo:
|
| 623 |
gr.Markdown("# 🧠 EduGenius: RAG + LLM Grading")
|
| 624 |
-
|
| 625 |
-
|
| 626 |
with gr.Row():
|
| 627 |
with gr.Column(scale=1):
|
| 628 |
pdf_input = gr.File(label="1. Upload Chapter (PDF/TXT)")
|
|
@@ -635,8 +592,8 @@ with gr.Blocks(title="EduGenius AI Grader") as demo:
|
|
| 635 |
run_btn = gr.Button("Retrieve & Grade", variant="secondary")
|
| 636 |
|
| 637 |
with gr.Row():
|
| 638 |
-
evidence_box = gr.Markdown(label="Context Used
|
| 639 |
-
grade_box = gr.Markdown(label="LLM
|
| 640 |
|
| 641 |
upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
|
| 642 |
run_btn.click(system.process_query, inputs=[q_input, a_input], outputs=[evidence_box, grade_box])
|
|
|
|
| 450 |
self.local_dir = "onnx_llama_local"
|
| 451 |
|
| 452 |
print(f"π Preparing LLM: {self.repo_id} (Int8 Quantized)...")
|
|
|
|
| 453 |
print(f"📥 Downloading Int8 model to {self.local_dir}...")
|
| 454 |
+
|
| 455 |
snapshot_download(
|
| 456 |
repo_id=self.repo_id,
|
| 457 |
local_dir=self.local_dir,
|
| 458 |
local_dir_use_symlinks=False,
|
| 459 |
allow_patterns=[
|
| 460 |
+
"config.json", "generation_config.json", "tokenizer*",
|
| 461 |
+
"special_tokens_map.json", "*.jinja", "onnx/model_quantized.onnx"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
]
|
| 463 |
)
|
| 464 |
print("✅ Download complete.")
|
| 465 |
|
| 466 |
self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
|
|
|
|
| 467 |
self.model = ORTModelForCausalLM.from_pretrained(
|
| 468 |
self.local_dir,
|
| 469 |
subfolder="onnx",
|
| 470 |
+
file_name="model_quantized.onnx",
|
| 471 |
use_cache=True,
|
| 472 |
use_io_binding=False
|
| 473 |
)
|
| 474 |
|
| 475 |
def evaluate(self, context, question, student_answer):
|
| 476 |
+
# SIMPLIFIED PROMPT (Easier for 1B model to follow)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
messages = [
|
| 478 |
+
{"role": "system", "content": "You are a strict teacher. Grade the answer based ONLY on the text provided."},
|
| 479 |
+
{"role": "user", "content": f"""
|
| 480 |
+
SOURCE TEXT:
|
| 481 |
+
{context}
|
| 482 |
+
|
| 483 |
+
QUESTION: {question}
|
| 484 |
+
ANSWER: {student_answer}
|
| 485 |
+
|
| 486 |
+
TASK:
|
| 487 |
+
1. Does the answer match the Source Text? (Yes/No)
|
| 488 |
+
2. Score (0-10)
|
| 489 |
+
3. Explanation (1 sentence)
|
| 490 |
+
"""}
|
| 491 |
]
|
| 492 |
|
| 493 |
+
input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
inputs = self.tokenizer(input_text, return_tensors="pt")
|
| 495 |
|
| 496 |
+
# [CRITICAL FIX] GENERATION SETTINGS
|
| 497 |
with torch.no_grad():
|
| 498 |
outputs = self.model.generate(
|
| 499 |
**inputs,
|
| 500 |
+
max_new_tokens=150, # Keep it short
|
| 501 |
+
do_sample=False, # Greedy decoding (No randomness)
|
| 502 |
+
repetition_penalty=1.3, # STRONG penalty to kill loops like "The The The"
|
| 503 |
+
min_length=5 # Force it to start speaking
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
)
|
| 505 |
|
| 506 |
+
return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
|
| 507 |
+
|
|
|
|
|
|
|
|
|
|
| 508 |
# ---------------------------------------------------------
|
| 509 |
# 3. Main Application Logic
|
| 510 |
# ---------------------------------------------------------
|
|
|
|
| 532 |
|
| 533 |
if not self.all_chunks: return "File empty."
|
| 534 |
|
|
|
|
| 535 |
metadatas = [{"id": i} for i in range(len(self.all_chunks))]
|
| 536 |
self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
|
| 537 |
return f"✅ Indexed {len(self.all_chunks)} chunks."
|
|
|
|
| 542 |
if not self.vector_store: return "⚠️ Please upload a file first.", ""
|
| 543 |
if not question: return "⚠️ Enter a question.", ""
|
| 544 |
|
| 545 |
+
# 1. RAG RETRIEVAL
|
| 546 |
results = self.vector_store.similarity_search_with_score(question, k=1)
|
| 547 |
+
if not results: return "No relevant text found.", ""
|
|
|
|
|
|
|
| 548 |
|
|
|
|
| 549 |
best_doc, score = results[0]
|
| 550 |
chunk_id = best_doc.metadata['id']
|
| 551 |
|
| 552 |
+
# 2. SMART CONTEXT MERGING (Fixes the "Double Text" bug)
|
| 553 |
+
# Manually checking for overlap is hard, so we just grab the raw text range if possible
|
| 554 |
+
# Simple fix: Concatenate with a separator to break the loop
|
| 555 |
|
|
|
|
| 556 |
prev_text = self.all_chunks[chunk_id - 1] if chunk_id > 0 else ""
|
|
|
|
|
|
|
| 557 |
curr_text = self.all_chunks[chunk_id]
|
|
|
|
|
|
|
| 558 |
next_text = self.all_chunks[chunk_id + 1] if chunk_id < len(self.all_chunks) - 1 else ""
|
| 559 |
|
| 560 |
+
# [FIX] We use "..." to separate them clearly for the LLM
|
| 561 |
+
full_context = f"PREVIOUS: {prev_text}\n...\nFOCUS: {curr_text}\n...\nNEXT: {next_text}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 562 |
|
| 563 |
+
# 3. LLM EVALUATION
|
| 564 |
llm_feedback = "Please enter a student answer to grade."
|
| 565 |
if student_answer:
|
| 566 |
+
llm_feedback = self.llm.evaluate(full_context, question, student_answer)
|
| 567 |
+
|
| 568 |
+
# UI Display
|
| 569 |
+
evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
|
| 570 |
+
evidence_display += f"> **PREVIOUS:** ...{prev_text[-300:]}\n\n"
|
| 571 |
+
evidence_display += f"> **CORE:** **{curr_text}**\n\n"
|
| 572 |
+
evidence_display += f"> **NEXT:** {next_text[:300]}...\n"
|
| 573 |
|
| 574 |
return evidence_display, llm_feedback
|
| 575 |
|
|
|
|
| 579 |
# --- GRADIO UI ---
|
| 580 |
with gr.Blocks(title="EduGenius AI Grader") as demo:
|
| 581 |
gr.Markdown("# 🧠 EduGenius: RAG + LLM Grading")
|
| 582 |
+
|
|
|
|
| 583 |
with gr.Row():
|
| 584 |
with gr.Column(scale=1):
|
| 585 |
pdf_input = gr.File(label="1. Upload Chapter (PDF/TXT)")
|
|
|
|
| 592 |
run_btn = gr.Button("Retrieve & Grade", variant="secondary")
|
| 593 |
|
| 594 |
with gr.Row():
|
| 595 |
+
evidence_box = gr.Markdown(label="Context Used")
|
| 596 |
+
grade_box = gr.Markdown(label="LLM Result")
|
| 597 |
|
| 598 |
upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
|
| 599 |
run_btn.click(system.process_query, inputs=[q_input, a_input], outputs=[evidence_box, grade_box])
|