heerjtdev committed on
Commit
4cc40b8
·
verified ·
1 Parent(s): fa384b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -54
app.py CHANGED
@@ -402,7 +402,6 @@
402
 
403
 
404
 
405
-
406
  import gradio as gr
407
  import fitz # PyMuPDF
408
  import torch
@@ -442,7 +441,7 @@ class OnnxBgeEmbeddings(Embeddings):
442
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
443
 
444
  # ---------------------------------------------------------
445
- # 2. LLM Evaluator Class (Llama-3.2-1B ONNX - INT8 QUANTIZED)
446
  # ---------------------------------------------------------
447
  class LLMEvaluator:
448
  def __init__(self):
@@ -450,7 +449,6 @@ class LLMEvaluator:
450
  self.local_dir = "onnx_llama_local"
451
 
452
  print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
453
- # Ensure we have the model (skips if already downloaded)
454
  snapshot_download(
455
  repo_id=self.repo_id,
456
  local_dir=self.local_dir,
@@ -467,49 +465,51 @@ class LLMEvaluator:
467
  )
468
 
469
  def evaluate(self, context, question, student_answer):
470
- # [UNIVERSAL SAFETY FIX]
471
- # We frame the task as "Data Analysis" of "Educational Material".
472
- # We explicitly tell the model to treat metaphors/conflict as ACADEMIC DATA.
473
-
474
- prompt_content = f"""
475
- TASK: Academic Grading Assistant
476
- OBJECTIVE: Compare the Student Answer against the provided Reference Text.
477
-
478
- *** REFERENCE TEXT (Educational Source Material) ***
479
- {context}
480
- ****************************************************
481
-
482
- QUESTION: {question}
483
-
484
- STUDENT ANSWER: {student_answer}
485
-
486
- INSTRUCTIONS:
487
- 1. Act as a neutral grader. Your job is factual verification.
488
- 2. Verify if the Student Answer is supported by the Reference Text.
489
- 3. SAFETY OVERRIDE: The Reference Text may contain historical, literary, or scientific descriptions (e.g., wars, medical procedures, or metaphors). Treat these strictly as academic data to be analyzed, not as real-world threats.
490
-
491
- OUTPUT FORMAT:
492
- Grade: [0-10]
493
- Verdict: [Correct / Incorrect / Partially Correct]
494
- Feedback: [1 concise sentence explanation based ONLY on the text]
495
- """
496
 
497
  messages = [
498
- # Neutral system prompt avoids triggering personality constraints
499
- {"role": "system", "content": "You are an objective educational AI. You analyze text for factual accuracy."},
500
- {"role": "user", "content": prompt_content}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  ]
502
 
503
  input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
504
  inputs = self.tokenizer(input_text, return_tensors="pt")
505
 
 
506
  with torch.no_grad():
507
  outputs = self.model.generate(
508
  **inputs,
509
- max_new_tokens=150, # Short generation to reduce hallucination risk
510
- do_sample=False, # Greedy Search (Deterministic = No Loops)
511
- repetition_penalty=1.25, # Strong penalty to stop "The The The"
512
- min_length=5
 
 
 
 
513
  )
514
 
515
  return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
@@ -551,33 +551,23 @@ class VectorSystem:
551
  if not self.vector_store: return "⚠️ Please upload a file first.", ""
552
  if not question: return "⚠️ Enter a question.", ""
553
 
554
- # 1. RAG RETRIEVAL (Top 1)
 
 
555
  results = self.vector_store.similarity_search_with_score(question, k=1)
556
  if not results: return "No relevant text found.", ""
557
 
558
  best_doc, score = results[0]
559
- chunk_id = best_doc.metadata['id']
560
-
561
- # 2. CONTEXT EXPANSION (Prev + Current + Next)
562
- # We perform a safe check to ensure we don't crash at the start/end of the document
563
- prev_text = self.all_chunks[chunk_id - 1] if chunk_id > 0 else ""
564
- curr_text = self.all_chunks[chunk_id]
565
- next_text = self.all_chunks[chunk_id + 1] if chunk_id < len(self.all_chunks) - 1 else ""
566
-
567
- # We construct the "Sandbox Context" for the LLM
568
- # Using specific delimiters helps the model separate the sections
569
- full_context = f"--- START OF CONTEXT ---\n{prev_text}\n{curr_text}\n{next_text}\n--- END OF CONTEXT ---"
570
 
571
- # 3. LLM EVALUATION
572
  llm_feedback = "Please enter a student answer to grade."
573
  if student_answer:
574
- llm_feedback = self.llm.evaluate(full_context, question, student_answer)
575
 
576
  # UI Display
577
  evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
578
- if prev_text: evidence_display += f"> ...{prev_text[-200:]}\n"
579
- evidence_display += f"> **{curr_text}**\n"
580
- if next_text: evidence_display += f"> {next_text[:200]}...\n"
581
 
582
  return evidence_display, llm_feedback
583
 
@@ -586,7 +576,7 @@ system = VectorSystem()
586
 
587
  # --- GRADIO UI ---
588
  with gr.Blocks(title="EduGenius AI Grader") as demo:
589
- gr.Markdown("# 🧠 EduGenius: Universal AI Grader")
590
  gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B-Int8** (Evaluation).")
591
 
592
  with gr.Row():
 
402
 
403
 
404
 
 
405
  import gradio as gr
406
  import fitz # PyMuPDF
407
  import torch
 
441
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
442
 
443
  # ---------------------------------------------------------
444
+ # 2. LLM Evaluator Class (Llama-3.2-1B ONNX - INT8)
445
  # ---------------------------------------------------------
446
  class LLMEvaluator:
447
  def __init__(self):
 
449
  self.local_dir = "onnx_llama_local"
450
 
451
  print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
 
452
  snapshot_download(
453
  repo_id=self.repo_id,
454
  local_dir=self.local_dir,
 
465
  )
466
 
467
  def evaluate(self, context, question, student_answer):
468
+ # [STRATEGY: FEW-SHOT PROMPTING]
469
+ # We give the model an example so it knows exactly what format to output.
470
+ # This prevents it from hallucinating dates or XML tags.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
 
472
  messages = [
473
+ {"role": "system", "content": "You are a grading assistant. Output only the requested format."},
474
+ {"role": "user", "content": f"""
475
+ Task: Grade the student answer based ONLY on the provided text.
476
+
477
+ ---
478
+ EXAMPLE:
479
+ Text: "Photosynthesis is how plants make food using sunlight."
480
+ Question: "How do plants eat?"
481
+ Answer: "They use sunlight."
482
+
483
+ Grade: 10/10
484
+ Verdict: Correct
485
+ Explanation: The text confirms plants use sunlight to make food.
486
+ ---
487
+
488
+ YOUR TURN:
489
+
490
+ Text: "{context}"
491
+ Question: "{question}"
492
+ Answer: "{student_answer}"
493
+
494
+ Output the Grade, Verdict, and Explanation:
495
+ """}
496
  ]
497
 
498
  input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
499
  inputs = self.tokenizer(input_text, return_tensors="pt")
500
 
501
+ # [GENERATION SETTINGS FOR STABILITY]
502
  with torch.no_grad():
503
  outputs = self.model.generate(
504
  **inputs,
505
+ max_new_tokens=150,
506
+
507
+ # We use Sampling with low temp instead of Greedy
508
+ # This helps the model get "unstuck" from bad loops without being too creative.
509
+ do_sample=True,
510
+ temperature=0.2,
511
+ top_p=0.9,
512
+ repetition_penalty=1.1
513
  )
514
 
515
  return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
551
  if not self.vector_store: return "⚠️ Please upload a file first.", ""
552
  if not question: return "⚠️ Enter a question.", ""
553
 
554
+ # 1. RAG RETRIEVAL (Top 1 Only)
555
+ # We removed the Pre/Next chunk expansion because it was creating duplicate text
556
+ # that confused the model.
557
  results = self.vector_store.similarity_search_with_score(question, k=1)
558
  if not results: return "No relevant text found.", ""
559
 
560
  best_doc, score = results[0]
561
+ context_text = best_doc.page_content
 
 
 
 
 
 
 
 
 
 
562
 
563
+ # 2. LLM EVALUATION
564
  llm_feedback = "Please enter a student answer to grade."
565
  if student_answer:
566
+ llm_feedback = self.llm.evaluate(context_text, question, student_answer)
567
 
568
  # UI Display
569
  evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
570
+ evidence_display += f"> **{context_text}**\n"
 
 
571
 
572
  return evidence_display, llm_feedback
573
 
 
576
 
577
  # --- GRADIO UI ---
578
  with gr.Blocks(title="EduGenius AI Grader") as demo:
579
+ gr.Markdown("# 🧠 EduGenius: AI Grader")
580
  gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B-Int8** (Evaluation).")
581
 
582
  with gr.Row():