heerjtdev commited on
Commit
48130d6
·
verified ·
1 Parent(s): 76cfdea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -87
app.py CHANGED
@@ -450,89 +450,61 @@ class LLMEvaluator:
450
  self.local_dir = "onnx_llama_local"
451
 
452
  print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
453
-
454
  print(f"📥 Downloading Int8 model to {self.local_dir}...")
 
455
  snapshot_download(
456
  repo_id=self.repo_id,
457
  local_dir=self.local_dir,
458
  local_dir_use_symlinks=False,
459
  allow_patterns=[
460
- "config.json",
461
- "generation_config.json",
462
- "tokenizer*",
463
- "special_tokens_map.json",
464
- "*.jinja",
465
- "onnx/model_quantized.onnx"
466
  ]
467
  )
468
  print("✅ Download complete.")
469
 
470
  self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
471
-
472
  self.model = ORTModelForCausalLM.from_pretrained(
473
  self.local_dir,
474
  subfolder="onnx",
475
- file_name="model_quantized.onnx",
476
  use_cache=True,
477
  use_io_binding=False
478
  )
479
 
480
  def evaluate(self, context, question, student_answer):
481
- # 3. STRICT PROMPT
482
- system_prompt = """You are a strict academic grader.
483
- Your goal is to check if the student's answer is supported by the context.
484
-
485
- RULES:
486
- 1. If the answer contradicts the context, score it 0-3.
487
- 2. If the answer describes things NOT in the text, mark it wrong.
488
- 3. Be direct. Do not repeat yourself."""
489
-
490
- user_prompt = f"""
491
- ### CONTEXT:
492
- {context}
493
-
494
- ### QUESTION:
495
- {question}
496
-
497
- ### STUDENT ANSWER:
498
- {student_answer}
499
-
500
- ### TASK:
501
- Grade the answer (0-10) and verify if it matches the text provided.
502
- """
503
-
504
  messages = [
505
- {"role": "system", "content": system_prompt},
506
- {"role": "user", "content": user_prompt}
 
 
 
 
 
 
 
 
 
 
 
507
  ]
508
 
509
- input_text = self.tokenizer.apply_chat_template(
510
- messages,
511
- tokenize=False,
512
- add_generation_prompt=True
513
- )
514
-
515
  inputs = self.tokenizer(input_text, return_tensors="pt")
516
 
517
- # 4. FIXED GENERATION PARAMETERS
518
  with torch.no_grad():
519
  outputs = self.model.generate(
520
  **inputs,
521
- max_new_tokens=200,
522
-
523
- # [CRITICAL FIXES]
524
- do_sample=False, # Greedy Search (Faster, more deterministic)
525
- repetition_penalty=1.2, # Kills the "####. ####." loops
526
- min_length=10, # Forces it to write at least something
527
-
528
- # Removed 'temperature' and 'top_p' because do_sample=False ignores them
529
  )
530
 
531
- response = self.tokenizer.decode(
532
- outputs[0][inputs.input_ids.shape[1]:],
533
- skip_special_tokens=True
534
- )
535
- return response
536
  # ---------------------------------------------------------
537
  # 3. Main Application Logic
538
  # ---------------------------------------------------------
@@ -560,7 +532,6 @@ class VectorSystem:
560
 
561
  if not self.all_chunks: return "File empty."
562
 
563
- # We store the ID to look up neighbors later
564
  metadatas = [{"id": i} for i in range(len(self.all_chunks))]
565
  self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
566
  return f"✅ Indexed {len(self.all_chunks)} chunks."
@@ -571,47 +542,34 @@ class VectorSystem:
571
  if not self.vector_store: return "⚠️ Please upload a file first.", ""
572
  if not question: return "⚠️ Enter a question.", ""
573
 
574
- # 1. RAG RETRIEVAL - Get ONLY the Top 1 Best Match
575
  results = self.vector_store.similarity_search_with_score(question, k=1)
576
-
577
- if not results:
578
- return "No relevant text found.", ""
579
 
580
- # Get the ID of the best chunk
581
  best_doc, score = results[0]
582
  chunk_id = best_doc.metadata['id']
583
 
584
- # 2. CONTEXT EXPANSION (Neighboring Chunks)
585
- # We retrieve Preceding + Current + Succeeding to repair cut-off sentences.
 
586
 
587
- # Get Preceding Chunk (if not at start)
588
  prev_text = self.all_chunks[chunk_id - 1] if chunk_id > 0 else ""
589
-
590
- # Get Current Chunk
591
  curr_text = self.all_chunks[chunk_id]
592
-
593
- # Get Succeeding Chunk (if not at end)
594
  next_text = self.all_chunks[chunk_id + 1] if chunk_id < len(self.all_chunks) - 1 else ""
595
 
596
- # Join them into one solid block of text for the LLM
597
- context_text = f"{prev_text}\n\n{curr_text}\n\n{next_text}"
598
-
599
- # 3. UI DISPLAY
600
- # We format this nicely so the user knows what part is the "Core Match"
601
- evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
602
-
603
- if prev_text:
604
- evidence_display += f"> **PREVIOUS CONTEXT:**\n...{prev_text[-400:]}\n\n" # Show last 400 chars
605
-
606
- evidence_display += f"> **CORE MATCH:**\n**{curr_text}**\n\n"
607
-
608
- if next_text:
609
- evidence_display += f"> **NEXT CONTEXT:**\n{next_text[:400]}...\n" # Show first 400 chars
610
 
611
- # 4. LLM EVALUATION
612
  llm_feedback = "Please enter a student answer to grade."
613
  if student_answer:
614
- llm_feedback = self.llm.evaluate(context_text, question, student_answer)
 
 
 
 
 
 
615
 
616
  return evidence_display, llm_feedback
617
 
@@ -621,8 +579,7 @@ system = VectorSystem()
621
  # --- GRADIO UI ---
622
  with gr.Blocks(title="EduGenius AI Grader") as demo:
623
  gr.Markdown("# 🧠 EduGenius: RAG + LLM Grading")
624
- gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B-Int8** (Evaluation).")
625
-
626
  with gr.Row():
627
  with gr.Column(scale=1):
628
  pdf_input = gr.File(label="1. Upload Chapter (PDF/TXT)")
@@ -635,8 +592,8 @@ with gr.Blocks(title="EduGenius AI Grader") as demo:
635
  run_btn = gr.Button("Retrieve & Grade", variant="secondary")
636
 
637
  with gr.Row():
638
- evidence_box = gr.Markdown(label="Context Used for Grading")
639
- grade_box = gr.Markdown(label="LLM Evaluation Result")
640
 
641
  upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
642
  run_btn.click(system.process_query, inputs=[q_input, a_input], outputs=[evidence_box, grade_box])
 
450
  self.local_dir = "onnx_llama_local"
451
 
452
  print(f"🔄 Preparing LLM: {self.repo_id} (Int8 Quantized)...")
 
453
  print(f"📥 Downloading Int8 model to {self.local_dir}...")
454
+
455
  snapshot_download(
456
  repo_id=self.repo_id,
457
  local_dir=self.local_dir,
458
  local_dir_use_symlinks=False,
459
  allow_patterns=[
460
+ "config.json", "generation_config.json", "tokenizer*",
461
+ "special_tokens_map.json", "*.jinja", "onnx/model_quantized.onnx"
 
 
 
 
462
  ]
463
  )
464
  print("✅ Download complete.")
465
 
466
  self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
 
467
  self.model = ORTModelForCausalLM.from_pretrained(
468
  self.local_dir,
469
  subfolder="onnx",
470
+ file_name="model_quantized.onnx",
471
  use_cache=True,
472
  use_io_binding=False
473
  )
474
 
475
  def evaluate(self, context, question, student_answer):
476
+ # SIMPLIFIED PROMPT (Easier for 1B model to follow)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477
  messages = [
478
+ {"role": "system", "content": "You are a strict teacher. Grade the answer based ONLY on the text provided."},
479
+ {"role": "user", "content": f"""
480
+ SOURCE TEXT:
481
+ {context}
482
+
483
+ QUESTION: {question}
484
+ ANSWER: {student_answer}
485
+
486
+ TASK:
487
+ 1. Does the answer match the Source Text? (Yes/No)
488
+ 2. Score (0-10)
489
+ 3. Explanation (1 sentence)
490
+ """}
491
  ]
492
 
493
+ input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
 
 
 
 
494
  inputs = self.tokenizer(input_text, return_tensors="pt")
495
 
496
+ # [CRITICAL FIX] GENERATION SETTINGS
497
  with torch.no_grad():
498
  outputs = self.model.generate(
499
  **inputs,
500
+ max_new_tokens=150, # Keep it short
501
+ do_sample=False, # Greedy decoding (No randomness)
502
+ repetition_penalty=1.3, # STRONG penalty to kill loops like "The The The"
503
+ min_length=5 # Force it to start speaking
 
 
 
 
504
  )
505
 
506
+ return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
507
+
 
 
 
508
  # ---------------------------------------------------------
509
  # 3. Main Application Logic
510
  # ---------------------------------------------------------
 
532
 
533
  if not self.all_chunks: return "File empty."
534
 
 
535
  metadatas = [{"id": i} for i in range(len(self.all_chunks))]
536
  self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
537
  return f"✅ Indexed {len(self.all_chunks)} chunks."
 
542
  if not self.vector_store: return "⚠️ Please upload a file first.", ""
543
  if not question: return "⚠️ Enter a question.", ""
544
 
545
+ # 1. RAG RETRIEVAL
546
  results = self.vector_store.similarity_search_with_score(question, k=1)
547
+ if not results: return "No relevant text found.", ""
 
 
548
 
 
549
  best_doc, score = results[0]
550
  chunk_id = best_doc.metadata['id']
551
 
552
+ # 2. SMART CONTEXT MERGING (Fixes the "Double Text" bug)
553
+ # We manually overlap checking is hard, so we just grab the raw text range if possible
554
+ # Simple fix: Concatenate with a separator to break the loop
555
 
 
556
  prev_text = self.all_chunks[chunk_id - 1] if chunk_id > 0 else ""
 
 
557
  curr_text = self.all_chunks[chunk_id]
 
 
558
  next_text = self.all_chunks[chunk_id + 1] if chunk_id < len(self.all_chunks) - 1 else ""
559
 
560
+ # [FIX] We use "..." to separate them clearly for the LLM
561
+ full_context = f"PREVIOUS: {prev_text}\n...\nFOCUS: {curr_text}\n...\nNEXT: {next_text}"
 
 
 
 
 
 
 
 
 
 
 
 
562
 
563
+ # 3. LLM EVALUATION
564
  llm_feedback = "Please enter a student answer to grade."
565
  if student_answer:
566
+ llm_feedback = self.llm.evaluate(full_context, question, student_answer)
567
+
568
+ # UI Display
569
+ evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
570
+ evidence_display += f"> **PREVIOUS:** ...{prev_text[-300:]}\n\n"
571
+ evidence_display += f"> **CORE:** **{curr_text}**\n\n"
572
+ evidence_display += f"> **NEXT:** {next_text[:300]}...\n"
573
 
574
  return evidence_display, llm_feedback
575
 
 
579
  # --- GRADIO UI ---
580
  with gr.Blocks(title="EduGenius AI Grader") as demo:
581
  gr.Markdown("# 🧠 EduGenius: RAG + LLM Grading")
582
+
 
583
  with gr.Row():
584
  with gr.Column(scale=1):
585
  pdf_input = gr.File(label="1. Upload Chapter (PDF/TXT)")
 
592
  run_btn = gr.Button("Retrieve & Grade", variant="secondary")
593
 
594
  with gr.Row():
595
+ evidence_box = gr.Markdown(label="Context Used")
596
+ grade_box = gr.Markdown(label="LLM Result")
597
 
598
  upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
599
  run_btn.click(system.process_query, inputs=[q_input, a_input], outputs=[evidence_box, grade_box])