heerjtdev commited on
Commit
d2523b4
·
verified ·
1 Parent(s): 9aae92e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +732 -102
app.py CHANGED
@@ -288,6 +288,12 @@
288
 
289
 
290
 
 
 
 
 
 
 
291
 
292
 
293
 
@@ -634,6 +640,367 @@
634
 
635
 
636
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
637
 
638
 
639
 
@@ -648,6 +1015,8 @@ import fitz # PyMuPDF
648
  import torch
649
  import os
650
  import numpy as np
 
 
651
 
652
  # --- IMPORT SESSION OPTIONS ---
653
  from onnxruntime import SessionOptions, GraphOptimizationLevel
@@ -668,7 +1037,7 @@ PROVIDERS = ["CPUExecutionProvider"]
668
  print(f"⚑ Running on: {PROVIDERS}")
669
 
670
  # ---------------------------------------------------------
671
- # 1. OPTIMIZED EMBEDDINGS (BGE-SMALL)
672
  # ---------------------------------------------------------
673
  class OnnxBgeEmbeddings(Embeddings):
674
  def __init__(self):
@@ -697,11 +1066,54 @@ class OnnxBgeEmbeddings(Embeddings):
697
 
698
 
699
  # ---------------------------------------------------------
700
- # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
701
  # ---------------------------------------------------------
702
  class LLMEvaluator:
703
  def __init__(self):
704
- # Qwen 2.5 0.5B is fast but needs "Few-Shot" examples to be strict.
705
  self.repo_id = "onnx-community/Qwen2.5-0.5B-Instruct"
706
  self.local_dir = "onnx_qwen_local"
707
 
@@ -731,53 +1143,72 @@ class LLMEvaluator:
731
  session_options=sess_options
732
  )
733
 
734
- def evaluate(self, context, question, student_answer, max_marks):
735
- # --- IMPROVED PROMPT STRATEGY ---
736
- system_prompt = f"""You are a strict Logic Validator. You are NOT a helpful assistant.
737
- Your job is to check if the Student Answer is FACTUALLY present in the Context.
738
-
739
- GRADING ALGORITHM:
740
- 1. IF the Student Answer mentions things NOT in the Context -> PENALTY (-50% of the marks).
741
- 2. IF the Student Answer interprets the text opposite to its meaning -> PENALTY (-100% of the marks).
742
- 3. IF the Student Answer is generic fluff -> SCORE: 0.
743
-
744
- --- EXAMPLE 1 (HALLUCINATION) ---
745
- Context: The sky is blue due to Rayleigh scattering.
746
- Question: Why is the sky blue?
747
- Student Answer: Because the ocean reflects the water into the sky.
748
- Analysis: The Context mentions 'Rayleigh scattering'. The student mentions 'ocean reflection'. These are different. The student is hallucinating outside facts.
749
- Score: 0/{max_marks}
750
-
751
- --- EXAMPLE 2 (CONTRADICTION) ---
752
- Context: One must efface one's own personality. Good prose is like a windowpane.
753
- Question: What does the author mean?
754
- Student Answer: It means we should see the author's personality clearly.
755
- Analysis: The text says 'efface' (remove) personality. The student says 'see' personality. This is a direct contradiction.
756
- Score: 0/{max_marks}
757
-
758
- --- EXAMPLE 3 (CORRECT) ---
759
- Context: Mitochondria is the powerhouse of the cell.
760
- Question: What is mitochondria?
761
- Student Answer: It is the cell's powerhouse.
762
- Analysis: Matches the text meaning exactly.
763
- Score: {max_marks}/{max_marks}
764
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
765
 
766
  user_prompt = f"""
767
- --- YOUR TASK ---
768
- Context:
769
- {context}
770
 
771
- Question:
772
- {question}
773
 
774
- Student Answer:
775
- {student_answer}
776
 
777
- OUTPUT FORMAT:
778
- Analysis: [Compare Student Answer vs Context. List any hallucinations or contradictions.]
779
- Score: [X]/{max_marks}
780
- """
781
 
782
  messages = [
783
  {"role": "system", "content": system_prompt},
@@ -787,29 +1218,129 @@ class LLMEvaluator:
787
  input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
788
  inputs = self.tokenizer(input_text, return_tensors="pt")
789
 
790
- # Lower temperature for strictness
791
  with torch.no_grad():
792
  outputs = self.model.generate(
793
  **inputs,
794
- max_new_tokens=150,
795
- temperature=0.1, # Strict logic, no creativity
796
- top_p=0.2, # Cut off unlikely tokens
797
  do_sample=True,
798
- repetition_penalty=1.2 # Penalize repetition
799
  )
800
 
801
  input_length = inputs['input_ids'].shape[1]
802
  response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
803
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
804
 
805
 
806
  # ---------------------------------------------------------
807
- # 3. NEW: ONNX RERANKER (Cross-Encoder)
808
- # Uses existing 'optimum' & 'transformers' libs (No new deps)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
809
  # ---------------------------------------------------------
810
  class OnnxReranker:
811
  def __init__(self):
812
- # TinyBERT is ~17MB and very fast on CPU
813
  self.model_name = "Xenova/ms-marco-TinyBERT-L-2-v2"
814
  print(f"πŸ”„ Loading Reranker: {self.model_name}...")
815
  self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
@@ -823,7 +1354,6 @@ class OnnxReranker:
823
  if not docs:
824
  return []
825
 
826
- # Prepare pairs: [query, doc_text]
827
  pairs = [[query, doc.page_content] for doc in docs]
828
 
829
  inputs = self.tokenizer(
@@ -837,34 +1367,33 @@ class OnnxReranker:
837
  with torch.no_grad():
838
  outputs = self.model(**inputs)
839
 
840
- # Get logits (Relevance scores)
841
- # MS-Marco models typically output a single logit or [irrelevant, relevant]
842
  logits = outputs.logits
843
  if logits.shape[1] == 2:
844
- scores = logits[:, 1] # Take the "relevant" class score
845
  else:
846
  scores = logits.flatten()
847
 
848
- # Sort docs by score (descending)
849
  scores = scores.numpy().tolist()
850
  doc_score_pairs = list(zip(docs, scores))
851
  doc_score_pairs.sort(key=lambda x: x[1], reverse=True)
852
 
853
- # Return top K docs
854
  return [doc for doc, score in doc_score_pairs[:top_k]]
855
 
856
 
857
  # ---------------------------------------------------------
858
- # 4. Main Application Logic
859
  # ---------------------------------------------------------
860
- class VectorSystem:
861
  def __init__(self):
862
  self.vector_store = None
863
  self.embeddings = OnnxBgeEmbeddings()
864
  self.llm = LLMEvaluator()
865
- self.reranker = OnnxReranker() # Initialize Reranker
866
- self.all_chunks = []
 
 
867
  self.total_chunks = 0
 
868
 
869
  def process_content(self, file_obj, raw_text):
870
  has_file = file_obj is not None
@@ -881,87 +1410,188 @@ class VectorSystem:
881
  if has_file:
882
  if file_obj.name.endswith('.pdf'):
883
  doc = fitz.open(file_obj.name)
884
- for page in doc: text += page.get_text()
 
885
  elif file_obj.name.endswith('.txt'):
886
- with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
 
887
  else:
888
  return "❌ Error: Only .pdf and .txt supported."
889
  else:
890
  text = raw_text
891
 
892
- # Smaller chunks for Reranking precision (500 chars)
893
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
894
  texts = text_splitter.split_text(text)
895
- self.all_chunks = texts # Keep plain text list for reference
896
 
897
- # Create Document objects with metadata
898
  docs = [Document(page_content=t, metadata={"id": i}) for i, t in enumerate(texts)]
899
  self.total_chunks = len(docs)
900
 
901
- if not docs: return "Content empty."
 
902
 
903
  self.vector_store = FAISS.from_documents(docs, self.embeddings)
904
 
905
- return f"βœ… Indexed {self.total_chunks} chunks."
906
  except Exception as e:
907
  return f"Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
908
 
909
- def process_query(self, question, student_answer, max_marks):
910
- if not self.vector_store: return "⚠️ Please upload a file or paste text first.", ""
911
- if not question: return "⚠️ Enter a question.", ""
 
 
 
 
 
912
 
913
- # Step A: Wide Net Retrieval (Get top 15 candidates)
914
- # We fetch more than we need to ensure the answer is in the candidate pool
915
- initial_docs = self.vector_store.similarity_search(question, k=15)
916
 
917
- # Step B: Rerank (Get top 3 best matches)
918
- # The Cross-Encoder strictly judges relevance
 
 
 
919
  top_docs = self.reranker.rank(question, initial_docs, top_k=3)
920
-
921
- # Step C: Construct Context
922
- # We merge the top 3 specific chunks
923
  expanded_context = "\n\n---\n\n".join([d.page_content for d in top_docs])
924
 
925
- evidence_display = f"### πŸ“š Optimized Context (Top {len(top_docs)} chunks after Reranking):\n"
926
- evidence_display += f"> {expanded_context} ..."
927
 
928
- llm_feedback = "Please enter a student answer to grade."
929
- if student_answer:
930
- llm_feedback = self.llm.evaluate(expanded_context, question, student_answer, max_marks)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
931
 
932
  return evidence_display, llm_feedback
933
 
934
- system = VectorSystem()
935
 
936
- with gr.Blocks(title="EduGenius AI Grader") as demo:
937
- gr.Markdown("# ⚑ EduGenius: CPU Optimized RAG")
938
- gr.Markdown("Powered by **Qwen-2.5-0.5B**, **BGE-Small** & **TinyBERT Reranker**")
 
 
 
 
 
 
939
 
940
  with gr.Row():
941
  with gr.Column(scale=1):
942
- gr.Markdown("### Source Input (Choose One)")
943
- pdf_input = gr.File(label="Option A: Upload Chapter (PDF/TXT)")
944
  gr.Markdown("**OR**")
945
- text_input = gr.Textbox(label="Option B: Paste Context", placeholder="Paste text here if you don't have a file...", lines=5)
946
 
947
- upload_btn = gr.Button("Index Content", variant="primary")
948
  status_msg = gr.Textbox(label="Status", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
949
 
950
  with gr.Column(scale=2):
 
 
951
  with gr.Row():
952
  q_input = gr.Textbox(label="Question", scale=2)
953
  max_marks = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max Marks")
954
 
955
- a_input = gr.TextArea(label="Student Answer")
956
- run_btn = gr.Button("Retrieve & Grade", variant="secondary")
957
 
958
  with gr.Row():
959
- evidence_box = gr.Markdown(label="Context Used")
960
- grade_box = gr.Markdown(label="Grading Result")
961
-
962
- # Pass both inputs to the process_content function
963
- upload_btn.click(system.process_content, inputs=[pdf_input, text_input], outputs=[status_msg])
964
- run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
965
 
966
  if __name__ == "__main__":
967
  demo.launch()
 
288
 
289
 
290
 
291
+
292
+
293
+
294
+
295
+
296
+
297
 
298
 
299
 
 
640
 
641
 
642
 
643
+
644
+
645
+
646
+
647
+
648
+
649
+
650
+
651
+
652
+
653
+
654
+
655
+
656
+
657
+
658
+
659
+
660
+
661
+
662
+ # import gradio as gr
663
+ # import fitz # PyMuPDF
664
+ # import torch
665
+ # import os
666
+ # import numpy as np
667
+
668
+ # # --- IMPORT SESSION OPTIONS ---
669
+ # from onnxruntime import SessionOptions, GraphOptimizationLevel
670
+
671
+ # # --- LANGCHAIN & RAG IMPORTS ---
672
+ # from langchain_text_splitters import RecursiveCharacterTextSplitter
673
+ # from langchain_community.vectorstores import FAISS
674
+ # from langchain_core.embeddings import Embeddings
675
+ # from langchain_core.documents import Document
676
+
677
+ # # --- ONNX & MODEL IMPORTS ---
678
+ # from transformers import AutoTokenizer
679
+ # from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM, ORTModelForSequenceClassification
680
+ # from huggingface_hub import snapshot_download
681
+
682
+ # # Force CPU Provider
683
+ # PROVIDERS = ["CPUExecutionProvider"]
684
+ # print(f"⚑ Running on: {PROVIDERS}")
685
+
686
+ # # ---------------------------------------------------------
687
+ # # 1. OPTIMIZED EMBEDDINGS (BGE-SMALL)
688
+ # # ---------------------------------------------------------
689
+ # class OnnxBgeEmbeddings(Embeddings):
690
+ # def __init__(self):
691
+ # model_name = "Xenova/bge-small-en-v1.5"
692
+ # print(f"πŸ”„ Loading Embeddings: {model_name}...")
693
+ # self.tokenizer = AutoTokenizer.from_pretrained(model_name)
694
+ # self.model = ORTModelForFeatureExtraction.from_pretrained(
695
+ # model_name,
696
+ # export=False,
697
+ # provider=PROVIDERS[0]
698
+ # )
699
+
700
+ # def _process_batch(self, texts):
701
+ # inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
702
+ # with torch.no_grad():
703
+ # outputs = self.model(**inputs)
704
+ # embeddings = outputs.last_hidden_state[:, 0]
705
+ # embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
706
+ # return embeddings.numpy().tolist()
707
+
708
+ # def embed_documents(self, texts):
709
+ # return self._process_batch(texts)
710
+
711
+ # def embed_query(self, text):
712
+ # return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
713
+
714
+
715
+ # # ---------------------------------------------------------
716
+ # # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
717
+ # # ---------------------------------------------------------
718
+ # class LLMEvaluator:
719
+ # def __init__(self):
720
+ # # Qwen 2.5 0.5B is fast but needs "Few-Shot" examples to be strict.
721
+ # self.repo_id = "onnx-community/Qwen2.5-0.5B-Instruct"
722
+ # self.local_dir = "onnx_qwen_local"
723
+
724
+ # print(f"πŸ”„ Preparing CPU LLM: {self.repo_id}...")
725
+
726
+ # if not os.path.exists(self.local_dir):
727
+ # print(f"πŸ“₯ Downloading FP16 model to {self.local_dir}...")
728
+ # snapshot_download(
729
+ # repo_id=self.repo_id,
730
+ # local_dir=self.local_dir,
731
+ # allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
732
+ # )
733
+ # print("βœ… Download complete.")
734
+
735
+ # self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
736
+
737
+ # sess_options = SessionOptions()
738
+ # sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
739
+
740
+ # self.model = ORTModelForCausalLM.from_pretrained(
741
+ # self.local_dir,
742
+ # subfolder="onnx",
743
+ # file_name="model_fp16.onnx",
744
+ # use_cache=True,
745
+ # use_io_binding=False,
746
+ # provider=PROVIDERS[0],
747
+ # session_options=sess_options
748
+ # )
749
+
750
+ # def evaluate(self, context, question, student_answer, max_marks):
751
+ # # --- IMPROVED PROMPT STRATEGY ---
752
+ # system_prompt = f"""You are a strict Logic Validator. You are NOT a helpful assistant.
753
+ # Your job is to check if the Student Answer is FACTUALLY present in the Context.
754
+
755
+ # GRADING ALGORITHM:
756
+ # 1. IF the Student Answer mentions things NOT in the Context -> PENALTY (-50% of the marks).
757
+ # 2. IF the Student Answer interprets the text opposite to its meaning -> PENALTY (-100% of the marks).
758
+ # 3. IF the Student Answer is generic fluff -> SCORE: 0.
759
+
760
+ # --- EXAMPLE 1 (HALLUCINATION) ---
761
+ # Context: The sky is blue due to Rayleigh scattering.
762
+ # Question: Why is the sky blue?
763
+ # Student Answer: Because the ocean reflects the water into the sky.
764
+ # Analysis: The Context mentions 'Rayleigh scattering'. The student mentions 'ocean reflection'. These are different. The student is hallucinating outside facts.
765
+ # Score: 0/{max_marks}
766
+
767
+ # --- EXAMPLE 2 (CONTRADICTION) ---
768
+ # Context: One must efface one's own personality. Good prose is like a windowpane.
769
+ # Question: What does the author mean?
770
+ # Student Answer: It means we should see the author's personality clearly.
771
+ # Analysis: The text says 'efface' (remove) personality. The student says 'see' personality. This is a direct contradiction.
772
+ # Score: 0/{max_marks}
773
+
774
+ # --- EXAMPLE 3 (CORRECT) ---
775
+ # Context: Mitochondria is the powerhouse of the cell.
776
+ # Question: What is mitochondria?
777
+ # Student Answer: It is the cell's powerhouse.
778
+ # Analysis: Matches the text meaning exactly.
779
+ # Score: {max_marks}/{max_marks}
780
+ # """
781
+
782
+ # user_prompt = f"""
783
+ # --- YOUR TASK ---
784
+ # Context:
785
+ # {context}
786
+
787
+ # Question:
788
+ # {question}
789
+
790
+ # Student Answer:
791
+ # {student_answer}
792
+
793
+ # OUTPUT FORMAT:
794
+ # Analysis: [Compare Student Answer vs Context. List any hallucinations or contradictions.]
795
+ # Score: [X]/{max_marks}
796
+ # """
797
+
798
+ # messages = [
799
+ # {"role": "system", "content": system_prompt},
800
+ # {"role": "user", "content": user_prompt}
801
+ # ]
802
+
803
+ # input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
804
+ # inputs = self.tokenizer(input_text, return_tensors="pt")
805
+
806
+ # # Lower temperature for strictness
807
+ # with torch.no_grad():
808
+ # outputs = self.model.generate(
809
+ # **inputs,
810
+ # max_new_tokens=150,
811
+ # temperature=0.1, # Strict logic, no creativity
812
+ # top_p=0.2, # Cut off unlikely tokens
813
+ # do_sample=True,
814
+ # repetition_penalty=1.2 # Penalize repetition
815
+ # )
816
+
817
+ # input_length = inputs['input_ids'].shape[1]
818
+ # response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
819
+ # return response
820
+
821
+
822
+ # # ---------------------------------------------------------
823
+ # # 3. NEW: ONNX RERANKER (Cross-Encoder)
824
+ # # Uses existing 'optimum' & 'transformers' libs (No new deps)
825
+ # # ---------------------------------------------------------
826
+ # class OnnxReranker:
827
+ # def __init__(self):
828
+ # # TinyBERT is ~17MB and very fast on CPU
829
+ # self.model_name = "Xenova/ms-marco-TinyBERT-L-2-v2"
830
+ # print(f"πŸ”„ Loading Reranker: {self.model_name}...")
831
+ # self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
832
+ # self.model = ORTModelForSequenceClassification.from_pretrained(
833
+ # self.model_name,
834
+ # export=False,
835
+ # provider=PROVIDERS[0]
836
+ # )
837
+
838
+ # def rank(self, query, docs, top_k=3):
839
+ # if not docs:
840
+ # return []
841
+
842
+ # # Prepare pairs: [query, doc_text]
843
+ # pairs = [[query, doc.page_content] for doc in docs]
844
+
845
+ # inputs = self.tokenizer(
846
+ # pairs,
847
+ # padding=True,
848
+ # truncation=True,
849
+ # max_length=512,
850
+ # return_tensors="pt"
851
+ # )
852
+
853
+ # with torch.no_grad():
854
+ # outputs = self.model(**inputs)
855
+
856
+ # # Get logits (Relevance scores)
857
+ # # MS-Marco models typically output a single logit or [irrelevant, relevant]
858
+ # logits = outputs.logits
859
+ # if logits.shape[1] == 2:
860
+ # scores = logits[:, 1] # Take the "relevant" class score
861
+ # else:
862
+ # scores = logits.flatten()
863
+
864
+ # # Sort docs by score (descending)
865
+ # scores = scores.numpy().tolist()
866
+ # doc_score_pairs = list(zip(docs, scores))
867
+ # doc_score_pairs.sort(key=lambda x: x[1], reverse=True)
868
+
869
+ # # Return top K docs
870
+ # return [doc for doc, score in doc_score_pairs[:top_k]]
871
+
872
+
873
+ # # ---------------------------------------------------------
874
+ # # 4. Main Application Logic
875
+ # # ---------------------------------------------------------
876
+ # class VectorSystem:
877
+ # def __init__(self):
878
+ # self.vector_store = None
879
+ # self.embeddings = OnnxBgeEmbeddings()
880
+ # self.llm = LLMEvaluator()
881
+ # self.reranker = OnnxReranker() # Initialize Reranker
882
+ # self.all_chunks = []
883
+ # self.total_chunks = 0
884
+
885
+ # def process_content(self, file_obj, raw_text):
886
+ # has_file = file_obj is not None
887
+ # has_text = raw_text is not None and len(raw_text.strip()) > 0
888
+
889
+ # if has_file and has_text:
890
+ # return "❌ Error: Please provide EITHER a file OR paste text, not both at the same time."
891
+
892
+ # if not has_file and not has_text:
893
+ # return "⚠️ No content provided. Please upload a file or paste text."
894
+
895
+ # try:
896
+ # text = ""
897
+ # if has_file:
898
+ # if file_obj.name.endswith('.pdf'):
899
+ # doc = fitz.open(file_obj.name)
900
+ # for page in doc: text += page.get_text()
901
+ # elif file_obj.name.endswith('.txt'):
902
+ # with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
903
+ # else:
904
+ # return "❌ Error: Only .pdf and .txt supported."
905
+ # else:
906
+ # text = raw_text
907
+
908
+ # # Smaller chunks for Reranking precision (500 chars)
909
+ # text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
910
+ # texts = text_splitter.split_text(text)
911
+ # self.all_chunks = texts # Keep plain text list for reference
912
+
913
+ # # Create Document objects with metadata
914
+ # docs = [Document(page_content=t, metadata={"id": i}) for i, t in enumerate(texts)]
915
+ # self.total_chunks = len(docs)
916
+
917
+ # if not docs: return "Content empty."
918
+
919
+ # self.vector_store = FAISS.from_documents(docs, self.embeddings)
920
+
921
+ # return f"βœ… Indexed {self.total_chunks} chunks."
922
+ # except Exception as e:
923
+ # return f"Error: {str(e)}"
924
+
925
+ # def process_query(self, question, student_answer, max_marks):
926
+ # if not self.vector_store: return "⚠️ Please upload a file or paste text first.", ""
927
+ # if not question: return "⚠️ Enter a question.", ""
928
+
929
+ # # Step A: Wide Net Retrieval (Get top 15 candidates)
930
+ # # We fetch more than we need to ensure the answer is in the candidate pool
931
+ # initial_docs = self.vector_store.similarity_search(question, k=15)
932
+
933
+ # # Step B: Rerank (Get top 3 best matches)
934
+ # # The Cross-Encoder strictly judges relevance
935
+ # top_docs = self.reranker.rank(question, initial_docs, top_k=3)
936
+
937
+ # # Step C: Construct Context
938
+ # # We merge the top 3 specific chunks
939
+ # expanded_context = "\n\n---\n\n".join([d.page_content for d in top_docs])
940
+
941
+ # evidence_display = f"### πŸ“š Optimized Context (Top {len(top_docs)} chunks after Reranking):\n"
942
+ # evidence_display += f"> {expanded_context} ..."
943
+
944
+ # llm_feedback = "Please enter a student answer to grade."
945
+ # if student_answer:
946
+ # llm_feedback = self.llm.evaluate(expanded_context, question, student_answer, max_marks)
947
+
948
+ # return evidence_display, llm_feedback
949
+
950
+ # system = VectorSystem()
951
+
952
+ # with gr.Blocks(title="EduGenius AI Grader") as demo:
953
+ # gr.Markdown("# ⚑ EduGenius: CPU Optimized RAG")
954
+ # gr.Markdown("Powered by **Qwen-2.5-0.5B**, **BGE-Small** & **TinyBERT Reranker**")
955
+
956
+ # with gr.Row():
957
+ # with gr.Column(scale=1):
958
+ # gr.Markdown("### Source Input (Choose One)")
959
+ # pdf_input = gr.File(label="Option A: Upload Chapter (PDF/TXT)")
960
+ # gr.Markdown("**OR**")
961
+ # text_input = gr.Textbox(label="Option B: Paste Context", placeholder="Paste text here if you don't have a file...", lines=5)
962
+
963
+ # upload_btn = gr.Button("Index Content", variant="primary")
964
+ # status_msg = gr.Textbox(label="Status", interactive=False)
965
+
966
+ # with gr.Column(scale=2):
967
+ # with gr.Row():
968
+ # q_input = gr.Textbox(label="Question", scale=2)
969
+ # max_marks = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max Marks")
970
+
971
+ # a_input = gr.TextArea(label="Student Answer")
972
+ # run_btn = gr.Button("Retrieve & Grade", variant="secondary")
973
+
974
+ # with gr.Row():
975
+ # evidence_box = gr.Markdown(label="Context Used")
976
+ # grade_box = gr.Markdown(label="Grading Result")
977
+
978
+ # # Pass both inputs to the process_content function
979
+ # upload_btn.click(system.process_content, inputs=[pdf_input, text_input], outputs=[status_msg])
980
+ # run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])
981
+
982
+ # if __name__ == "__main__":
983
+ # demo.launch()
984
+
985
+
986
+
987
+
988
+
989
+
990
+
991
+
992
+
993
+
994
+
995
+
996
+
997
+
998
+
999
+
1000
+
1001
+
1002
+
1003
+
1004
 
1005
 
1006
 
 
1015
  import torch
1016
  import os
1017
  import numpy as np
1018
+ import re
1019
+ from typing import List, Dict, Tuple, Optional
1020
 
1021
  # --- IMPORT SESSION OPTIONS ---
1022
  from onnxruntime import SessionOptions, GraphOptimizationLevel
 
1037
  print(f"⚑ Running on: {PROVIDERS}")
1038
 
1039
  # ---------------------------------------------------------
1040
+ # 1. OPTIMIZED EMBEDDINGS (BGE-SMALL) - UNCHANGED
1041
  # ---------------------------------------------------------
1042
  class OnnxBgeEmbeddings(Embeddings):
1043
  def __init__(self):
 
1066
 
1067
 
1068
  # ---------------------------------------------------------
1069
+ # 2. NEW: ANSWER PRESENCE CHECKER
1070
+ # Paper insight: Prevent grading blank/missing answers
1071
+ # ---------------------------------------------------------
1072
class AnswerPresenceChecker:
    """Validates that a student answer exists and contains real substance.

    Paper insight: never send blank/placeholder answers to the LLM grader;
    reject them deterministically up front with a human-readable reason.
    """

    # Pre-compiled patterns for common non-answers, matched against the
    # lower-cased, stripped answer. Checked BEFORE the length gates so that
    # e.g. "n/a" or "idk" is reported as placeholder text rather than
    # being misleadingly rejected as merely "too short".
    _PLACEHOLDER_PATTERNS = [
        re.compile(r'^[.\s]*$'),                  # only dots/spaces
        re.compile(r'^[?]+$'),                    # only question marks
        re.compile(r'^(n/?a|na|idk|dunno)\s*$'),  # common non-answers
    ]

    def __init__(self, min_length: int = 10, min_words: int = 3):
        """
        Args:
            min_length: minimum number of characters for a valid answer.
            min_words: minimum number of whitespace-separated words.
        """
        self.min_length = min_length
        self.min_words = min_words

    def check_presence(self, student_answer: str) -> Tuple[bool, str]:
        """Check whether a student answer is present and substantive.

        Args:
            student_answer: raw answer text (may be None/empty).

        Returns:
            (is_present, reason) — reason explains acceptance or rejection.
        """
        if not student_answer or not student_answer.strip():
            return False, "Answer is empty"

        answer = student_answer.strip()

        # Placeholder detection runs first: "idk", "n/a", "???" are
        # non-answers regardless of their length.
        lowered = answer.lower()
        for pattern in self._PLACEHOLDER_PATTERNS:
            if pattern.match(lowered):
                return False, "Answer appears to be placeholder text"

        # Minimum character count.
        if len(answer) < self.min_length:
            return False, f"Answer too short ({len(answer)} chars, need {self.min_length})"

        # Minimum word count.
        words = answer.split()
        if len(words) < self.min_words:
            return False, f"Answer too brief ({len(words)} words, need {self.min_words})"

        return True, "Answer present and valid"
1109
+
1110
+
1111
+ # ---------------------------------------------------------
1112
+ # 3. ENHANCED LLM EVALUATOR WITH ENSEMBLE SUPPORT
1113
+ # Paper insights: Structured prompting, reference grounding, ensemble grading
1114
  # ---------------------------------------------------------
1115
  class LLMEvaluator:
1116
  def __init__(self):
 
1117
  self.repo_id = "onnx-community/Qwen2.5-0.5B-Instruct"
1118
  self.local_dir = "onnx_qwen_local"
1119
 
 
1143
  session_options=sess_options
1144
  )
1145
 
1146
+ def evaluate_single(self, context: str, question: str, student_answer: str,
1147
+ max_marks: int, grader_id: int = 1,
1148
+ reference_summary: Optional[str] = None) -> Dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1149
  """
1150
+ Single grader evaluation with structured output.
1151
+ Paper insight: Use rigid templates with deterministic validation.
1152
+
1153
+ Returns structured dict with:
1154
+ - analysis: str
1155
+ - score: int
1156
+ - raw_response: str
1157
+ """
1158
+
1159
+ # Enhanced system prompt with reference grounding
1160
+ system_prompt = f"""You are Grader #{grader_id}, a strict Logic Validator for educational assessment.
1161
+
1162
+ YOUR GRADING ALGORITHM:
1163
+ 1. Compare Student Answer ONLY against the provided Context
1164
+ 2. IF Student Answer mentions facts NOT in Context β†’ PENALTY (-50% of marks)
1165
+ 3. IF Student Answer contradicts the Context β†’ PENALTY (-100% of marks)
1166
+ 4. IF Student Answer is vague/generic without specific facts β†’ SCORE: 0-20%
1167
+ 5. IF Student Answer accurately reflects Context β†’ SCORE: 80-100%
1168
+
1169
+ CRITICAL RULES:
1170
+ [R1] Grade ONLY based on Context provided, not general knowledge
1171
+ [R2] Penalize hallucinations (facts not in Context) heavily
1172
+ [R3] Penalize contradictions (opposite meaning) completely
1173
+ [R4] Reward specific, accurate paraphrasing from Context
1174
+ [R5] Partial credit for partially correct answers
1175
+
1176
+ OUTPUT FORMAT (MANDATORY):
1177
+ You MUST output in this exact format:
1178
+
1179
+ ## Analysis
1180
+ [Your detailed comparison of Student Answer vs Context]
1181
+
1182
+ ## Score
1183
+ [X]/{max_marks}
1184
+
1185
+ Do NOT deviate from this format."""
1186
+
1187
+ # Add reference summary if provided (paper's key insight)
1188
+ reference_section = ""
1189
+ if reference_summary:
1190
+ reference_section = f"""
1191
+
1192
+ ### REFERENCE SOLUTION (Perfect Answer Example):
1193
+ {reference_summary}
1194
+
1195
+ Use this as calibration for what a 100% answer looks like."""
1196
 
1197
  user_prompt = f"""
1198
+ ### Context (Retrieved from Source):
1199
+ {context}
1200
+ {reference_section}
1201
 
1202
+ ### Question:
1203
+ {question}
1204
 
1205
+ ### Student Answer:
1206
+ {student_answer}
1207
 
1208
+ ### Maximum Marks: {max_marks}
1209
+
1210
+ Provide your grading following the mandatory output format.
1211
+ """
1212
 
1213
  messages = [
1214
  {"role": "system", "content": system_prompt},
 
1218
  input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
1219
  inputs = self.tokenizer(input_text, return_tensors="pt")
1220
 
1221
+ # Strict sampling for consistency
1222
  with torch.no_grad():
1223
  outputs = self.model.generate(
1224
  **inputs,
1225
+ max_new_tokens=200, # Increased for structured output
1226
+ temperature=0.1, # Very strict
1227
+ top_p=0.2,
1228
  do_sample=True,
1229
+ repetition_penalty=1.2
1230
  )
1231
 
1232
  input_length = inputs['input_ids'].shape[1]
1233
  response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
1234
+
1235
+ # Parse structured output
1236
+ analysis, score = self._parse_response(response, max_marks)
1237
+
1238
+ return {
1239
+ "grader_id": grader_id,
1240
+ "analysis": analysis,
1241
+ "score": score,
1242
+ "raw_response": response
1243
+ }
1244
+
1245
+ def _parse_response(self, response: str, max_marks: int) -> Tuple[str, int]:
1246
+ """
1247
+ Parse structured response to extract analysis and score.
1248
+ Paper insight: Deterministic parsing of rigid templates.
1249
+ """
1250
+ # Extract score using regex
1251
+ score_pattern = r'##\s*Score\s*\n\s*\[?(\d+)\]?/\d+'
1252
+ score_match = re.search(score_pattern, response, re.IGNORECASE)
1253
+
1254
+ if score_match:
1255
+ score = int(score_match.group(1))
1256
+ score = min(score, max_marks) # Cap at max
1257
+ else:
1258
+ # Fallback: look for any number/max pattern
1259
+ fallback_pattern = r'(\d+)\s*/\s*\d+'
1260
+ fallback_match = re.search(fallback_pattern, response)
1261
+ if fallback_match:
1262
+ score = min(int(fallback_match.group(1)), max_marks)
1263
+ else:
1264
+ score = 0 # Default if parsing fails
1265
+
1266
+ # Extract analysis
1267
+ analysis_pattern = r'##\s*Analysis\s*\n(.*?)(?=##\s*Score|$)'
1268
+ analysis_match = re.search(analysis_pattern, response, re.DOTALL | re.IGNORECASE)
1269
+
1270
+ if analysis_match:
1271
+ analysis = analysis_match.group(1).strip()
1272
+ else:
1273
+ # Fallback: use everything before score section
1274
+ analysis = response.split('##')[0].strip() if '##' in response else response
1275
+
1276
+ return analysis, score
1277
 
1278
 
1279
  # ---------------------------------------------------------
1280
+ # 4. NEW: SUPERVISOR AGGREGATOR
1281
+ # Paper insight: Merge ensemble outputs into final decision
1282
+ # ---------------------------------------------------------
1283
class SupervisorAggregator:
    """
    Aggregates multiple grader outputs into a final consensus grade.

    The reference paper merges ensemble outputs with another LLM call; here
    we use statistical aggregation (median) for CPU efficiency.
    """

    # Fraction of max_marks above which grader disagreement triggers the
    # manual-review flag (analogue of the paper's Dmax thresholds).
    REVIEW_FRACTION = 0.4

    def aggregate(self, grader_results: List[Dict], max_marks: int) -> Dict:
        """
        Aggregate K grader results into a final score.

        Args:
            grader_results: One dict per grader; each must contain at least
                'score' (int) and 'analysis' (str).
            max_marks: Maximum attainable marks for this question.

        Returns:
            Dict with:
            - final_score: int (median of the ensemble)
            - individual_scores: list of raw grader scores
            - disagreement: int (max - min score)
            - needs_review: bool (high-disagreement flag)
            - consensus_analysis: str (merged per-grader feedback)
            - grader_details: the original grader_results

        Raises:
            ValueError: If grader_results is empty (a median of zero
                graders is undefined).
        """
        if not grader_results:
            # np.median([]) yields NaN and int(NaN) raises a cryptic error
            # later; fail early with a clear message instead.
            raise ValueError("aggregate() requires at least one grader result")

        scores = [r['score'] for r in grader_results]

        # Median is robust to a single outlier grader
        # (the paper uses a supervisor LLM call instead).
        final_score = int(np.median(scores))

        # Spread of the ensemble: max - min score.
        disagreement = max(scores) - min(scores)

        # Flag for manual review if disagreement is too high.
        needs_review = disagreement >= (self.REVIEW_FRACTION * max_marks)

        # Merge the individual analyses into one displayable string.
        consensus_analysis = self._merge_analyses(grader_results, final_score, disagreement)

        return {
            "final_score": final_score,
            "individual_scores": scores,
            "disagreement": disagreement,
            "needs_review": needs_review,
            "consensus_analysis": consensus_analysis,
            "grader_details": grader_results
        }

    def _merge_analyses(self, results: List[Dict], final_score: int, disagreement: int) -> str:
        """Create a single consensus analysis string from multiple graders."""
        output = f"**Ensemble Grading Results** (Final: {final_score}, Disagreement: Β±{disagreement})\n\n"

        for i, result in enumerate(results, 1):
            output += f"**Grader {i} ({result['score']} points):**\n{result['analysis']}\n\n"

        if disagreement > 0:
            output += f"\n⚠️ **Note:** Graders disagreed by {disagreement} points. "
            if disagreement >= 5:
                output += "Consider manual review."

        return output
1337
+
1338
+
1339
+ # ---------------------------------------------------------
1340
+ # 5. ONNX RERANKER - UNCHANGED
1341
  # ---------------------------------------------------------
1342
  class OnnxReranker:
1343
  def __init__(self):
 
1344
  self.model_name = "Xenova/ms-marco-TinyBERT-L-2-v2"
1345
  print(f"πŸ”„ Loading Reranker: {self.model_name}...")
1346
  self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
 
1354
  if not docs:
1355
  return []
1356
 
 
1357
  pairs = [[query, doc.page_content] for doc in docs]
1358
 
1359
  inputs = self.tokenizer(
 
1367
  with torch.no_grad():
1368
  outputs = self.model(**inputs)
1369
 
 
 
1370
  logits = outputs.logits
1371
  if logits.shape[1] == 2:
1372
+ scores = logits[:, 1]
1373
  else:
1374
  scores = logits.flatten()
1375
 
 
1376
  scores = scores.numpy().tolist()
1377
  doc_score_pairs = list(zip(docs, scores))
1378
  doc_score_pairs.sort(key=lambda x: x[1], reverse=True)
1379
 
 
1380
  return [doc for doc, score in doc_score_pairs[:top_k]]
1381
 
1382
 
1383
  # ---------------------------------------------------------
1384
+ # 6. ENHANCED MAIN SYSTEM WITH MULTI-STAGE PIPELINE
1385
  # ---------------------------------------------------------
1386
+ class EnhancedVectorSystem:
1387
    def __init__(self):
        """Wire up every stage of the grading pipeline (models load eagerly here)."""
        # FAISS retrieval index; built lazily by process_content().
        self.vector_store = None
        self.embeddings = OnnxBgeEmbeddings()
        self.llm = LLMEvaluator()
        self.reranker = OnnxReranker()
        self.presence_checker = AnswerPresenceChecker()
        self.supervisor = SupervisorAggregator()
        # Raw chunk texts and their count, populated at indexing time.
        self.all_chunks = []
        self.total_chunks = 0
        self.reference_summary = None  # Store reference answer summary (optional)
1398
  def process_content(self, file_obj, raw_text):
1399
  has_file = file_obj is not None
 
1410
  if has_file:
1411
  if file_obj.name.endswith('.pdf'):
1412
  doc = fitz.open(file_obj.name)
1413
+ for page in doc:
1414
+ text += page.get_text()
1415
  elif file_obj.name.endswith('.txt'):
1416
+ with open(file_obj.name, 'r', encoding='utf-8') as f:
1417
+ text = f.read()
1418
  else:
1419
  return "❌ Error: Only .pdf and .txt supported."
1420
  else:
1421
  text = raw_text
1422
 
1423
+ # Smaller chunks for precision
1424
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
1425
  texts = text_splitter.split_text(text)
1426
+ self.all_chunks = texts
1427
 
 
1428
  docs = [Document(page_content=t, metadata={"id": i}) for i, t in enumerate(texts)]
1429
  self.total_chunks = len(docs)
1430
 
1431
+ if not docs:
1432
+ return "Content empty."
1433
 
1434
  self.vector_store = FAISS.from_documents(docs, self.embeddings)
1435
 
1436
+ return f"βœ… Indexed {self.total_chunks} chunks. Ready for grading."
1437
  except Exception as e:
1438
  return f"Error: {str(e)}"
1439
+
1440
+ def set_reference_answer(self, reference_text: str) -> str:
1441
+ """
1442
+ Set reference answer for grading calibration.
1443
+ Paper insight: Reference grounding prevents over-grading.
1444
+ """
1445
+ if not reference_text or len(reference_text.strip()) == 0:
1446
+ self.reference_summary = None
1447
+ return "ℹ️ Reference answer cleared."
1448
+
1449
+ self.reference_summary = reference_text.strip()
1450
+ return f"βœ… Reference answer set ({len(self.reference_summary)} chars). Will be used to calibrate grading."
1451
 
1452
+ def process_query(self, question, student_answer, max_marks, enable_ensemble=True):
1453
+ """
1454
+ Enhanced grading pipeline with multi-stage processing.
1455
+ """
1456
+ if not self.vector_store:
1457
+ return "⚠️ Please upload a file or paste text first.", ""
1458
+ if not question:
1459
+ return "⚠️ Enter a question.", ""
1460
 
1461
+ # Stage 1: Presence Check (Paper insight)
1462
+ is_present, presence_reason = self.presence_checker.check_presence(student_answer)
 
1463
 
1464
+ if not is_present:
1465
+ return f"⚠️ **No valid answer detected:** {presence_reason}", f"**Score: 0/{max_marks}**\n\nNo answer to grade."
1466
+
1467
+ # Stage 2: Retrieval + Reranking
1468
+ initial_docs = self.vector_store.similarity_search(question, k=15)
1469
  top_docs = self.reranker.rank(question, initial_docs, top_k=3)
 
 
 
1470
  expanded_context = "\n\n---\n\n".join([d.page_content for d in top_docs])
1471
 
1472
+ evidence_display = f"### πŸ“š Retrieved Context (Top {len(top_docs)} chunks):\n"
1473
+ evidence_display += f"> {expanded_context[:500]}..."
1474
 
1475
+ # Stage 3: Ensemble Grading (Paper's key innovation)
1476
+ if not student_answer:
1477
+ return evidence_display, "Please enter a student answer to grade."
1478
+
1479
+ if enable_ensemble:
1480
+ # Run K=3 independent graders
1481
+ grader_results = []
1482
+ for grader_id in range(1, 4): # K=3 ensemble
1483
+ result = self.llm.evaluate_single(
1484
+ context=expanded_context,
1485
+ question=question,
1486
+ student_answer=student_answer,
1487
+ max_marks=max_marks,
1488
+ grader_id=grader_id,
1489
+ reference_summary=self.reference_summary
1490
+ )
1491
+ grader_results.append(result)
1492
+
1493
+ # Stage 4: Supervisor Aggregation
1494
+ final_result = self.supervisor.aggregate(grader_results, max_marks)
1495
+
1496
+ # Format output
1497
+ llm_feedback = f"# πŸŽ“ Final Grade: {final_result['final_score']}/{max_marks}\n\n"
1498
+
1499
+ if final_result['needs_review']:
1500
+ llm_feedback += "⚠️ **Manual Review Recommended** (High grader disagreement)\n\n"
1501
+
1502
+ llm_feedback += final_result['consensus_analysis']
1503
+
1504
+ # Add statistics
1505
+ llm_feedback += f"\n\n---\n**Grading Statistics:**\n"
1506
+ llm_feedback += f"- Individual Scores: {final_result['individual_scores']}\n"
1507
+ llm_feedback += f"- Score Range: {min(final_result['individual_scores'])}-{max(final_result['individual_scores'])}\n"
1508
+ llm_feedback += f"- Disagreement: Β±{final_result['disagreement']} points\n"
1509
+
1510
+ else:
1511
+ # Single grader mode (for comparison)
1512
+ result = self.llm.evaluate_single(
1513
+ context=expanded_context,
1514
+ question=question,
1515
+ student_answer=student_answer,
1516
+ max_marks=max_marks,
1517
+ grader_id=1,
1518
+ reference_summary=self.reference_summary
1519
+ )
1520
+ llm_feedback = f"# πŸŽ“ Grade: {result['score']}/{max_marks}\n\n{result['analysis']}"
1521
 
1522
  return evidence_display, llm_feedback
1523
 
 
1524
 
1525
# ---------------------------------------------------------
# 7. GRADIO INTERFACE
# ---------------------------------------------------------
# Single shared pipeline instance; all event handlers close over it.
system = EnhancedVectorSystem()

with gr.Blocks(title="EduGenius AI Grader - Enhanced", theme=gr.themes.Soft()) as demo:
    # --- Header ---
    gr.Markdown("# ⚑ EduGenius: Enhanced RAG-Based Grader")
    gr.Markdown("Powered by **Ensemble Grading**, **Reference Grounding** & **Presence Checking**")
    gr.Markdown("*Implements multi-stage pipeline from research: arXiv:2601.00730*")

    with gr.Row():
        # --- Left column: content ingestion + optional reference answer ---
        with gr.Column(scale=1):
            gr.Markdown("### πŸ“„ Source Content")
            file_upload = gr.File(label="Option A: Upload Document (PDF/TXT)")
            gr.Markdown("**OR**")
            pasted_text = gr.Textbox(label="Option B: Paste Text", placeholder="Paste context here...", lines=5)

            index_btn = gr.Button("πŸ“₯ Index Content", variant="primary")
            index_status = gr.Textbox(label="Status", interactive=False)

            gr.Markdown("---")
            gr.Markdown("### 🎯 Reference Answer (Optional)")
            gr.Markdown("*Providing a reference answer improves grading accuracy*")
            ref_textbox = gr.Textbox(
                label="Perfect Answer Example",
                placeholder="What would a 100% answer look like?",
                lines=3
            )
            set_ref_btn = gr.Button("Set Reference", variant="secondary")
            ref_msg = gr.Textbox(label="Reference Status", interactive=False)

        # --- Right column: grading inputs and results ---
        with gr.Column(scale=2):
            gr.Markdown("### ❓ Grading Interface")

            with gr.Row():
                question_box = gr.Textbox(label="Question", scale=2)
                marks_slider = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max Marks")

            answer_box = gr.TextArea(label="Student Answer", lines=4)

            with gr.Row():
                ensemble_toggle = gr.Checkbox(label="Enable Ensemble Grading (K=3)", value=True)
                grade_btn = gr.Button("πŸš€ Grade Answer", variant="primary", scale=2)

            gr.Markdown("---")

            with gr.Row():
                with gr.Column():
                    context_panel = gr.Markdown(label="πŸ“š Retrieved Context")
                with gr.Column():
                    result_panel = gr.Markdown(label="πŸŽ“ Grading Result")

    # --- Event wiring ---
    index_btn.click(
        system.process_content,
        inputs=[file_upload, pasted_text],
        outputs=[index_status]
    )

    set_ref_btn.click(
        system.set_reference_answer,
        inputs=[ref_textbox],
        outputs=[ref_msg]
    )

    grade_btn.click(
        system.process_query,
        inputs=[question_box, answer_box, marks_slider, ensemble_toggle],
        outputs=[context_panel, result_panel]
    )

if __name__ == "__main__":
    demo.launch()