heerjtdev commited on
Commit
0b67337
·
verified ·
1 Parent(s): cc1712a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -418
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  # import gradio as gr
2
  # import fitz # PyMuPDF
3
  # import torch
@@ -21,6 +22,9 @@
21
  # PROVIDERS = ["CPUExecutionProvider"]
22
  # print(f"⚡ Running on: {PROVIDERS}")
23
 
 
 
 
24
  # # ---------------------------------------------------------
25
  # # 1. OPTIMIZED EMBEDDINGS (BGE-SMALL)
26
  # # ---------------------------------------------------------
@@ -49,94 +53,6 @@
49
  # def embed_query(self, text):
50
  # return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
51
 
52
- # # ---------------------------------------------------------
53
- # # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
54
- # # ---------------------------------------------------------
55
- # # class LLMEvaluator:
56
- # # def __init__(self):
57
- # # self.repo_id = "onnx-community/Qwen2.5-0.5B-Instruct"
58
- # # self.local_dir = "onnx_qwen_local"
59
-
60
- # # print(f"🔄 Preparing CPU LLM: {self.repo_id}...")
61
-
62
- # # if not os.path.exists(self.local_dir):
63
- # # print(f"📥 Downloading FP16 model to {self.local_dir}...")
64
- # # snapshot_download(
65
- # # repo_id=self.repo_id,
66
- # # local_dir=self.local_dir,
67
- # # allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
68
- # # )
69
- # # print("✅ Download complete.")
70
-
71
- # # self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
72
-
73
- # # sess_options = SessionOptions()
74
- # # sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
75
-
76
- # # self.model = ORTModelForCausalLM.from_pretrained(
77
- # # self.local_dir,
78
- # # subfolder="onnx",
79
- # # file_name="model_fp16.onnx",
80
- # # use_cache=True,
81
- # # use_io_binding=False,
82
- # # provider=PROVIDERS[0],
83
- # # session_options=sess_options
84
- # # )
85
-
86
- # # def evaluate(self, context, question, student_answer, max_marks):
87
- # # # OPTIMIZED PROMPT FOR SMALL MODELS (0.5B)
88
- # # messages = [
89
- # # {"role": "system", "content": "You are the strictest, literal academic grader in the world. You ONLY grade based on the provided text. You DO NOT use outside knowledge."},
90
- # # {"role": "user", "content": f"""
91
- # # Task: Grade the student answer based ONLY on the Reference Text.
92
-
93
- # # REFERENCE TEXT:
94
- # # {context}
95
-
96
- # # QUESTION:
97
- # # {question}
98
-
99
- # # STUDENT ANSWER:
100
- # # {student_answer}
101
-
102
- # # -----------------------------
103
- # # GRADING LOGIC:
104
- # # 1. READ the Reference Text and use that as the ground truth. What does it actually say about the Question?
105
- # # 2. COMPARE it to the Student Answer, do not forcefully agree with the answer by seeing things that are not there. You are to penalise irrelevant text and contradictions whenever you encounter them.
106
- # # 3. START with 0 marks and IF the answers line up to the reference text in a meaningful way, then add marks proportionally. ONLY GIVE MARKS FOR CORRECT STATEMENTS STRICTLY BASED ON THE REFERENCE TEXT AND NOTHING ELSE IN THIS WORLD.
107
- # # 4. IF the Student Answer claims things not found in the text, he is incorrect and HALLUCINATING. Do not give marks for that statement/phrase.
108
- # # 5. IF the Student Answer contradicts the text (e.g., Text says "hide personality" but Student says "show personality"), do not give marks for that statement/phrase.
109
-
110
- # # VERDICT:
111
- # # - If wrong: 0/{max_marks}
112
- # # - If correct: {max_marks}/{max_marks}
113
-
114
- # # OUTPUT FORMAT:
115
- # # Score: [X]/{max_marks}
116
- # # Feedback: [Brief explanation citing the text]
117
- # # """}
118
- # # ]
119
-
120
- # # input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
121
- # # inputs = self.tokenizer(input_text, return_tensors="pt")
122
-
123
- # # with torch.no_grad():
124
- # # outputs = self.model.generate(
125
- # # **inputs,
126
- # # max_new_tokens=100,
127
- # # temperature=0.00, # 0.0 = logic only, no creativity
128
- # # do_sample=False,
129
- # # repetition_penalty=1.2
130
- # # )
131
-
132
- # # input_length = inputs['input_ids'].shape[1]
133
- # # response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
134
- # # return response
135
-
136
-
137
-
138
-
139
-
140
 
141
  # # ---------------------------------------------------------
142
  # # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
@@ -144,7 +60,7 @@
144
  # class LLMEvaluator:
145
  # def __init__(self):
146
  # # Qwen 2.5 0.5B is fast but needs "Few-Shot" examples to be strict.
147
- # self.repo_id = "onnx-community/Qwen2.5-0.5B-Instruct"
148
  # self.local_dir = "onnx_qwen_local"
149
 
150
  # print(f"🔄 Preparing CPU LLM: {self.repo_id}...")
@@ -182,8 +98,8 @@
182
  # Your job is to check if the Student Answer is FACTUALLY present in the Context.
183
 
184
  # GRADING ALGORITHM:
185
- # 1. IF the Student Answer mentions things NOT in the Context -> PENALTY (-100%).
186
- # 2. IF the Student Answer interprets the text opposite to its meaning -> PENALTY (-100%).
187
  # 3. IF the Student Answer is generic fluff -> SCORE: 0.
188
 
189
  # --- EXAMPLE 1 (HALLUCINATION) ---
@@ -248,110 +164,6 @@
248
  # return response
249
 
250
 
251
- # # # ---------------------------------------------------------
252
- # # # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
253
- # # # ---------------------------------------------------------
254
- # # class LLMEvaluator:
255
- # # def __init__(self):
256
- # # # Qwen 0.5B is great for speed, but needs VERY specific prompts to be strict.
257
- # # self.repo_id = "onnx-community/Qwen2.5-0.5B-Instruct"
258
- # # self.local_dir = "onnx_qwen_local"
259
-
260
- # # print(f"🔄 Preparing CPU LLM: {self.repo_id}...")
261
-
262
- # # if not os.path.exists(self.local_dir):
263
- # # print(f"📥 Downloading FP16 model to {self.local_dir}...")
264
- # # snapshot_download(
265
- # # repo_id=self.repo_id,
266
- # # local_dir=self.local_dir,
267
- # # allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
268
- # # )
269
- # # print("✅ Download complete.")
270
-
271
- # # self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
272
-
273
- # # sess_options = SessionOptions()
274
- # # sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
275
-
276
- # # self.model = ORTModelForCausalLM.from_pretrained(
277
- # # self.local_dir,
278
- # # subfolder="onnx",
279
- # # file_name="model_fp16.onnx",
280
- # # use_cache=True,
281
- # # use_io_binding=False,
282
- # # provider=PROVIDERS[0],
283
- # # session_options=sess_options
284
- # # )
285
-
286
- # # def evaluate(self, context, question, student_answer, max_marks):
287
- # # # --- STRATEGY: FEW-SHOT PROMPTING & CHAIN OF THOUGHT ---
288
- # # # Small models (0.5B) need examples to understand "Strictness".
289
-
290
- # # system_prompt = """You are a strict automated grader. You grade ONLY based on the provided Context.
291
-
292
- # # RULES:
293
- # # 1. If the Student Answer contains facts NOT found in the Context, Score is 0.
294
- # # 2. If the Student Answer contradicts the Context, Score is 0.
295
- # # 3. Do not use outside knowledge. If it's not in the text, it's wrong.
296
-
297
- # # --- EXAMPLE 1 (WRONG ANSWER) ---
298
- # # Context: The sky is blue because of Rayleigh scattering.
299
- # # Question: Why is the sky blue?
300
- # # Student Answer: Because the ocean reflects into it.
301
- # # Analysis: The context mentions Rayleigh scattering. The student mentioned ocean reflection. These do not match.
302
- # # Score: 0/{max_marks}
303
-
304
- # # --- EXAMPLE 2 (CORRECT ANSWER) ---
305
- # # Context: Mitochondria is the powerhouse of the cell.
306
- # # Question: What is the mitochondria?
307
- # # Student Answer: It is the powerhouse of the cell.
308
- # # Analysis: The student answer matches the context text exactly.
309
- # # Score: {max_marks}/{max_marks}
310
- # # """
311
-
312
- # # user_prompt = f"""
313
- # # --- NOW GRADE THIS ---
314
- # # Context:
315
- # # {context}
316
-
317
- # # Question:
318
- # # {question}
319
-
320
- # # Student Answer:
321
- # # {student_answer}
322
-
323
- # # Task:
324
- # # 1. Analyze if the specific keywords in Student Answer exist in Context.
325
- # # 2. Assign a Score.
326
-
327
- # # Output format:
328
- # # Analysis: [Analysis here]
329
- # # Score: [X]/{max_marks}
330
- # # """
331
-
332
- # # messages = [
333
- # # {"role": "system", "content": system_prompt},
334
- # # {"role": "user", "content": user_prompt}
335
- # # ]
336
-
337
- # # input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
338
- # # inputs = self.tokenizer(input_text, return_tensors="pt")
339
-
340
- # # with torch.no_grad():
341
- # # outputs = self.model.generate(
342
- # # **inputs,
343
- # # max_new_tokens=150,
344
- # # temperature=0.1, # Low temperature for facts
345
- # # top_p=0.1, # Reduce creativity
346
- # # do_sample=True,
347
- # # repetition_penalty=1.1
348
- # # )
349
-
350
- # # input_length = inputs['input_ids'].shape[1]
351
- # # response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
352
- # # return response
353
-
354
-
355
  # # ---------------------------------------------------------
356
  # # 3. Main Application Logic
357
  # # ---------------------------------------------------------
@@ -363,23 +175,38 @@
363
  # self.all_chunks = []
364
  # self.total_chunks = 0
365
 
366
- # def process_file(self, file_obj):
367
- # if file_obj is None: return "No file uploaded."
 
 
 
 
 
 
 
 
 
368
  # try:
369
  # text = ""
370
- # if file_obj.name.endswith('.pdf'):
371
- # doc = fitz.open(file_obj.name)
372
- # for page in doc: text += page.get_text()
373
- # elif file_obj.name.endswith('.txt'):
374
- # with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
 
 
 
 
 
 
375
  # else:
376
- # return "❌ Error: Only .pdf and .txt supported."
377
 
378
  # text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
379
  # self.all_chunks = text_splitter.split_text(text)
380
  # self.total_chunks = len(self.all_chunks)
381
 
382
- # if not self.all_chunks: return "File empty."
383
 
384
  # metadatas = [{"id": i} for i in range(self.total_chunks)]
385
  # self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
@@ -389,7 +216,7 @@
389
  # return f"Error: {str(e)}"
390
 
391
  # def process_query(self, question, student_answer, max_marks):
392
- # if not self.vector_store: return "⚠️ Please upload a file first.", ""
393
  # if not question: return "⚠️ Enter a question.", ""
394
 
395
  # results = self.vector_store.similarity_search_with_score(question, k=1)
@@ -420,7 +247,11 @@
420
 
421
  # with gr.Row():
422
  # with gr.Column(scale=1):
423
- # pdf_input = gr.File(label="1. Upload Chapter")
 
 
 
 
424
  # upload_btn = gr.Button("Index Content", variant="primary")
425
  # status_msg = gr.Textbox(label="Status", interactive=False)
426
 
@@ -436,7 +267,8 @@
436
  # evidence_box = gr.Markdown(label="Context Used")
437
  # grade_box = gr.Markdown(label="Grading Result")
438
 
439
- # upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
 
440
  # run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])
441
 
442
  # if __name__ == "__main__":
@@ -453,11 +285,19 @@
453
 
454
 
455
 
 
 
 
 
 
 
 
 
456
  import gradio as gr
457
  import fitz # PyMuPDF
458
  import torch
459
  import os
460
- import onnxruntime as ort
461
 
462
  # --- IMPORT SESSION OPTIONS ---
463
  from onnxruntime import SessionOptions, GraphOptimizationLevel
@@ -466,10 +306,11 @@ from onnxruntime import SessionOptions, GraphOptimizationLevel
466
  from langchain_text_splitters import RecursiveCharacterTextSplitter
467
  from langchain_community.vectorstores import FAISS
468
  from langchain_core.embeddings import Embeddings
 
469
 
470
  # --- ONNX & MODEL IMPORTS ---
471
  from transformers import AutoTokenizer
472
- from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM
473
  from huggingface_hub import snapshot_download
474
 
475
  # Force CPU Provider
@@ -504,94 +345,6 @@ class OnnxBgeEmbeddings(Embeddings):
504
  def embed_query(self, text):
505
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
506
 
507
- # ---------------------------------------------------------
508
- # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
509
- # ---------------------------------------------------------
510
- # class LLMEvaluator:
511
- # def __init__(self):
512
- # self.repo_id = "onnx-community/Qwen2.5-0.5B-Instruct"
513
- # self.local_dir = "onnx_qwen_local"
514
-
515
- # print(f"🔄 Preparing CPU LLM: {self.repo_id}...")
516
-
517
- # if not os.path.exists(self.local_dir):
518
- # print(f"📥 Downloading FP16 model to {self.local_dir}...")
519
- # snapshot_download(
520
- # repo_id=self.repo_id,
521
- # local_dir=self.local_dir,
522
- # allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
523
- # )
524
- # print("✅ Download complete.")
525
-
526
- # self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
527
-
528
- # sess_options = SessionOptions()
529
- # sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
530
-
531
- # self.model = ORTModelForCausalLM.from_pretrained(
532
- # self.local_dir,
533
- # subfolder="onnx",
534
- # file_name="model_fp16.onnx",
535
- # use_cache=True,
536
- # use_io_binding=False,
537
- # provider=PROVIDERS[0],
538
- # session_options=sess_options
539
- # )
540
-
541
- # def evaluate(self, context, question, student_answer, max_marks):
542
- # # OPTIMIZED PROMPT FOR SMALL MODELS (0.5B)
543
- # messages = [
544
- # {"role": "system", "content": "You are the strictest, literal academic grader in the world. You ONLY grade based on the provided text. You DO NOT use outside knowledge."},
545
- # {"role": "user", "content": f"""
546
- # Task: Grade the student answer based ONLY on the Reference Text.
547
-
548
- # REFERENCE TEXT:
549
- # {context}
550
-
551
- # QUESTION:
552
- # {question}
553
-
554
- # STUDENT ANSWER:
555
- # {student_answer}
556
-
557
- # -----------------------------
558
- # GRADING LOGIC:
559
- # 1. READ the Reference Text and use that as the ground truth. What does it actually say about the Question?
560
- # 2. COMPARE it to the Student Answer, do not forcefully agree with the answer by seeing things that are not there. You are to penalise irrelevant text and contradictions whenever you encounter them.
561
- # 3. START with 0 marks and IF the answers line up to the reference text in a meaningful way, then add marks proportionally. ONLY GIVE MARKS FOR CORRECT STATEMENTS STRICTLY BASED ON THE REFERENCE TEXT AND NOTHING ELSE IN THIS WORLD.
562
- # 4. IF the Student Answer claims things not found in the text, he is incorrect and HALLUCINATING. Do not give marks for that statement/phrase.
563
- # 5. IF the Student Answer contradicts the text (e.g., Text says "hide personality" but Student says "show personality"), do not give marks for that statement/phrase.
564
-
565
- # VERDICT:
566
- # - If wrong: 0/{max_marks}
567
- # - If correct: {max_marks}/{max_marks}
568
-
569
- # OUTPUT FORMAT:
570
- # Score: [X]/{max_marks}
571
- # Feedback: [Brief explanation citing the text]
572
- # """}
573
- # ]
574
-
575
- # input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
576
- # inputs = self.tokenizer(input_text, return_tensors="pt")
577
-
578
- # with torch.no_grad():
579
- # outputs = self.model.generate(
580
- # **inputs,
581
- # max_new_tokens=100,
582
- # temperature=0.00, # 0.0 = logic only, no creativity
583
- # do_sample=False,
584
- # repetition_penalty=1.2
585
- # )
586
-
587
- # input_length = inputs['input_ids'].shape[1]
588
- # response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
589
- # return response
590
-
591
-
592
-
593
-
594
-
595
 
596
  # ---------------------------------------------------------
597
  # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
@@ -630,9 +383,6 @@ class LLMEvaluator:
630
 
631
  def evaluate(self, context, question, student_answer, max_marks):
632
  # --- IMPROVED PROMPT STRATEGY ---
633
- # 1. Role: We set the persona to a "Strict Logical Validator" not a "Teacher".
634
- # 2. Few-Shot: We give examples of HALLUCINATIONS getting 0 marks.
635
-
636
  system_prompt = f"""You are a strict Logic Validator. You are NOT a helpful assistant.
637
  Your job is to check if the Student Answer is FACTUALLY present in the Context.
638
 
@@ -703,123 +453,70 @@ class LLMEvaluator:
703
  return response
704
 
705
 
706
- # # ---------------------------------------------------------
707
- # # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
708
- # # ---------------------------------------------------------
709
- # class LLMEvaluator:
710
- # def __init__(self):
711
- # # Qwen 0.5B is great for speed, but needs VERY specific prompts to be strict.
712
- # self.repo_id = "onnx-community/Qwen2.5-0.5B-Instruct"
713
- # self.local_dir = "onnx_qwen_local"
714
-
715
- # print(f"🔄 Preparing CPU LLM: {self.repo_id}...")
716
-
717
- # if not os.path.exists(self.local_dir):
718
- # print(f"📥 Downloading FP16 model to {self.local_dir}...")
719
- # snapshot_download(
720
- # repo_id=self.repo_id,
721
- # local_dir=self.local_dir,
722
- # allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
723
- # )
724
- # print("✅ Download complete.")
725
-
726
- # self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
727
-
728
- # sess_options = SessionOptions()
729
- # sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
730
-
731
- # self.model = ORTModelForCausalLM.from_pretrained(
732
- # self.local_dir,
733
- # subfolder="onnx",
734
- # file_name="model_fp16.onnx",
735
- # use_cache=True,
736
- # use_io_binding=False,
737
- # provider=PROVIDERS[0],
738
- # session_options=sess_options
739
- # )
740
 
741
- # def evaluate(self, context, question, student_answer, max_marks):
742
- # # --- STRATEGY: FEW-SHOT PROMPTING & CHAIN OF THOUGHT ---
743
- # # Small models (0.5B) need examples to understand "Strictness".
744
 
745
- # system_prompt = """You are a strict automated grader. You grade ONLY based on the provided Context.
746
-
747
- # RULES:
748
- # 1. If the Student Answer contains facts NOT found in the Context, Score is 0.
749
- # 2. If the Student Answer contradicts the Context, Score is 0.
750
- # 3. Do not use outside knowledge. If it's not in the text, it's wrong.
751
-
752
- # --- EXAMPLE 1 (WRONG ANSWER) ---
753
- # Context: The sky is blue because of Rayleigh scattering.
754
- # Question: Why is the sky blue?
755
- # Student Answer: Because the ocean reflects into it.
756
- # Analysis: The context mentions Rayleigh scattering. The student mentioned ocean reflection. These do not match.
757
- # Score: 0/{max_marks}
758
 
759
- # --- EXAMPLE 2 (CORRECT ANSWER) ---
760
- # Context: Mitochondria is the powerhouse of the cell.
761
- # Question: What is the mitochondria?
762
- # Student Answer: It is the powerhouse of the cell.
763
- # Analysis: The student answer matches the context text exactly.
764
- # Score: {max_marks}/{max_marks}
765
- # """
766
-
767
- # user_prompt = f"""
768
- # --- NOW GRADE THIS ---
769
- # Context:
770
- # {context}
771
-
772
- # Question:
773
- # {question}
774
-
775
- # Student Answer:
776
- # {student_answer}
777
-
778
- # Task:
779
- # 1. Analyze if the specific keywords in Student Answer exist in Context.
780
- # 2. Assign a Score.
781
-
782
- # Output format:
783
- # Analysis: [Analysis here]
784
- # Score: [X]/{max_marks}
785
- # """
786
-
787
- # messages = [
788
- # {"role": "system", "content": system_prompt},
789
- # {"role": "user", "content": user_prompt}
790
- # ]
791
 
792
- # input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
793
- # inputs = self.tokenizer(input_text, return_tensors="pt")
794
 
795
- # with torch.no_grad():
796
- # outputs = self.model.generate(
797
- # **inputs,
798
- # max_new_tokens=150,
799
- # temperature=0.1, # Low temperature for facts
800
- # top_p=0.1, # Reduce creativity
801
- # do_sample=True,
802
- # repetition_penalty=1.1
803
- # )
 
 
 
804
 
805
- # input_length = inputs['input_ids'].shape[1]
806
- # response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
807
- # return response
808
 
809
 
810
  # ---------------------------------------------------------
811
- # 3. Main Application Logic
812
  # ---------------------------------------------------------
813
  class VectorSystem:
814
  def __init__(self):
815
  self.vector_store = None
816
  self.embeddings = OnnxBgeEmbeddings()
817
  self.llm = LLMEvaluator()
 
818
  self.all_chunks = []
819
  self.total_chunks = 0
820
 
821
  def process_content(self, file_obj, raw_text):
822
- # LOGIC: Check for exclusivity (Cannot have both file and text)
823
  has_file = file_obj is not None
824
  has_text = raw_text is not None and len(raw_text.strip()) > 0
825
 
@@ -831,7 +528,6 @@ class VectorSystem:
831
 
832
  try:
833
  text = ""
834
- # Case 1: Process File
835
  if has_file:
836
  if file_obj.name.endswith('.pdf'):
837
  doc = fitz.open(file_obj.name)
@@ -840,19 +536,21 @@ class VectorSystem:
840
  with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
841
  else:
842
  return "❌ Error: Only .pdf and .txt supported."
843
-
844
- # Case 2: Process Raw Text
845
  else:
846
  text = raw_text
847
 
848
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
849
- self.all_chunks = text_splitter.split_text(text)
850
- self.total_chunks = len(self.all_chunks)
 
 
 
 
 
851
 
852
- if not self.all_chunks: return "Content empty."
853
 
854
- metadatas = [{"id": i} for i in range(self.total_chunks)]
855
- self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
856
 
857
  return f"✅ Indexed {self.total_chunks} chunks."
858
  except Exception as e:
@@ -862,19 +560,20 @@ class VectorSystem:
862
  if not self.vector_store: return "⚠️ Please upload a file or paste text first.", ""
863
  if not question: return "⚠️ Enter a question.", ""
864
 
865
- results = self.vector_store.similarity_search_with_score(question, k=1)
866
- top_doc, score = results[0]
 
867
 
868
- center_id = top_doc.metadata['id']
869
- start_id = max(0, center_id - 1)
870
- end_id = min(self.total_chunks - 1, center_id + 1)
871
 
872
- expanded_context = ""
873
- for i in range(start_id, end_id + 1):
874
- expanded_context += self.all_chunks[i] + "\n"
875
 
876
- evidence_display = f"### 📚 Expanded Context (Chunks {start_id} to {end_id}):\n"
877
- evidence_display += f"> ... {expanded_context} ..."
878
 
879
  llm_feedback = "Please enter a student answer to grade."
880
  if student_answer:
@@ -886,7 +585,7 @@ system = VectorSystem()
886
 
887
  with gr.Blocks(title="EduGenius AI Grader") as demo:
888
  gr.Markdown("# ⚡ EduGenius: CPU Optimized RAG")
889
- gr.Markdown("Powered by **Qwen-2.5-0.5B** and **BGE-Small** (ONNX Optimized)")
890
 
891
  with gr.Row():
892
  with gr.Column(scale=1):
 
1
+
2
  # import gradio as gr
3
  # import fitz # PyMuPDF
4
  # import torch
 
22
  # PROVIDERS = ["CPUExecutionProvider"]
23
  # print(f"⚡ Running on: {PROVIDERS}")
24
 
25
+
26
+
27
+
28
  # # ---------------------------------------------------------
29
  # # 1. OPTIMIZED EMBEDDINGS (BGE-SMALL)
30
  # # ---------------------------------------------------------
 
53
  # def embed_query(self, text):
54
  # return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  # # ---------------------------------------------------------
58
  # # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
 
60
  # class LLMEvaluator:
61
  # def __init__(self):
62
  # # Qwen 2.5 0.5B is fast but needs "Few-Shot" examples to be strict.
63
+ # self.repo_id = "onnx-community/Qwen2.5-1.5B-Instruct"
64
  # self.local_dir = "onnx_qwen_local"
65
 
66
  # print(f"🔄 Preparing CPU LLM: {self.repo_id}...")
 
98
  # Your job is to check if the Student Answer is FACTUALLY present in the Context.
99
 
100
  # GRADING ALGORITHM:
101
+ # 1. IF the Student Answer mentions things NOT in the Context -> PENALTY (-50% of the marks).
102
+ # 2. IF the Student Answer interprets the text opposite to its meaning -> PENALTY (-100% of the marks).
103
  # 3. IF the Student Answer is generic fluff -> SCORE: 0.
104
 
105
  # --- EXAMPLE 1 (HALLUCINATION) ---
 
164
  # return response
165
 
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  # # ---------------------------------------------------------
168
  # # 3. Main Application Logic
169
  # # ---------------------------------------------------------
 
175
  # self.all_chunks = []
176
  # self.total_chunks = 0
177
 
178
+ # def process_content(self, file_obj, raw_text):
179
+ # # LOGIC: Check for exclusivity (Cannot have both file and text)
180
+ # has_file = file_obj is not None
181
+ # has_text = raw_text is not None and len(raw_text.strip()) > 0
182
+
183
+ # if has_file and has_text:
184
+ # return "❌ Error: Please provide EITHER a file OR paste text, not both at the same time."
185
+
186
+ # if not has_file and not has_text:
187
+ # return "⚠️ No content provided. Please upload a file or paste text."
188
+
189
  # try:
190
  # text = ""
191
+ # # Case 1: Process File
192
+ # if has_file:
193
+ # if file_obj.name.endswith('.pdf'):
194
+ # doc = fitz.open(file_obj.name)
195
+ # for page in doc: text += page.get_text()
196
+ # elif file_obj.name.endswith('.txt'):
197
+ # with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
198
+ # else:
199
+ # return "❌ Error: Only .pdf and .txt supported."
200
+
201
+ # # Case 2: Process Raw Text
202
  # else:
203
+ # text = raw_text
204
 
205
  # text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
206
  # self.all_chunks = text_splitter.split_text(text)
207
  # self.total_chunks = len(self.all_chunks)
208
 
209
+ # if not self.all_chunks: return "Content empty."
210
 
211
  # metadatas = [{"id": i} for i in range(self.total_chunks)]
212
  # self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
 
216
  # return f"Error: {str(e)}"
217
 
218
  # def process_query(self, question, student_answer, max_marks):
219
+ # if not self.vector_store: return "⚠️ Please upload a file or paste text first.", ""
220
  # if not question: return "⚠️ Enter a question.", ""
221
 
222
  # results = self.vector_store.similarity_search_with_score(question, k=1)
 
247
 
248
  # with gr.Row():
249
  # with gr.Column(scale=1):
250
+ # gr.Markdown("### Source Input (Choose One)")
251
+ # pdf_input = gr.File(label="Option A: Upload Chapter (PDF/TXT)")
252
+ # gr.Markdown("**OR**")
253
+ # text_input = gr.Textbox(label="Option B: Paste Context", placeholder="Paste text here if you don't have a file...", lines=5)
254
+
255
  # upload_btn = gr.Button("Index Content", variant="primary")
256
  # status_msg = gr.Textbox(label="Status", interactive=False)
257
 
 
267
  # evidence_box = gr.Markdown(label="Context Used")
268
  # grade_box = gr.Markdown(label="Grading Result")
269
 
270
+ # # Pass both inputs to the process_content function
271
+ # upload_btn.click(system.process_content, inputs=[pdf_input, text_input], outputs=[status_msg])
272
  # run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])
273
 
274
  # if __name__ == "__main__":
 
285
 
286
 
287
 
288
+
289
+
290
+
291
+
292
+
293
+
294
+
295
+
296
  import gradio as gr
297
  import fitz # PyMuPDF
298
  import torch
299
  import os
300
+ import numpy as np
301
 
302
  # --- IMPORT SESSION OPTIONS ---
303
  from onnxruntime import SessionOptions, GraphOptimizationLevel
 
306
  from langchain_text_splitters import RecursiveCharacterTextSplitter
307
  from langchain_community.vectorstores import FAISS
308
  from langchain_core.embeddings import Embeddings
309
+ from langchain_core.documents import Document
310
 
311
  # --- ONNX & MODEL IMPORTS ---
312
  from transformers import AutoTokenizer
313
+ from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM, ORTModelForSequenceClassification
314
  from huggingface_hub import snapshot_download
315
 
316
  # Force CPU Provider
 
345
  def embed_query(self, text):
346
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
347
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
  # ---------------------------------------------------------
350
  # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
 
383
 
384
  def evaluate(self, context, question, student_answer, max_marks):
385
  # --- IMPROVED PROMPT STRATEGY ---
 
 
 
386
  system_prompt = f"""You are a strict Logic Validator. You are NOT a helpful assistant.
387
  Your job is to check if the Student Answer is FACTUALLY present in the Context.
388
 
 
453
  return response
454
 
455
 
456
+ # ---------------------------------------------------------
457
+ # 3. NEW: ONNX RERANKER (Cross-Encoder)
458
+ # Uses existing 'optimum' & 'transformers' libs (No new deps)
459
+ # ---------------------------------------------------------
460
class OnnxReranker:
    """Cross-encoder reranker running on ONNX Runtime (CPU only).

    Scores (query, passage) pairs jointly with a tiny MS-MARCO
    cross-encoder so the most relevant retrieved chunks can be
    promoted before they reach the LLM grader.
    """

    def __init__(self):
        # TinyBERT checkpoint is ~17 MB, so it loads and scores quickly on CPU.
        self.model_name = "Xenova/ms-marco-TinyBERT-L-2-v2"
        print(f"🔄 Loading Reranker: {self.model_name}...")
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = ORTModelForSequenceClassification.from_pretrained(
            self.model_name,
            export=False,
            provider=PROVIDERS[0]
        )

    def rank(self, query, docs, top_k=3):
        """Return the ``top_k`` documents most relevant to ``query``.

        ``docs`` are LangChain-style documents (each must expose a
        ``page_content`` attribute); an empty input yields an empty list.
        """
        if not docs:
            return []

        # Cross-encoders judge each (query, passage) pair as a single input.
        query_doc_pairs = [[query, candidate.page_content] for candidate in docs]

        encoded = self.tokenizer(
            query_doc_pairs,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt"
        )

        with torch.no_grad():
            model_out = self.model(**encoded)

        # MS-MARCO heads emit either a single relevance logit or an
        # [irrelevant, relevant] pair; normalise both to one score per doc.
        logits = model_out.logits
        relevance = logits[:, 1] if logits.shape[1] == 2 else logits.flatten()

        # Highest score first; Python's sort is stable, matching the
        # original in-place descending sort exactly.
        ranked = sorted(
            zip(docs, relevance.numpy().tolist()),
            key=lambda pair: pair[1],
            reverse=True
        )
        return [document for document, _ in ranked[:top_k]]
 
505
 
506
 
507
  # ---------------------------------------------------------
508
+ # 4. Main Application Logic
509
  # ---------------------------------------------------------
510
  class VectorSystem:
511
  def __init__(self):
512
  self.vector_store = None
513
  self.embeddings = OnnxBgeEmbeddings()
514
  self.llm = LLMEvaluator()
515
+ self.reranker = OnnxReranker() # Initialize Reranker
516
  self.all_chunks = []
517
  self.total_chunks = 0
518
 
519
  def process_content(self, file_obj, raw_text):
 
520
  has_file = file_obj is not None
521
  has_text = raw_text is not None and len(raw_text.strip()) > 0
522
 
 
528
 
529
  try:
530
  text = ""
 
531
  if has_file:
532
  if file_obj.name.endswith('.pdf'):
533
  doc = fitz.open(file_obj.name)
 
536
  with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
537
  else:
538
  return "❌ Error: Only .pdf and .txt supported."
 
 
539
  else:
540
  text = raw_text
541
 
542
+ # Smaller chunks for Reranking precision (500 chars)
543
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
544
+ texts = text_splitter.split_text(text)
545
+ self.all_chunks = texts # Keep plain text list for reference
546
+
547
+ # Create Document objects with metadata
548
+ docs = [Document(page_content=t, metadata={"id": i}) for i, t in enumerate(texts)]
549
+ self.total_chunks = len(docs)
550
 
551
+ if not docs: return "Content empty."
552
 
553
+ self.vector_store = FAISS.from_documents(docs, self.embeddings)
 
554
 
555
  return f"✅ Indexed {self.total_chunks} chunks."
556
  except Exception as e:
 
560
  if not self.vector_store: return "⚠️ Please upload a file or paste text first.", ""
561
  if not question: return "⚠️ Enter a question.", ""
562
 
563
+ # Step A: Wide Net Retrieval (Get top 15 candidates)
564
+ # We fetch more than we need to ensure the answer is in the candidate pool
565
+ initial_docs = self.vector_store.similarity_search(question, k=15)
566
 
567
+ # Step B: Rerank (Get top 3 best matches)
568
+ # The Cross-Encoder strictly judges relevance
569
+ top_docs = self.reranker.rank(question, initial_docs, top_k=3)
570
 
571
+ # Step C: Construct Context
572
+ # We merge the top 3 specific chunks
573
+ expanded_context = "\n\n---\n\n".join([d.page_content for d in top_docs])
574
 
575
+ evidence_display = f"### 📚 Optimized Context (Top {len(top_docs)} chunks after Reranking):\n"
576
+ evidence_display += f"> {expanded_context} ..."
577
 
578
  llm_feedback = "Please enter a student answer to grade."
579
  if student_answer:
 
585
 
586
  with gr.Blocks(title="EduGenius AI Grader") as demo:
587
  gr.Markdown("# ⚡ EduGenius: CPU Optimized RAG")
588
+ gr.Markdown("Powered by **Qwen-2.5-0.5B**, **BGE-Small** & **TinyBERT Reranker**")
589
 
590
  with gr.Row():
591
  with gr.Column(scale=1):