heerjtdev commited on
Commit
18a182e
·
verified ·
1 Parent(s): 6662485

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +487 -12
app.py CHANGED
@@ -1,3 +1,458 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import fitz # PyMuPDF
3
  import torch
@@ -363,23 +818,38 @@ class VectorSystem:
363
  self.all_chunks = []
364
  self.total_chunks = 0
365
 
366
- def process_file(self, file_obj):
367
- if file_obj is None: return "No file uploaded."
 
 
 
 
 
 
 
 
 
368
  try:
369
  text = ""
370
- if file_obj.name.endswith('.pdf'):
371
- doc = fitz.open(file_obj.name)
372
- for page in doc: text += page.get_text()
373
- elif file_obj.name.endswith('.txt'):
374
- with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
 
 
 
 
 
 
375
  else:
376
- return "❌ Error: Only .pdf and .txt supported."
377
 
378
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
379
  self.all_chunks = text_splitter.split_text(text)
380
  self.total_chunks = len(self.all_chunks)
381
 
382
- if not self.all_chunks: return "File empty."
383
 
384
  metadatas = [{"id": i} for i in range(self.total_chunks)]
385
  self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
@@ -389,7 +859,7 @@ class VectorSystem:
389
  return f"Error: {str(e)}"
390
 
391
  def process_query(self, question, student_answer, max_marks):
392
- if not self.vector_store: return "⚠️ Please upload a file first.", ""
393
  if not question: return "⚠️ Enter a question.", ""
394
 
395
  results = self.vector_store.similarity_search_with_score(question, k=1)
@@ -420,7 +890,11 @@ with gr.Blocks(title="EduGenius AI Grader") as demo:
420
 
421
  with gr.Row():
422
  with gr.Column(scale=1):
423
- pdf_input = gr.File(label="1. Upload Chapter")
 
 
 
 
424
  upload_btn = gr.Button("Index Content", variant="primary")
425
  status_msg = gr.Textbox(label="Status", interactive=False)
426
 
@@ -436,7 +910,8 @@ with gr.Blocks(title="EduGenius AI Grader") as demo:
436
  evidence_box = gr.Markdown(label="Context Used")
437
  grade_box = gr.Markdown(label="Grading Result")
438
 
439
- upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
 
440
  run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])
441
 
442
  if __name__ == "__main__":
 
1
+ # import gradio as gr
2
+ # import fitz # PyMuPDF
3
+ # import torch
4
+ # import os
5
+ # import onnxruntime as ort
6
+
7
+ # # --- IMPORT SESSION OPTIONS ---
8
+ # from onnxruntime import SessionOptions, GraphOptimizationLevel
9
+
10
+ # # --- LANGCHAIN & RAG IMPORTS ---
11
+ # from langchain_text_splitters import RecursiveCharacterTextSplitter
12
+ # from langchain_community.vectorstores import FAISS
13
+ # from langchain_core.embeddings import Embeddings
14
+
15
+ # # --- ONNX & MODEL IMPORTS ---
16
+ # from transformers import AutoTokenizer
17
+ # from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM
18
+ # from huggingface_hub import snapshot_download
19
+
20
+ # # Force CPU Provider
21
+ # PROVIDERS = ["CPUExecutionProvider"]
22
+ # print(f"⚡ Running on: {PROVIDERS}")
23
+
24
+ # # ---------------------------------------------------------
25
+ # # 1. OPTIMIZED EMBEDDINGS (BGE-SMALL)
26
+ # # ---------------------------------------------------------
27
+ # class OnnxBgeEmbeddings(Embeddings):
28
+ # def __init__(self):
29
+ # model_name = "Xenova/bge-small-en-v1.5"
30
+ # print(f"🔄 Loading Embeddings: {model_name}...")
31
+ # self.tokenizer = AutoTokenizer.from_pretrained(model_name)
32
+ # self.model = ORTModelForFeatureExtraction.from_pretrained(
33
+ # model_name,
34
+ # export=False,
35
+ # provider=PROVIDERS[0]
36
+ # )
37
+
38
+ # def _process_batch(self, texts):
39
+ # inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
40
+ # with torch.no_grad():
41
+ # outputs = self.model(**inputs)
42
+ # embeddings = outputs.last_hidden_state[:, 0]
43
+ # embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
44
+ # return embeddings.numpy().tolist()
45
+
46
+ # def embed_documents(self, texts):
47
+ # return self._process_batch(texts)
48
+
49
+ # def embed_query(self, text):
50
+ # return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
51
+
52
+ # # ---------------------------------------------------------
53
+ # # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
54
+ # # ---------------------------------------------------------
55
+ # # class LLMEvaluator:
56
+ # # def __init__(self):
57
+ # # self.repo_id = "onnx-community/Qwen2.5-0.5B-Instruct"
58
+ # # self.local_dir = "onnx_qwen_local"
59
+
60
+ # # print(f"🔄 Preparing CPU LLM: {self.repo_id}...")
61
+
62
+ # # if not os.path.exists(self.local_dir):
63
+ # # print(f"📥 Downloading FP16 model to {self.local_dir}...")
64
+ # # snapshot_download(
65
+ # # repo_id=self.repo_id,
66
+ # # local_dir=self.local_dir,
67
+ # # allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
68
+ # # )
69
+ # # print("✅ Download complete.")
70
+
71
+ # # self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
72
+
73
+ # # sess_options = SessionOptions()
74
+ # # sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
75
+
76
+ # # self.model = ORTModelForCausalLM.from_pretrained(
77
+ # # self.local_dir,
78
+ # # subfolder="onnx",
79
+ # # file_name="model_fp16.onnx",
80
+ # # use_cache=True,
81
+ # # use_io_binding=False,
82
+ # # provider=PROVIDERS[0],
83
+ # # session_options=sess_options
84
+ # # )
85
+
86
+ # # def evaluate(self, context, question, student_answer, max_marks):
87
+ # # # OPTIMIZED PROMPT FOR SMALL MODELS (0.5B)
88
+ # # messages = [
89
+ # # {"role": "system", "content": "You are a strictest, literal academic grader in the whole. You ONLY grade based on the provided text. You DO NOT use outside knowledge."},
90
+ # # {"role": "user", "content": f"""
91
+ # # Task: Grade the student answer based ONLY on the Reference Text.
92
+
93
+ # # REFERENCE TEXT:
94
+ # # {context}
95
+
96
+ # # QUESTION:
97
+ # # {question}
98
+
99
+ # # STUDENT ANSWER:
100
+ # # {student_answer}
101
+
102
+ # # -----------------------------
103
+ # # GRADING LOGIC:
104
+ # # 1. READ the Reference Text and use that as the ground truth. What does it actually say about the Question?
105
+ # # 2. COMPARE it to the Student Answer, do not forcefully agree with the answer by seeing things that are not there. You are to penalise irrelevant text and contradictions whenever you encounter them.
106
+ # # 3. START with 0 marks and IF the answers line up to the reference text in a meaningful way, then add marks proportionally. ONLY GIVE MARKS FOR CORRECT STATEMENTS STRICTLY BASED ON THE REFERENCE TEXT AND NOTHING ELSE IN THIS WORLD.
107
+ # # 4. IF the Student Answer claims things not found in the text, it is incorrect and HALLUCINATING. Do not give marks for that statement/phrase.
108
+ # # 5. IF the Student Answer contradicts the text (e.g., Text says "hide personality" but Student says "show personality"), do not give marks for that statement/phrase.
109
+
110
+ # # VERDICT:
111
+ # # - If wrong: 0/{max_marks}
112
+ # # - If correct: {max_marks}/{max_marks}
113
+
114
+ # # OUTPUT FORMAT:
115
+ # # Score: [X]/{max_marks}
116
+ # # Feedback: [Brief explanation citing the text]
117
+ # # """}
118
+ # # ]
119
+
120
+ # # input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
121
+ # # inputs = self.tokenizer(input_text, return_tensors="pt")
122
+
123
+ # # with torch.no_grad():
124
+ # # outputs = self.model.generate(
125
+ # # **inputs,
126
+ # # max_new_tokens=100,
127
+ # # temperature=0.00, # 0.0 = logic only, no creativity
128
+ # # do_sample=False,
129
+ # # repetition_penalty=1.2
130
+ # # )
131
+
132
+ # # input_length = inputs['input_ids'].shape[1]
133
+ # # response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
134
+ # # return response
135
+
136
+
137
+
138
+
139
+
140
+
141
+ # # ---------------------------------------------------------
142
+ # # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
143
+ # # ---------------------------------------------------------
144
+ # class LLMEvaluator:
145
+ # def __init__(self):
146
+ # # Qwen 2.5 0.5B is fast but needs "Few-Shot" examples to be strict.
147
+ # self.repo_id = "onnx-community/Qwen2.5-0.5B-Instruct"
148
+ # self.local_dir = "onnx_qwen_local"
149
+
150
+ # print(f"🔄 Preparing CPU LLM: {self.repo_id}...")
151
+
152
+ # if not os.path.exists(self.local_dir):
153
+ # print(f"📥 Downloading FP16 model to {self.local_dir}...")
154
+ # snapshot_download(
155
+ # repo_id=self.repo_id,
156
+ # local_dir=self.local_dir,
157
+ # allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
158
+ # )
159
+ # print("✅ Download complete.")
160
+
161
+ # self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
162
+
163
+ # sess_options = SessionOptions()
164
+ # sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
165
+
166
+ # self.model = ORTModelForCausalLM.from_pretrained(
167
+ # self.local_dir,
168
+ # subfolder="onnx",
169
+ # file_name="model_fp16.onnx",
170
+ # use_cache=True,
171
+ # use_io_binding=False,
172
+ # provider=PROVIDERS[0],
173
+ # session_options=sess_options
174
+ # )
175
+
176
+ # def evaluate(self, context, question, student_answer, max_marks):
177
+ # # --- IMPROVED PROMPT STRATEGY ---
178
+ # # 1. Role: We set the persona to a "Strict Logical Validator" not a "Teacher".
179
+ # # 2. Few-Shot: We give examples of HALLUCINATIONS getting 0 marks.
180
+
181
+ # system_prompt = f"""You are a strict Logic Validator. You are NOT a helpful assistant.
182
+ # Your job is to check if the Student Answer is FACTUALLY present in the Context.
183
+
184
+ # GRADING ALGORITHM:
185
+ # 1. IF the Student Answer mentions things NOT in the Context -> PENALTY (-100%).
186
+ # 2. IF the Student Answer interprets the text opposite to its meaning -> PENALTY (-100%).
187
+ # 3. IF the Student Answer is generic fluff -> SCORE: 0.
188
+
189
+ # --- EXAMPLE 1 (HALLUCINATION) ---
190
+ # Context: The sky is blue due to Rayleigh scattering.
191
+ # Question: Why is the sky blue?
192
+ # Student Answer: Because the ocean reflects the water into the sky.
193
+ # Analysis: The Context mentions 'Rayleigh scattering'. The student mentions 'ocean reflection'. These are different. The student is hallucinating outside facts.
194
+ # Score: 0/{max_marks}
195
+
196
+ # --- EXAMPLE 2 (CONTRADICTION) ---
197
+ # Context: One must efface one's own personality. Good prose is like a windowpane.
198
+ # Question: What does the author mean?
199
+ # Student Answer: It means we should see the author's personality clearly.
200
+ # Analysis: The text says 'efface' (remove) personality. The student says 'see' personality. This is a direct contradiction.
201
+ # Score: 0/{max_marks}
202
+
203
+ # --- EXAMPLE 3 (CORRECT) ---
204
+ # Context: Mitochondria is the powerhouse of the cell.
205
+ # Question: What is mitochondria?
206
+ # Student Answer: It is the cell's powerhouse.
207
+ # Analysis: Matches the text meaning exactly.
208
+ # Score: {max_marks}/{max_marks}
209
+ # """
210
+
211
+ # user_prompt = f"""
212
+ # --- YOUR TASK ---
213
+ # Context:
214
+ # {context}
215
+
216
+ # Question:
217
+ # {question}
218
+
219
+ # Student Answer:
220
+ # {student_answer}
221
+
222
+ # OUTPUT FORMAT:
223
+ # Analysis: [Compare Student Answer vs Context. List any hallucinations or contradictions.]
224
+ # Score: [X]/{max_marks}
225
+ # """
226
+
227
+ # messages = [
228
+ # {"role": "system", "content": system_prompt},
229
+ # {"role": "user", "content": user_prompt}
230
+ # ]
231
+
232
+ # input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
233
+ # inputs = self.tokenizer(input_text, return_tensors="pt")
234
+
235
+ # # Lower temperature for strictness
236
+ # with torch.no_grad():
237
+ # outputs = self.model.generate(
238
+ # **inputs,
239
+ # max_new_tokens=150,
240
+ # temperature=0.1, # Strict logic, no creativity
241
+ # top_p=0.2, # Cut off unlikely tokens
242
+ # do_sample=True,
243
+ # repetition_penalty=1.2 # Penalize repetition
244
+ # )
245
+
246
+ # input_length = inputs['input_ids'].shape[1]
247
+ # response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
248
+ # return response
249
+
250
+
251
+ # # # ---------------------------------------------------------
252
+ # # # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
253
+ # # # ---------------------------------------------------------
254
+ # # class LLMEvaluator:
255
+ # # def __init__(self):
256
+ # # # Qwen 0.5B is great for speed, but needs VERY specific prompts to be strict.
257
+ # # self.repo_id = "onnx-community/Qwen2.5-0.5B-Instruct"
258
+ # # self.local_dir = "onnx_qwen_local"
259
+
260
+ # # print(f"🔄 Preparing CPU LLM: {self.repo_id}...")
261
+
262
+ # # if not os.path.exists(self.local_dir):
263
+ # # print(f"📥 Downloading FP16 model to {self.local_dir}...")
264
+ # # snapshot_download(
265
+ # # repo_id=self.repo_id,
266
+ # # local_dir=self.local_dir,
267
+ # # allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
268
+ # # )
269
+ # # print("✅ Download complete.")
270
+
271
+ # # self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
272
+
273
+ # # sess_options = SessionOptions()
274
+ # # sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
275
+
276
+ # # self.model = ORTModelForCausalLM.from_pretrained(
277
+ # # self.local_dir,
278
+ # # subfolder="onnx",
279
+ # # file_name="model_fp16.onnx",
280
+ # # use_cache=True,
281
+ # # use_io_binding=False,
282
+ # # provider=PROVIDERS[0],
283
+ # # session_options=sess_options
284
+ # # )
285
+
286
+ # # def evaluate(self, context, question, student_answer, max_marks):
287
+ # # # --- STRATEGY: FEW-SHOT PROMPTING & CHAIN OF THOUGHT ---
288
+ # # # Small models (0.5B) need examples to understand "Strictness".
289
+
290
+ # # system_prompt = """You are a strict automated grader. You grade ONLY based on the provided Context.
291
+
292
+ # # RULES:
293
+ # # 1. If the Student Answer contains facts NOT found in the Context, Score is 0.
294
+ # # 2. If the Student Answer contradicts the Context, Score is 0.
295
+ # # 3. Do not use outside knowledge. If it's not in the text, it's wrong.
296
+
297
+ # # --- EXAMPLE 1 (WRONG ANSWER) ---
298
+ # # Context: The sky is blue because of Rayleigh scattering.
299
+ # # Question: Why is the sky blue?
300
+ # # Student Answer: Because the ocean reflects into it.
301
+ # # Analysis: The context mentions Rayleigh scattering. The student mentioned ocean reflection. These do not match.
302
+ # # Score: 0/{max_marks}
303
+
304
+ # # --- EXAMPLE 2 (CORRECT ANSWER) ---
305
+ # # Context: Mitochondria is the powerhouse of the cell.
306
+ # # Question: What is the mitochondria?
307
+ # # Student Answer: It is the powerhouse of the cell.
308
+ # # Analysis: The student answer matches the context text exactly.
309
+ # # Score: {max_marks}/{max_marks}
310
+ # # """
311
+
312
+ # # user_prompt = f"""
313
+ # # --- NOW GRADE THIS ---
314
+ # # Context:
315
+ # # {context}
316
+
317
+ # # Question:
318
+ # # {question}
319
+
320
+ # # Student Answer:
321
+ # # {student_answer}
322
+
323
+ # # Task:
324
+ # # 1. Analyze if the specific keywords in Student Answer exist in Context.
325
+ # # 2. Assign a Score.
326
+
327
+ # # Output format:
328
+ # # Analysis: [Analysis here]
329
+ # # Score: [X]/{max_marks}
330
+ # # """
331
+
332
+ # # messages = [
333
+ # # {"role": "system", "content": system_prompt},
334
+ # # {"role": "user", "content": user_prompt}
335
+ # # ]
336
+
337
+ # # input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
338
+ # # inputs = self.tokenizer(input_text, return_tensors="pt")
339
+
340
+ # # with torch.no_grad():
341
+ # # outputs = self.model.generate(
342
+ # # **inputs,
343
+ # # max_new_tokens=150,
344
+ # # temperature=0.1, # Low temperature for facts
345
+ # # top_p=0.1, # Reduce creativity
346
+ # # do_sample=True,
347
+ # # repetition_penalty=1.1
348
+ # # )
349
+
350
+ # # input_length = inputs['input_ids'].shape[1]
351
+ # # response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
352
+ # # return response
353
+
354
+
355
+ # # ---------------------------------------------------------
356
+ # # 3. Main Application Logic
357
+ # # ---------------------------------------------------------
358
+ # class VectorSystem:
359
+ # def __init__(self):
360
+ # self.vector_store = None
361
+ # self.embeddings = OnnxBgeEmbeddings()
362
+ # self.llm = LLMEvaluator()
363
+ # self.all_chunks = []
364
+ # self.total_chunks = 0
365
+
366
+ # def process_file(self, file_obj):
367
+ # if file_obj is None: return "No file uploaded."
368
+ # try:
369
+ # text = ""
370
+ # if file_obj.name.endswith('.pdf'):
371
+ # doc = fitz.open(file_obj.name)
372
+ # for page in doc: text += page.get_text()
373
+ # elif file_obj.name.endswith('.txt'):
374
+ # with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
375
+ # else:
376
+ # return "❌ Error: Only .pdf and .txt supported."
377
+
378
+ # text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
379
+ # self.all_chunks = text_splitter.split_text(text)
380
+ # self.total_chunks = len(self.all_chunks)
381
+
382
+ # if not self.all_chunks: return "File empty."
383
+
384
+ # metadatas = [{"id": i} for i in range(self.total_chunks)]
385
+ # self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
386
+
387
+ # return f"✅ Indexed {self.total_chunks} chunks."
388
+ # except Exception as e:
389
+ # return f"Error: {str(e)}"
390
+
391
+ # def process_query(self, question, student_answer, max_marks):
392
+ # if not self.vector_store: return "⚠️ Please upload a file first.", ""
393
+ # if not question: return "⚠️ Enter a question.", ""
394
+
395
+ # results = self.vector_store.similarity_search_with_score(question, k=1)
396
+ # top_doc, score = results[0]
397
+
398
+ # center_id = top_doc.metadata['id']
399
+ # start_id = max(0, center_id - 1)
400
+ # end_id = min(self.total_chunks - 1, center_id + 1)
401
+
402
+ # expanded_context = ""
403
+ # for i in range(start_id, end_id + 1):
404
+ # expanded_context += self.all_chunks[i] + "\n"
405
+
406
+ # evidence_display = f"### 📚 Expanded Context (Chunks {start_id} to {end_id}):\n"
407
+ # evidence_display += f"> ... {expanded_context} ..."
408
+
409
+ # llm_feedback = "Please enter a student answer to grade."
410
+ # if student_answer:
411
+ # llm_feedback = self.llm.evaluate(expanded_context, question, student_answer, max_marks)
412
+
413
+ # return evidence_display, llm_feedback
414
+
415
+ # system = VectorSystem()
416
+
417
+ # with gr.Blocks(title="EduGenius AI Grader") as demo:
418
+ # gr.Markdown("# ⚡ EduGenius: CPU Optimized RAG")
419
+ # gr.Markdown("Powered by **Qwen-2.5-0.5B** and **BGE-Small** (ONNX Optimized)")
420
+
421
+ # with gr.Row():
422
+ # with gr.Column(scale=1):
423
+ # pdf_input = gr.File(label="1. Upload Chapter")
424
+ # upload_btn = gr.Button("Index Content", variant="primary")
425
+ # status_msg = gr.Textbox(label="Status", interactive=False)
426
+
427
+ # with gr.Column(scale=2):
428
+ # with gr.Row():
429
+ # q_input = gr.Textbox(label="Question", scale=2)
430
+ # max_marks = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max Marks")
431
+
432
+ # a_input = gr.TextArea(label="Student Answer")
433
+ # run_btn = gr.Button("Retrieve & Grade", variant="secondary")
434
+
435
+ # with gr.Row():
436
+ # evidence_box = gr.Markdown(label="Context Used")
437
+ # grade_box = gr.Markdown(label="Grading Result")
438
+
439
+ # upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
440
+ # run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])
441
+
442
+ # if __name__ == "__main__":
443
+ # demo.launch()
444
+
445
+
446
+
447
+
448
+
449
+
450
+
451
+
452
+
453
+
454
+
455
+
456
  import gradio as gr
457
  import fitz # PyMuPDF
458
  import torch
 
818
  self.all_chunks = []
819
  self.total_chunks = 0
820
 
821
def process_content(self, file_obj, raw_text):
    """Index content from an uploaded file OR pasted raw text.

    Exactly one source must be provided: ``file_obj`` (a Gradio file object
    exposing a ``.name`` filesystem path; only .pdf and .txt are accepted)
    or ``raw_text`` (a non-blank string). The text is split into
    overlapping chunks and indexed into a FAISS vector store.

    Side effects: sets ``self.all_chunks``, ``self.total_chunks`` and
    ``self.vector_store``.

    Returns:
        str: a human-readable status / error message for the UI.
    """
    has_file = file_obj is not None
    # Whitespace-only pasted text counts as "no text provided".
    has_text = raw_text is not None and len(raw_text.strip()) > 0

    # Enforce exclusivity: the user must supply exactly one source.
    if has_file and has_text:
        return "❌ Error: Please provide EITHER a file OR paste text, not both at the same time."

    if not has_file and not has_text:
        return "⚠️ No content provided. Please upload a file or paste text."

    try:
        text = ""
        # Case 1: Process File
        if has_file:
            # FIX: compare extensions case-insensitively so '.PDF' / '.TXT'
            # uploads are not rejected.
            name = file_obj.name.lower()
            if name.endswith('.pdf'):
                # FIX: use the context manager so the PyMuPDF document
                # handle is always closed (previous version leaked it).
                with fitz.open(file_obj.name) as doc:
                    for page in doc:
                        text += page.get_text()
            elif name.endswith('.txt'):
                with open(file_obj.name, 'r', encoding='utf-8') as f:
                    text = f.read()
            else:
                return "❌ Error: Only .pdf and .txt supported."

        # Case 2: Process Raw Text
        else:
            text = raw_text

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
        self.all_chunks = text_splitter.split_text(text)
        self.total_chunks = len(self.all_chunks)

        if not self.all_chunks:
            return "Content empty."

        metadatas = [{"id": i} for i in range(self.total_chunks)]
        self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)

        return f"✅ Indexed {self.total_chunks} chunks."
    except Exception as e:
        # Surface any parsing/indexing failure to the UI instead of crashing.
        return f"Error: {str(e)}"
 
861
  def process_query(self, question, student_answer, max_marks):
862
+ if not self.vector_store: return "⚠️ Please upload a file or paste text first.", ""
863
  if not question: return "⚠️ Enter a question.", ""
864
 
865
  results = self.vector_store.similarity_search_with_score(question, k=1)
 
890
 
891
  with gr.Row():
892
  with gr.Column(scale=1):
893
+ gr.Markdown("### Source Input (Choose One)")
894
+ pdf_input = gr.File(label="Option A: Upload Chapter (PDF/TXT)")
895
+ gr.Markdown("**OR**")
896
+ text_input = gr.Textbox(label="Option B: Paste Context", placeholder="Paste text here if you don't have a file...", lines=5)
897
+
898
  upload_btn = gr.Button("Index Content", variant="primary")
899
  status_msg = gr.Textbox(label="Status", interactive=False)
900
 
 
910
  evidence_box = gr.Markdown(label="Context Used")
911
  grade_box = gr.Markdown(label="Grading Result")
912
 
913
+ # Pass both inputs to the process_content function
914
+ upload_btn.click(system.process_content, inputs=[pdf_input, text_input], outputs=[status_msg])
915
  run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])
916
 
917
  if __name__ == "__main__":