heerjtdev committed on
Commit
31baf0a
·
verified ·
1 Parent(s): 4cc40b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -284
app.py CHANGED
@@ -179,229 +179,6 @@
179
 
180
 
181
 
182
- # import gradio as gr
183
- # import fitz # PyMuPDF
184
- # import torch
185
- # import os
186
-
187
- # # --- LANGCHAIN & RAG IMPORTS ---
188
- # from langchain_text_splitters import RecursiveCharacterTextSplitter
189
- # from langchain_community.vectorstores import FAISS
190
- # from langchain_core.embeddings import Embeddings
191
-
192
- # # --- ONNX & MODEL IMPORTS ---
193
- # from transformers import AutoTokenizer
194
- # from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM
195
- # from huggingface_hub import snapshot_download
196
-
197
- # # ---------------------------------------------------------
198
- # # 1. Custom ONNX Embedding Class (BGE-Large)
199
- # # ---------------------------------------------------------
200
- # class OnnxBgeEmbeddings(Embeddings):
201
- # def __init__(self, model_name="BAAI/bge-large-en-v1.5"):
202
- # print(f"πŸ”„ Loading Embeddings: {model_name}...")
203
- # self.tokenizer = AutoTokenizer.from_pretrained(model_name)
204
- # # Note: export=True will re-convert on every restart.
205
- # # For production, you'd want to save this permanently, but this works for now.
206
- # self.model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
207
-
208
- # def _process_batch(self, texts):
209
- # inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
210
- # with torch.no_grad():
211
- # outputs = self.model(**inputs)
212
- # # CLS pooling for BGE
213
- # embeddings = outputs.last_hidden_state[:, 0]
214
- # embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
215
- # return embeddings.numpy().tolist()
216
-
217
- # def embed_documents(self, texts):
218
- # return self._process_batch(texts)
219
-
220
- # def embed_query(self, text):
221
- # return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
222
-
223
- # # ---------------------------------------------------------
224
- # # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
225
- # # ---------------------------------------------------------
226
-
227
- # class LLMEvaluator:
228
- # def __init__(self):
229
- # self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
230
- # self.local_dir = "onnx_llama_local"
231
-
232
- # print(f"πŸ”„ Preparing LLM: {self.repo_id}...")
233
-
234
- # # [FIXED DOWNLOADER]
235
- # print(f"πŸ“₯ Downloading FP16 model + data to {self.local_dir}...")
236
- # snapshot_download(
237
- # repo_id=self.repo_id,
238
- # local_dir=self.local_dir,
239
- # local_dir_use_symlinks=False,
240
- # allow_patterns=[
241
- # "config.json",
242
- # "generation_config.json",
243
- # "tokenizer*",
244
- # "special_tokens_map.json",
245
- # "*.jinja",
246
- # "onnx/model_fp16.onnx*" # WILDCARD '*' ensures we get .onnx AND .onnx_data
247
- # ]
248
- # )
249
- # print("βœ… Download complete.")
250
-
251
- # self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
252
-
253
- # # [CRITICAL FIX]
254
- # # Separating 'subfolder' and 'file_name' is required by Optimum
255
- # self.model = ORTModelForCausalLM.from_pretrained(
256
- # self.local_dir,
257
- # subfolder="onnx", # Point to the subfolder
258
- # file_name="model_fp16.onnx", # Just the filename
259
- # use_cache=True,
260
- # use_io_binding=False
261
- # )
262
-
263
- # def evaluate(self, context, question, student_answer):
264
- # # Prompt Engineering for Llama 3
265
- # messages = [
266
- # {"role": "system", "content": "You are a helpful academic grader. Grade the student answer based ONLY on the provided context."},
267
- # {"role": "user", "content": f"""
268
- # ### CONTEXT:
269
- # {context}
270
-
271
- # ### QUESTION:
272
- # {question}
273
-
274
- # ### STUDENT ANSWER:
275
- # {student_answer}
276
-
277
- # ### INSTRUCTIONS:
278
- # 1. Is the answer correct?
279
- # 2. Score out of 10.
280
- # 3. Explanation.
281
- # """}
282
- # ]
283
-
284
- # # Format input using the chat template
285
- # input_text = self.tokenizer.apply_chat_template(
286
- # messages,
287
- # tokenize=False,
288
- # add_generation_prompt=True
289
- # )
290
-
291
- # inputs = self.tokenizer(input_text, return_tensors="pt")
292
-
293
- # # Generate response
294
- # with torch.no_grad():
295
- # outputs = self.model.generate(
296
- # **inputs,
297
- # max_new_tokens=256,
298
- # temperature=0.3,
299
- # do_sample=True,
300
- # top_p=0.9
301
- # )
302
-
303
- # # Decode response
304
- # response = self.tokenizer.decode(
305
- # outputs[0][inputs.input_ids.shape[1]:],
306
- # skip_special_tokens=True
307
- # )
308
- # return response
309
-
310
- # # ---------------------------------------------------------
311
- # # 3. Main Application Logic
312
- # # ---------------------------------------------------------
313
- # class VectorSystem:
314
- # def __init__(self):
315
- # self.vector_store = None
316
- # self.embeddings = OnnxBgeEmbeddings()
317
- # self.llm = LLMEvaluator() # Initialize LLM
318
- # self.all_chunks = []
319
-
320
- # def process_file(self, file_obj):
321
- # if file_obj is None: return "No file uploaded."
322
- # try:
323
- # text = ""
324
- # if file_obj.name.endswith('.pdf'):
325
- # doc = fitz.open(file_obj.name)
326
- # for page in doc: text += page.get_text()
327
- # elif file_obj.name.endswith('.txt'):
328
- # with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
329
- # else:
330
- # return "❌ Error: Only .pdf and .txt supported."
331
-
332
- # text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
333
- # self.all_chunks = text_splitter.split_text(text)
334
-
335
- # if not self.all_chunks: return "File empty."
336
-
337
- # metadatas = [{"id": i} for i in range(len(self.all_chunks))]
338
- # self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
339
- # return f"βœ… Indexed {len(self.all_chunks)} chunks."
340
- # except Exception as e:
341
- # return f"Error: {str(e)}"
342
-
343
- # def process_query(self, question, student_answer):
344
- # if not self.vector_store: return "⚠️ Please upload a file first.", ""
345
- # if not question: return "⚠️ Enter a question.", ""
346
-
347
- # # 1. Retrieve
348
- # results = self.vector_store.similarity_search_with_score(question, k=3)
349
-
350
- # # Prepare context for LLM
351
- # context_text = "\n\n".join([doc.page_content for doc, _ in results])
352
-
353
- # # Prepare Evidence Output for UI
354
- # evidence_display = "### πŸ“š Retrieved Context:\n"
355
- # for i, (doc, score) in enumerate(results):
356
- # evidence_display += f"**Chunk {i+1}** (Score: {score:.4f}):\n> {doc.page_content}\n\n"
357
-
358
- # # 2. Evaluate (if answer provided)
359
- # llm_feedback = "Please enter a student answer to grade."
360
- # if student_answer:
361
- # llm_feedback = self.llm.evaluate(context_text, question, student_answer)
362
-
363
- # return evidence_display, llm_feedback
364
-
365
- # # Initialize
366
- # system = VectorSystem()
367
-
368
- # # --- GRADIO UI ---
369
- # with gr.Blocks(title="EduGenius AI Grader") as demo:
370
- # gr.Markdown("# 🧠 EduGenius: RAG + LLM Grading")
371
- # gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B** (Evaluation) - All ONNX Optimized.")
372
-
373
- # with gr.Row():
374
- # with gr.Column(scale=1):
375
- # pdf_input = gr.File(label="1. Upload Chapter (PDF/TXT)")
376
- # upload_btn = gr.Button("Index Content", variant="primary")
377
- # status_msg = gr.Textbox(label="System Status", interactive=False)
378
-
379
- # with gr.Column(scale=2):
380
- # q_input = gr.Textbox(label="2. Question")
381
- # a_input = gr.Textbox(label="3. Student Answer")
382
- # run_btn = gr.Button("Retrieve & Grade", variant="secondary")
383
-
384
- # with gr.Row():
385
- # evidence_box = gr.Markdown(label="Context")
386
- # grade_box = gr.Markdown(label="LLM Evaluation")
387
-
388
- # upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
389
- # run_btn.click(system.process_query, inputs=[q_input, a_input], outputs=[evidence_box, grade_box])
390
-
391
- # if __name__ == "__main__":
392
- # demo.launch()
393
-
394
-
395
-
396
-
397
-
398
-
399
-
400
-
401
-
402
-
403
-
404
-
405
  import gradio as gr
406
  import fitz # PyMuPDF
407
  import torch
@@ -424,12 +201,15 @@ class OnnxBgeEmbeddings(Embeddings):
424
  def __init__(self, model_name="BAAI/bge-large-en-v1.5"):
425
  print(f"πŸ”„ Loading Embeddings: {model_name}...")
426
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
427
  self.model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
428
 
429
  def _process_batch(self, texts):
430
  inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
431
  with torch.no_grad():
432
  outputs = self.model(**inputs)
 
433
  embeddings = outputs.last_hidden_state[:, 0]
434
  embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
435
  return embeddings.numpy().tolist()
@@ -441,79 +221,92 @@ class OnnxBgeEmbeddings(Embeddings):
441
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
442
 
443
  # ---------------------------------------------------------
444
- # 2. LLM Evaluator Class (Llama-3.2-1B ONNX - INT8)
445
  # ---------------------------------------------------------
 
446
  class LLMEvaluator:
447
  def __init__(self):
448
  self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
449
  self.local_dir = "onnx_llama_local"
450
 
451
- print(f"πŸ”„ Preparing LLM: {self.repo_id} (Int8 Quantized)...")
 
 
 
452
  snapshot_download(
453
  repo_id=self.repo_id,
454
  local_dir=self.local_dir,
455
  local_dir_use_symlinks=False,
456
- allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_quantized.onnx"]
 
 
 
 
 
 
 
457
  )
 
 
458
  self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
 
 
 
459
  self.model = ORTModelForCausalLM.from_pretrained(
460
  self.local_dir,
461
- subfolder="onnx",
462
- file_name="model_quantized.onnx",
463
  use_cache=True,
464
  use_io_binding=False
465
  )
466
 
467
  def evaluate(self, context, question, student_answer):
468
- # [STRATEGY: FEW-SHOT PROMPTING]
469
- # We give the model an example so it knows exactly what format to output.
470
- # This prevents it from hallucinating dates or XML tags.
471
-
472
  messages = [
473
- {"role": "system", "content": "You are a grading assistant. Output only the requested format."},
474
  {"role": "user", "content": f"""
475
- Task: Grade the student answer based ONLY on the provided text.
476
-
477
- ---
478
- EXAMPLE:
479
- Text: "Photosynthesis is how plants make food using sunlight."
480
- Question: "How do plants eat?"
481
- Answer: "They use sunlight."
482
-
483
- Grade: 10/10
484
- Verdict: Correct
485
- Explanation: The text confirms plants use sunlight to make food.
486
- ---
487
-
488
- YOUR TURN:
489
-
490
- Text: "{context}"
491
- Question: "{question}"
492
- Answer: "{student_answer}"
493
-
494
- Output the Grade, Verdict, and Explanation:
495
  """}
496
  ]
497
 
498
- input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
 
 
 
 
 
499
  inputs = self.tokenizer(input_text, return_tensors="pt")
500
 
501
- # [GENERATION SETTINGS FOR STABILITY]
502
  with torch.no_grad():
503
  outputs = self.model.generate(
504
  **inputs,
505
- max_new_tokens=150,
506
-
507
- # We use Sampling with low temp instead of Greedy
508
- # This helps the model get "unstuck" from bad loops without being too creative.
509
- do_sample=True,
510
- temperature=0.2,
511
- top_p=0.9,
512
- repetition_penalty=1.1
513
  )
514
 
515
- return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
516
-
 
 
 
 
 
517
  # ---------------------------------------------------------
518
  # 3. Main Application Logic
519
  # ---------------------------------------------------------
@@ -521,7 +314,7 @@ class VectorSystem:
521
  def __init__(self):
522
  self.vector_store = None
523
  self.embeddings = OnnxBgeEmbeddings()
524
- self.llm = LLMEvaluator()
525
  self.all_chunks = []
526
 
527
  def process_file(self, file_obj):
@@ -551,24 +344,22 @@ class VectorSystem:
551
  if not self.vector_store: return "⚠️ Please upload a file first.", ""
552
  if not question: return "⚠️ Enter a question.", ""
553
 
554
- # 1. RAG RETRIEVAL (Top 1 Only)
555
- # We removed the Pre/Next chunk expansion because it was creating duplicate text
556
- # that confused the model.
557
- results = self.vector_store.similarity_search_with_score(question, k=1)
558
- if not results: return "No relevant text found.", ""
559
-
560
- best_doc, score = results[0]
561
- context_text = best_doc.page_content
 
 
562
 
563
- # 2. LLM EVALUATION
564
  llm_feedback = "Please enter a student answer to grade."
565
  if student_answer:
566
  llm_feedback = self.llm.evaluate(context_text, question, student_answer)
567
 
568
- # UI Display
569
- evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
570
- evidence_display += f"> **{context_text}**\n"
571
-
572
  return evidence_display, llm_feedback
573
 
574
  # Initialize
@@ -576,8 +367,8 @@ system = VectorSystem()
576
 
577
  # --- GRADIO UI ---
578
  with gr.Blocks(title="EduGenius AI Grader") as demo:
579
- gr.Markdown("# 🧠 EduGenius: AI Grader")
580
- gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B-Int8** (Evaluation).")
581
 
582
  with gr.Row():
583
  with gr.Column(scale=1):
@@ -591,11 +382,12 @@ with gr.Blocks(title="EduGenius AI Grader") as demo:
591
  run_btn = gr.Button("Retrieve & Grade", variant="secondary")
592
 
593
  with gr.Row():
594
- evidence_box = gr.Markdown(label="Context Used")
595
- grade_box = gr.Markdown(label="LLM Result")
596
 
597
  upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
598
  run_btn.click(system.process_query, inputs=[q_input, a_input], outputs=[evidence_box, grade_box])
599
 
600
  if __name__ == "__main__":
601
- demo.launch()
 
 
179
 
180
 
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  import gradio as gr
183
  import fitz # PyMuPDF
184
  import torch
 
201
  def __init__(self, model_name="BAAI/bge-large-en-v1.5"):
202
  print(f"πŸ”„ Loading Embeddings: {model_name}...")
203
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
204
+ # Note: export=True will re-convert on every restart.
205
+ # For production, you'd want to save this permanently, but this works for now.
206
  self.model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
207
 
208
  def _process_batch(self, texts):
209
  inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
210
  with torch.no_grad():
211
  outputs = self.model(**inputs)
212
+ # CLS pooling for BGE
213
  embeddings = outputs.last_hidden_state[:, 0]
214
  embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
215
  return embeddings.numpy().tolist()
 
221
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
222
 
223
  # ---------------------------------------------------------
224
+ # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
225
  # ---------------------------------------------------------
226
+
227
  class LLMEvaluator:
228
  def __init__(self):
229
  self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
230
  self.local_dir = "onnx_llama_local"
231
 
232
+ print(f"πŸ”„ Preparing LLM: {self.repo_id}...")
233
+
234
+ # [FIXED DOWNLOADER]
235
+ print(f"πŸ“₯ Downloading FP16 model + data to {self.local_dir}...")
236
  snapshot_download(
237
  repo_id=self.repo_id,
238
  local_dir=self.local_dir,
239
  local_dir_use_symlinks=False,
240
+ allow_patterns=[
241
+ "config.json",
242
+ "generation_config.json",
243
+ "tokenizer*",
244
+ "special_tokens_map.json",
245
+ "*.jinja",
246
+ "onnx/model_fp16.onnx*" # WILDCARD '*' ensures we get .onnx AND .onnx_data
247
+ ]
248
  )
249
+ print("βœ… Download complete.")
250
+
251
  self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
252
+
253
+ # [CRITICAL FIX]
254
+ # Separating 'subfolder' and 'file_name' is required by Optimum
255
  self.model = ORTModelForCausalLM.from_pretrained(
256
  self.local_dir,
257
+ subfolder="onnx", # Point to the subfolder
258
+ file_name="model_fp16.onnx", # Just the filename
259
  use_cache=True,
260
  use_io_binding=False
261
  )
262
 
263
  def evaluate(self, context, question, student_answer):
264
+ # Prompt Engineering for Llama 3
 
 
 
265
  messages = [
266
+ {"role": "system", "content": "You are a strict academic. Grade the student answer based ONLY on the provided context."},
267
  {"role": "user", "content": f"""
268
+ ### CONTEXT:
269
+ {context}
270
+
271
+ ### QUESTION:
272
+ {question}
273
+
274
+ ### STUDENT ANSWER:
275
+ {student_answer}
276
+
277
+ ### INSTRUCTIONS:
278
+ 1. Is the answer correct?
279
+ 2. Score out of 10.
280
+ 3. Explanation.
 
 
 
 
 
 
 
281
  """}
282
  ]
283
 
284
+ # Format input using the chat template
285
+ input_text = self.tokenizer.apply_chat_template(
286
+ messages,
287
+ tokenize=False,
288
+ add_generation_prompt=True
289
+ )
290
+
291
  inputs = self.tokenizer(input_text, return_tensors="pt")
292
 
293
+ # Generate response
294
  with torch.no_grad():
295
  outputs = self.model.generate(
296
  **inputs,
297
+ max_new_tokens=256,
298
+ temperature=0.3,
299
+ do_sample=True,
300
+ top_p=0.9
 
 
 
 
301
  )
302
 
303
+ # Decode response
304
+ response = self.tokenizer.decode(
305
+ outputs[0][inputs.input_ids.shape[1]:],
306
+ skip_special_tokens=True
307
+ )
308
+ return response
309
+
310
  # ---------------------------------------------------------
311
  # 3. Main Application Logic
312
  # ---------------------------------------------------------
 
314
  def __init__(self):
315
  self.vector_store = None
316
  self.embeddings = OnnxBgeEmbeddings()
317
+ self.llm = LLMEvaluator() # Initialize LLM
318
  self.all_chunks = []
319
 
320
  def process_file(self, file_obj):
 
344
  if not self.vector_store: return "⚠️ Please upload a file first.", ""
345
  if not question: return "⚠️ Enter a question.", ""
346
 
347
+ # 1. Retrieve
348
+ results = self.vector_store.similarity_search_with_score(question, k=3)
349
+
350
+ # Prepare context for LLM
351
+ context_text = "\n\n".join([doc.page_content for doc, _ in results])
352
+
353
+ # Prepare Evidence Output for UI
354
+ evidence_display = "### πŸ“š Retrieved Context:\n"
355
+ for i, (doc, score) in enumerate(results):
356
+ evidence_display += f"**Chunk {i+1}** (Score: {score:.4f}):\n> {doc.page_content}\n\n"
357
 
358
+ # 2. Evaluate (if answer provided)
359
  llm_feedback = "Please enter a student answer to grade."
360
  if student_answer:
361
  llm_feedback = self.llm.evaluate(context_text, question, student_answer)
362
 
 
 
 
 
363
  return evidence_display, llm_feedback
364
 
365
  # Initialize
 
367
 
368
  # --- GRADIO UI ---
369
  with gr.Blocks(title="EduGenius AI Grader") as demo:
370
+ gr.Markdown("# 🧠 EduGenius: RAG + LLM Grading")
371
+ gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B** (Evaluation) - All ONNX Optimized.")
372
 
373
  with gr.Row():
374
  with gr.Column(scale=1):
 
382
  run_btn = gr.Button("Retrieve & Grade", variant="secondary")
383
 
384
  with gr.Row():
385
+ evidence_box = gr.Markdown(label="Context")
386
+ grade_box = gr.Markdown(label="LLM Evaluation")
387
 
388
  upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
389
  run_btn.click(system.process_query, inputs=[q_input, a_input], outputs=[evidence_box, grade_box])
390
 
391
  if __name__ == "__main__":
392
+ demo.launch()
393
+