heerjtdev committed on
Commit
3dbbbc3
·
verified ·
1 Parent(s): 0b67337

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +351 -1
app.py CHANGED
@@ -287,6 +287,356 @@
287
 
288
 
289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
 
292
 
@@ -352,7 +702,7 @@ class OnnxBgeEmbeddings(Embeddings):
352
  class LLMEvaluator:
353
  def __init__(self):
354
  # Qwen 2.5 0.5B is fast but needs "Few-Shot" examples to be strict.
355
- self.repo_id = "onnx-community/Qwen2.5-1.5B-Instruct"
356
  self.local_dir = "onnx_qwen_local"
357
 
358
  print(f"🔄 Preparing CPU LLM: {self.repo_id}...")
 
287
 
288
 
289
 
290
+
291
+
292
+
293
+
294
+
295
+
296
+
297
+
298
+ # import gradio as gr
299
+ # import fitz # PyMuPDF
300
+ # import torch
301
+ # import os
302
+ # import numpy as np
303
+
304
+ # # --- IMPORT SESSION OPTIONS ---
305
+ # from onnxruntime import SessionOptions, GraphOptimizationLevel
306
+
307
+ # # --- LANGCHAIN & RAG IMPORTS ---
308
+ # from langchain_text_splitters import RecursiveCharacterTextSplitter
309
+ # from langchain_community.vectorstores import FAISS
310
+ # from langchain_core.embeddings import Embeddings
311
+ # from langchain_core.documents import Document
312
+
313
+ # # --- ONNX & MODEL IMPORTS ---
314
+ # from transformers import AutoTokenizer
315
+ # from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM, ORTModelForSequenceClassification
316
+ # from huggingface_hub import snapshot_download
317
+
318
+ # # Force CPU Provider
319
+ # PROVIDERS = ["CPUExecutionProvider"]
320
+ # print(f"⚡ Running on: {PROVIDERS}")
321
+
322
+ # # ---------------------------------------------------------
323
+ # # 1. OPTIMIZED EMBEDDINGS (BGE-SMALL)
324
+ # # ---------------------------------------------------------
325
+ # class OnnxBgeEmbeddings(Embeddings):
326
+ # def __init__(self):
327
+ # model_name = "Xenova/bge-small-en-v1.5"
328
+ # print(f"🔄 Loading Embeddings: {model_name}...")
329
+ # self.tokenizer = AutoTokenizer.from_pretrained(model_name)
330
+ # self.model = ORTModelForFeatureExtraction.from_pretrained(
331
+ # model_name,
332
+ # export=False,
333
+ # provider=PROVIDERS[0]
334
+ # )
335
+
336
+ # def _process_batch(self, texts):
337
+ # inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
338
+ # with torch.no_grad():
339
+ # outputs = self.model(**inputs)
340
+ # embeddings = outputs.last_hidden_state[:, 0]
341
+ # embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
342
+ # return embeddings.numpy().tolist()
343
+
344
+ # def embed_documents(self, texts):
345
+ # return self._process_batch(texts)
346
+
347
+ # def embed_query(self, text):
348
+ # return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
349
+
350
+
351
+ # # ---------------------------------------------------------
352
+ # # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
353
+ # # ---------------------------------------------------------
354
+ # class LLMEvaluator:
355
+ # def __init__(self):
356
+ # # Qwen 2.5 0.5B is fast but needs "Few-Shot" examples to be strict.
357
+ # self.repo_id = "onnx-community/Qwen2.5-1.5B-Instruct"
358
+ # self.local_dir = "onnx_qwen_local"
359
+
360
+ # print(f"🔄 Preparing CPU LLM: {self.repo_id}...")
361
+
362
+ # if not os.path.exists(self.local_dir):
363
+ # print(f"📥 Downloading FP16 model to {self.local_dir}...")
364
+ # snapshot_download(
365
+ # repo_id=self.repo_id,
366
+ # local_dir=self.local_dir,
367
+ # allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
368
+ # )
369
+ # print("✅ Download complete.")
370
+
371
+ # self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
372
+
373
+ # sess_options = SessionOptions()
374
+ # sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
375
+
376
+ # self.model = ORTModelForCausalLM.from_pretrained(
377
+ # self.local_dir,
378
+ # subfolder="onnx",
379
+ # file_name="model_fp16.onnx",
380
+ # use_cache=True,
381
+ # use_io_binding=False,
382
+ # provider=PROVIDERS[0],
383
+ # session_options=sess_options
384
+ # )
385
+
386
+ # def evaluate(self, context, question, student_answer, max_marks):
387
+ # # --- IMPROVED PROMPT STRATEGY ---
388
+ # system_prompt = f"""You are a strict Logic Validator. You are NOT a helpful assistant.
389
+ # Your job is to check if the Student Answer is FACTUALLY present in the Context.
390
+
391
+ # GRADING ALGORITHM:
392
+ # 1. IF the Student Answer mentions things NOT in the Context -> PENALTY (-50% of the marks).
393
+ # 2. IF the Student Answer interprets the text opposite to its meaning -> PENALTY (-100% of the marks).
394
+ # 3. IF the Student Answer is generic fluff -> SCORE: 0.
395
+
396
+ # --- EXAMPLE 1 (HALLUCINATION) ---
397
+ # Context: The sky is blue due to Rayleigh scattering.
398
+ # Question: Why is the sky blue?
399
+ # Student Answer: Because the ocean reflects the water into the sky.
400
+ # Analysis: The Context mentions 'Rayleigh scattering'. The student mentions 'ocean reflection'. These are different. The student is hallucinating outside facts.
401
+ # Score: 0/{max_marks}
402
+
403
+ # --- EXAMPLE 2 (CONTRADICTION) ---
404
+ # Context: One must efface one's own personality. Good prose is like a windowpane.
405
+ # Question: What does the author mean?
406
+ # Student Answer: It means we should see the author's personality clearly.
407
+ # Analysis: The text says 'efface' (remove) personality. The student says 'see' personality. This is a direct contradiction.
408
+ # Score: 0/{max_marks}
409
+
410
+ # --- EXAMPLE 3 (CORRECT) ---
411
+ # Context: Mitochondria is the powerhouse of the cell.
412
+ # Question: What is mitochondria?
413
+ # Student Answer: It is the cell's powerhouse.
414
+ # Analysis: Matches the text meaning exactly.
415
+ # Score: {max_marks}/{max_marks}
416
+ # """
417
+
418
+ # user_prompt = f"""
419
+ # --- YOUR TASK ---
420
+ # Context:
421
+ # {context}
422
+
423
+ # Question:
424
+ # {question}
425
+
426
+ # Student Answer:
427
+ # {student_answer}
428
+
429
+ # OUTPUT FORMAT:
430
+ # Analysis: [Compare Student Answer vs Context. List any hallucinations or contradictions.]
431
+ # Score: [X]/{max_marks}
432
+ # """
433
+
434
+ # messages = [
435
+ # {"role": "system", "content": system_prompt},
436
+ # {"role": "user", "content": user_prompt}
437
+ # ]
438
+
439
+ # input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
440
+ # inputs = self.tokenizer(input_text, return_tensors="pt")
441
+
442
+ # # Lower temperature for strictness
443
+ # with torch.no_grad():
444
+ # outputs = self.model.generate(
445
+ # **inputs,
446
+ # max_new_tokens=150,
447
+ # temperature=0.1, # Strict logic, no creativity
448
+ # top_p=0.2, # Cut off unlikely tokens
449
+ # do_sample=True,
450
+ # repetition_penalty=1.2 # Penalize repetition
451
+ # )
452
+
453
+ # input_length = inputs['input_ids'].shape[1]
454
+ # response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
455
+ # return response
456
+
457
+
458
+ # # ---------------------------------------------------------
459
+ # # 3. NEW: ONNX RERANKER (Cross-Encoder)
460
+ # # Uses existing 'optimum' & 'transformers' libs (No new deps)
461
+ # # ---------------------------------------------------------
462
+ # class OnnxReranker:
463
+ # def __init__(self):
464
+ # # TinyBERT is ~17MB and very fast on CPU
465
+ # self.model_name = "Xenova/ms-marco-TinyBERT-L-2-v2"
466
+ # print(f"🔄 Loading Reranker: {self.model_name}...")
467
+ # self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
468
+ # self.model = ORTModelForSequenceClassification.from_pretrained(
469
+ # self.model_name,
470
+ # export=False,
471
+ # provider=PROVIDERS[0]
472
+ # )
473
+
474
+ # def rank(self, query, docs, top_k=3):
475
+ # if not docs:
476
+ # return []
477
+
478
+ # # Prepare pairs: [query, doc_text]
479
+ # pairs = [[query, doc.page_content] for doc in docs]
480
+
481
+ # inputs = self.tokenizer(
482
+ # pairs,
483
+ # padding=True,
484
+ # truncation=True,
485
+ # max_length=512,
486
+ # return_tensors="pt"
487
+ # )
488
+
489
+ # with torch.no_grad():
490
+ # outputs = self.model(**inputs)
491
+
492
+ # # Get logits (Relevance scores)
493
+ # # MS-Marco models typically output a single logit or [irrelevant, relevant]
494
+ # logits = outputs.logits
495
+ # if logits.shape[1] == 2:
496
+ # scores = logits[:, 1] # Take the "relevant" class score
497
+ # else:
498
+ # scores = logits.flatten()
499
+
500
+ # # Sort docs by score (descending)
501
+ # scores = scores.numpy().tolist()
502
+ # doc_score_pairs = list(zip(docs, scores))
503
+ # doc_score_pairs.sort(key=lambda x: x[1], reverse=True)
504
+
505
+ # # Return top K docs
506
+ # return [doc for doc, score in doc_score_pairs[:top_k]]
507
+
508
+
509
+ # # ---------------------------------------------------------
510
+ # # 4. Main Application Logic
511
+ # # ---------------------------------------------------------
512
+ # class VectorSystem:
513
+ # def __init__(self):
514
+ # self.vector_store = None
515
+ # self.embeddings = OnnxBgeEmbeddings()
516
+ # self.llm = LLMEvaluator()
517
+ # self.reranker = OnnxReranker() # Initialize Reranker
518
+ # self.all_chunks = []
519
+ # self.total_chunks = 0
520
+
521
+ # def process_content(self, file_obj, raw_text):
522
+ # has_file = file_obj is not None
523
+ # has_text = raw_text is not None and len(raw_text.strip()) > 0
524
+
525
+ # if has_file and has_text:
526
+ # return "❌ Error: Please provide EITHER a file OR paste text, not both at the same time."
527
+
528
+ # if not has_file and not has_text:
529
+ # return "⚠️ No content provided. Please upload a file or paste text."
530
+
531
+ # try:
532
+ # text = ""
533
+ # if has_file:
534
+ # if file_obj.name.endswith('.pdf'):
535
+ # doc = fitz.open(file_obj.name)
536
+ # for page in doc: text += page.get_text()
537
+ # elif file_obj.name.endswith('.txt'):
538
+ # with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
539
+ # else:
540
+ # return "❌ Error: Only .pdf and .txt supported."
541
+ # else:
542
+ # text = raw_text
543
+
544
+ # # Smaller chunks for Reranking precision (500 chars)
545
+ # text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
546
+ # texts = text_splitter.split_text(text)
547
+ # self.all_chunks = texts # Keep plain text list for reference
548
+
549
+ # # Create Document objects with metadata
550
+ # docs = [Document(page_content=t, metadata={"id": i}) for i, t in enumerate(texts)]
551
+ # self.total_chunks = len(docs)
552
+
553
+ # if not docs: return "Content empty."
554
+
555
+ # self.vector_store = FAISS.from_documents(docs, self.embeddings)
556
+
557
+ # return f"✅ Indexed {self.total_chunks} chunks."
558
+ # except Exception as e:
559
+ # return f"Error: {str(e)}"
560
+
561
+ # def process_query(self, question, student_answer, max_marks):
562
+ # if not self.vector_store: return "⚠️ Please upload a file or paste text first.", ""
563
+ # if not question: return "⚠️ Enter a question.", ""
564
+
565
+ # # Step A: Wide Net Retrieval (Get top 15 candidates)
566
+ # # We fetch more than we need to ensure the answer is in the candidate pool
567
+ # initial_docs = self.vector_store.similarity_search(question, k=15)
568
+
569
+ # # Step B: Rerank (Get top 3 best matches)
570
+ # # The Cross-Encoder strictly judges relevance
571
+ # top_docs = self.reranker.rank(question, initial_docs, top_k=3)
572
+
573
+ # # Step C: Construct Context
574
+ # # We merge the top 3 specific chunks
575
+ # expanded_context = "\n\n---\n\n".join([d.page_content for d in top_docs])
576
+
577
+ # evidence_display = f"### 📚 Optimized Context (Top {len(top_docs)} chunks after Reranking):\n"
578
+ # evidence_display += f"> {expanded_context} ..."
579
+
580
+ # llm_feedback = "Please enter a student answer to grade."
581
+ # if student_answer:
582
+ # llm_feedback = self.llm.evaluate(expanded_context, question, student_answer, max_marks)
583
+
584
+ # return evidence_display, llm_feedback
585
+
586
+ # system = VectorSystem()
587
+
588
+ # with gr.Blocks(title="EduGenius AI Grader") as demo:
589
+ # gr.Markdown("# ⚡ EduGenius: CPU Optimized RAG")
590
+ # gr.Markdown("Powered by **Qwen-2.5-0.5B**, **BGE-Small** & **TinyBERT Reranker**")
591
+
592
+ # with gr.Row():
593
+ # with gr.Column(scale=1):
594
+ # gr.Markdown("### Source Input (Choose One)")
595
+ # pdf_input = gr.File(label="Option A: Upload Chapter (PDF/TXT)")
596
+ # gr.Markdown("**OR**")
597
+ # text_input = gr.Textbox(label="Option B: Paste Context", placeholder="Paste text here if you don't have a file...", lines=5)
598
+
599
+ # upload_btn = gr.Button("Index Content", variant="primary")
600
+ # status_msg = gr.Textbox(label="Status", interactive=False)
601
+
602
+ # with gr.Column(scale=2):
603
+ # with gr.Row():
604
+ # q_input = gr.Textbox(label="Question", scale=2)
605
+ # max_marks = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max Marks")
606
+
607
+ # a_input = gr.TextArea(label="Student Answer")
608
+ # run_btn = gr.Button("Retrieve & Grade", variant="secondary")
609
+
610
+ # with gr.Row():
611
+ # evidence_box = gr.Markdown(label="Context Used")
612
+ # grade_box = gr.Markdown(label="Grading Result")
613
+
614
+ # # Pass both inputs to the process_content function
615
+ # upload_btn.click(system.process_content, inputs=[pdf_input, text_input], outputs=[status_msg])
616
+ # run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])
617
+
618
+ # if __name__ == "__main__":
619
+ # demo.launch()
620
+
621
+
622
+
623
+
624
+
625
+
626
+
627
+
628
+
629
+
630
+
631
+
632
+
633
+
634
+
635
+
636
+
637
+
638
+
639
+
640
 
641
 
642
 
 
702
  class LLMEvaluator:
703
  def __init__(self):
704
  # Qwen 2.5 0.5B is fast but needs "Few-Shot" examples to be strict.
705
+ self.repo_id = "onnx-community/Qwen2.5-0.5B-Instruct"
706
  self.local_dir = "onnx_qwen_local"
707
 
708
  print(f"🔄 Preparing CPU LLM: {self.repo_id}...")