Spaces:

heerjtdev
/

try_answer

Running

App Files Community

heerjtdev committed on Feb 13

Commit

3dbbbc3

verified ·

1 Parent(s): 0b67337

Update app.py

Browse files

Files changed (1) hide show

app.py +351 -1

app.py CHANGED Viewed

@@ -287,6 +287,356 @@
@@ -352,7 +702,7 @@ class OnnxBgeEmbeddings(Embeddings):
 class LLMEvaluator:
     def __init__(self):
         # Qwen 2.5 0.5B is fast but needs "Few-Shot" examples to be strict.
-        self.repo_id = "onnx-community/Qwen2.5-1.5B-Instruct"
         self.local_dir = "onnx_qwen_local"
         print(f"🔄 Preparing CPU LLM: {self.repo_id}...")

+# import gradio as gr
+# import fitz  # PyMuPDF
+# import torch
+# import os
+# import numpy as np
+# # --- IMPORT SESSION OPTIONS ---
+# from onnxruntime import SessionOptions, GraphOptimizationLevel
+# # --- LANGCHAIN & RAG IMPORTS ---
+# from langchain_text_splitters import RecursiveCharacterTextSplitter
+# from langchain_community.vectorstores import FAISS
+# from langchain_core.embeddings import Embeddings
+# from langchain_core.documents import Document
+# # --- ONNX & MODEL IMPORTS ---
+# from transformers import AutoTokenizer
+# from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM, ORTModelForSequenceClassification
+# from huggingface_hub import snapshot_download
+# # Force CPU Provider
+# PROVIDERS = ["CPUExecutionProvider"]
+# print(f"⚡ Running on: {PROVIDERS}")
+# # ---------------------------------------------------------
+# # 1. OPTIMIZED EMBEDDINGS (BGE-SMALL)
+# # ---------------------------------------------------------
+# class OnnxBgeEmbeddings(Embeddings):
+#     def __init__(self):
+#         model_name = "Xenova/bge-small-en-v1.5"
+#         print(f"🔄 Loading Embeddings: {model_name}...")
+#         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+#         self.model = ORTModelForFeatureExtraction.from_pretrained(
+#             model_name,
+#             export=False,
+#             provider=PROVIDERS[0]
+#         )
+#     def _process_batch(self, texts):
+#         inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
+#         with torch.no_grad():
+#             outputs = self.model(**inputs)
+#         embeddings = outputs.last_hidden_state[:, 0]
+#         embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
+#         return embeddings.numpy().tolist()
+#     def embed_documents(self, texts):
+#         return self._process_batch(texts)
+#     def embed_query(self, text):
+#         return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
+# # ---------------------------------------------------------
+# # 2. OPTIMIZED LLM (Qwen 2.5 - 1.5B) - STRICT GRADING
+# # ---------------------------------------------------------
+# class LLMEvaluator:
+#     def __init__(self):
+#         # Qwen 2.5 1.5B is a small instruct model that needs "Few-Shot" examples to be strict.
+#         self.repo_id = "onnx-community/Qwen2.5-1.5B-Instruct"
+#         self.local_dir = "onnx_qwen_local"
+#         print(f"🔄 Preparing CPU LLM: {self.repo_id}...")
+#         if not os.path.exists(self.local_dir):
+#             print(f"📥 Downloading FP16 model to {self.local_dir}...")
+#             snapshot_download(
+#                 repo_id=self.repo_id,
+#                 local_dir=self.local_dir,
+#                 allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
+#             )
+#             print("✅ Download complete.")
+#         self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
+#         sess_options = SessionOptions()
+#         sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
+#         self.model = ORTModelForCausalLM.from_pretrained(
+#             self.local_dir,
+#             subfolder="onnx",
+#             file_name="model_fp16.onnx",
+#             use_cache=True,
+#             use_io_binding=False,
+#             provider=PROVIDERS[0],
+#             session_options=sess_options
+#         )
+#     def evaluate(self, context, question, student_answer, max_marks):
+#         # --- IMPROVED PROMPT STRATEGY ---
+#         system_prompt = f"""You are a strict Logic Validator. You are NOT a helpful assistant.
+#         Your job is to check if the Student Answer is FACTUALLY present in the Context.
+#         GRADING ALGORITHM:
+#         1. IF the Student Answer mentions things NOT in the Context -> PENALTY (-50% of the marks).
+#         2. IF the Student Answer interprets the text opposite to its meaning -> PENALTY (-100% of the marks).
+#         3. IF the Student Answer is generic fluff -> SCORE: 0.
+#         --- EXAMPLE 1 (HALLUCINATION) ---
+#         Context: The sky is blue due to Rayleigh scattering.
+#         Question: Why is the sky blue?
+#         Student Answer: Because the ocean reflects the water into the sky.
+#         Analysis: The Context mentions 'Rayleigh scattering'. The student mentions 'ocean reflection'. These are different. The student is hallucinating outside facts.
+#         Score: 0/{max_marks}
+#         --- EXAMPLE 2 (CONTRADICTION) ---
+#         Context: One must efface one's own personality. Good prose is like a windowpane.
+#         Question: What does the author mean?
+#         Student Answer: It means we should see the author's personality clearly.
+#         Analysis: The text says 'efface' (remove) personality. The student says 'see' personality. This is a direct contradiction.
+#         Score: 0/{max_marks}
+#         --- EXAMPLE 3 (CORRECT) ---
+#         Context: Mitochondria is the powerhouse of the cell.
+#         Question: What is mitochondria?
+#         Student Answer: It is the cell's powerhouse.
+#         Analysis: Matches the text meaning exactly.
+#         Score: {max_marks}/{max_marks}
+#         """
+#         user_prompt = f"""
+#         --- YOUR TASK ---
+#         Context:
+#         {context}
+#         Question:
+#         {question}
+#         Student Answer:
+#         {student_answer}
+#         OUTPUT FORMAT:
+#         Analysis: [Compare Student Answer vs Context. List any hallucinations or contradictions.]
+#         Score: [X]/{max_marks}
+#         """
+#         messages = [
+#             {"role": "system", "content": system_prompt},
+#             {"role": "user", "content": user_prompt}
+#         ]
+#         input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+#         inputs = self.tokenizer(input_text, return_tensors="pt")
+#         # Lower temperature for strictness
+#         with torch.no_grad():
+#             outputs = self.model.generate(
+#                 **inputs,
+#                 max_new_tokens=150,
+#                 temperature=0.1,    # Strict logic, no creativity
+#                 top_p=0.2,          # Cut off unlikely tokens
+#                 do_sample=True,
+#                 repetition_penalty=1.2 # Penalize repetition
+#             )
+#         input_length = inputs['input_ids'].shape[1]
+#         response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
+#         return response
+# # ---------------------------------------------------------
+# # 3. NEW: ONNX RERANKER (Cross-Encoder)
+# # Uses existing 'optimum' & 'transformers' libs (No new deps)
+# # ---------------------------------------------------------
+# class OnnxReranker:
+#     def __init__(self):
+#         # TinyBERT is ~17MB and very fast on CPU
+#         self.model_name = "Xenova/ms-marco-TinyBERT-L-2-v2"
+#         print(f"🔄 Loading Reranker: {self.model_name}...")
+#         self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+#         self.model = ORTModelForSequenceClassification.from_pretrained(
+#             self.model_name,
+#             export=False,
+#             provider=PROVIDERS[0]
+#         )
+#     def rank(self, query, docs, top_k=3):
+#         if not docs:
+#             return []
+#         # Prepare pairs: [query, doc_text]
+#         pairs = [[query, doc.page_content] for doc in docs]
+#         inputs = self.tokenizer(
+#             pairs,
+#             padding=True,
+#             truncation=True,
+#             max_length=512,
+#             return_tensors="pt"
+#         )
+#         with torch.no_grad():
+#             outputs = self.model(**inputs)
+#         # Get logits (Relevance scores)
+#         # MS-Marco models typically output a single logit or [irrelevant, relevant]
+#         logits = outputs.logits
+#         if logits.shape[1] == 2:
+#             scores = logits[:, 1] # Take the "relevant" class score
+#         else:
+#             scores = logits.flatten()
+#         # Sort docs by score (descending)
+#         scores = scores.numpy().tolist()
+#         doc_score_pairs = list(zip(docs, scores))
+#         doc_score_pairs.sort(key=lambda x: x[1], reverse=True)
+#         # Return top K docs
+#         return [doc for doc, score in doc_score_pairs[:top_k]]
+# # ---------------------------------------------------------
+# # 4. Main Application Logic
+# # ---------------------------------------------------------
+# class VectorSystem:
+#     def __init__(self):
+#         self.vector_store = None
+#         self.embeddings = OnnxBgeEmbeddings()
+#         self.llm = LLMEvaluator()
+#         self.reranker = OnnxReranker() # Initialize Reranker
+#         self.all_chunks = []
+#         self.total_chunks = 0
+#     def process_content(self, file_obj, raw_text):
+#         has_file = file_obj is not None
+#         has_text = raw_text is not None and len(raw_text.strip()) > 0
+#         if has_file and has_text:
+#             return "❌ Error: Please provide EITHER a file OR paste text, not both at the same time."
+#         if not has_file and not has_text:
+#             return "⚠️ No content provided. Please upload a file or paste text."
+#         try:
+#             text = ""
+#             if has_file:
+#                 if file_obj.name.endswith('.pdf'):
+#                     doc = fitz.open(file_obj.name)
+#                     for page in doc: text += page.get_text()
+#                 elif file_obj.name.endswith('.txt'):
+#                     with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
+#                 else:
+#                     return "❌ Error: Only .pdf and .txt supported."
+#             else:
+#                 text = raw_text
+#             # Smaller chunks for Reranking precision (500 chars)
+#             text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+#             texts = text_splitter.split_text(text)
+#             self.all_chunks = texts # Keep plain text list for reference
+#             # Create Document objects with metadata
+#             docs = [Document(page_content=t, metadata={"id": i}) for i, t in enumerate(texts)]
+#             self.total_chunks = len(docs)
+#             if not docs: return "Content empty."
+#             self.vector_store = FAISS.from_documents(docs, self.embeddings)
+#             return f"✅ Indexed {self.total_chunks} chunks."
+#         except Exception as e:
+#             return f"Error: {str(e)}"
+#     def process_query(self, question, student_answer, max_marks):
+#         if not self.vector_store: return "⚠️ Please upload a file or paste text first.", ""
+#         if not question: return "⚠️ Enter a question.", ""
+#         # Step A: Wide Net Retrieval (Get top 15 candidates)
+#         # We fetch more than we need to ensure the answer is in the candidate pool
+#         initial_docs = self.vector_store.similarity_search(question, k=15)
+#         # Step B: Rerank (Get top 3 best matches)
+#         # The Cross-Encoder strictly judges relevance
+#         top_docs = self.reranker.rank(question, initial_docs, top_k=3)
+#         # Step C: Construct Context
+#         # We merge the top 3 specific chunks
+#         expanded_context = "\n\n---\n\n".join([d.page_content for d in top_docs])
+#         evidence_display = f"### 📚 Optimized Context (Top {len(top_docs)} chunks after Reranking):\n"
+#         evidence_display += f"> {expanded_context} ..."
+#         llm_feedback = "Please enter a student answer to grade."
+#         if student_answer:
+#             llm_feedback = self.llm.evaluate(expanded_context, question, student_answer, max_marks)
+#         return evidence_display, llm_feedback
+# system = VectorSystem()
+# with gr.Blocks(title="EduGenius AI Grader") as demo:
+#     gr.Markdown("# ⚡ EduGenius: CPU Optimized RAG")
+#     gr.Markdown("Powered by **Qwen-2.5-0.5B**, **BGE-Small** & **TinyBERT Reranker**")
+#     with gr.Row():
+#         with gr.Column(scale=1):
+#             gr.Markdown("### Source Input (Choose One)")
+#             pdf_input = gr.File(label="Option A: Upload Chapter (PDF/TXT)")
+#             gr.Markdown("**OR**")
+#             text_input = gr.Textbox(label="Option B: Paste Context", placeholder="Paste text here if you don't have a file...", lines=5)
+#             upload_btn = gr.Button("Index Content", variant="primary")
+#             status_msg = gr.Textbox(label="Status", interactive=False)
+#         with gr.Column(scale=2):
+#             with gr.Row():
+#                 q_input = gr.Textbox(label="Question", scale=2)
+#                 max_marks = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max Marks")
+#             a_input = gr.TextArea(label="Student Answer")
+#             run_btn = gr.Button("Retrieve & Grade", variant="secondary")
+#             with gr.Row():
+#                 evidence_box = gr.Markdown(label="Context Used")
+#                 grade_box = gr.Markdown(label="Grading Result")
+#     # Pass both inputs to the process_content function
+#     upload_btn.click(system.process_content, inputs=[pdf_input, text_input], outputs=[status_msg])
+#     run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])
+# if __name__ == "__main__":
+#     demo.launch()
 class LLMEvaluator:
     def __init__(self):
         # Qwen 2.5 0.5B is fast but needs "Few-Shot" examples to be strict.
+        self.repo_id = "onnx-community/Qwen2.5-0.5B-Instruct"
         self.local_dir = "onnx_qwen_local"
         print(f"🔄 Preparing CPU LLM: {self.repo_id}...")