try_answer / app.py
heerjtdev's picture
Update app.py
0b67337 verified
raw
history blame
24.7 kB
# import gradio as gr
# import fitz # PyMuPDF
# import torch
# import os
# import onnxruntime as ort
# # --- IMPORT SESSION OPTIONS ---
# from onnxruntime import SessionOptions, GraphOptimizationLevel
# # --- LANGCHAIN & RAG IMPORTS ---
# from langchain_text_splitters import RecursiveCharacterTextSplitter
# from langchain_community.vectorstores import FAISS
# from langchain_core.embeddings import Embeddings
# # --- ONNX & MODEL IMPORTS ---
# from transformers import AutoTokenizer
# from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM
# from huggingface_hub import snapshot_download
# # Force CPU Provider
# PROVIDERS = ["CPUExecutionProvider"]
# print(f"⚑ Running on: {PROVIDERS}")
# # ---------------------------------------------------------
# # 1. OPTIMIZED EMBEDDINGS (BGE-SMALL)
# # ---------------------------------------------------------
# class OnnxBgeEmbeddings(Embeddings):
# def __init__(self):
# model_name = "Xenova/bge-small-en-v1.5"
# print(f"πŸ”„ Loading Embeddings: {model_name}...")
# self.tokenizer = AutoTokenizer.from_pretrained(model_name)
# self.model = ORTModelForFeatureExtraction.from_pretrained(
# model_name,
# export=False,
# provider=PROVIDERS[0]
# )
# def _process_batch(self, texts):
# inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
# with torch.no_grad():
# outputs = self.model(**inputs)
# embeddings = outputs.last_hidden_state[:, 0]
# embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
# return embeddings.numpy().tolist()
# def embed_documents(self, texts):
# return self._process_batch(texts)
# def embed_query(self, text):
# return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
# # ---------------------------------------------------------
# # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
# # ---------------------------------------------------------
# class LLMEvaluator:
# def __init__(self):
# # Qwen 2.5 0.5B is fast but needs "Few-Shot" examples to be strict.
# self.repo_id = "onnx-community/Qwen2.5-1.5B-Instruct"
# self.local_dir = "onnx_qwen_local"
# print(f"πŸ”„ Preparing CPU LLM: {self.repo_id}...")
# if not os.path.exists(self.local_dir):
# print(f"πŸ“₯ Downloading FP16 model to {self.local_dir}...")
# snapshot_download(
# repo_id=self.repo_id,
# local_dir=self.local_dir,
# allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
# )
# print("βœ… Download complete.")
# self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
# sess_options = SessionOptions()
# sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
# self.model = ORTModelForCausalLM.from_pretrained(
# self.local_dir,
# subfolder="onnx",
# file_name="model_fp16.onnx",
# use_cache=True,
# use_io_binding=False,
# provider=PROVIDERS[0],
# session_options=sess_options
# )
# def evaluate(self, context, question, student_answer, max_marks):
# # --- IMPROVED PROMPT STRATEGY ---
# # 1. Role: We set the persona to a "Strict Logical Validator" not a "Teacher".
# # 2. Few-Shot: We give examples of HALLUCINATIONS getting 0 marks.
# system_prompt = f"""You are a strict Logic Validator. You are NOT a helpful assistant.
# Your job is to check if the Student Answer is FACTUALLY present in the Context.
# GRADING ALGORITHM:
# 1. IF the Student Answer mentions things NOT in the Context -> PENALTY (-50% of the marks).
# 2. IF the Student Answer interprets the text opposite to its meaning -> PENALTY (-100% of the marks).
# 3. IF the Student Answer is generic fluff -> SCORE: 0.
# --- EXAMPLE 1 (HALLUCINATION) ---
# Context: The sky is blue due to Rayleigh scattering.
# Question: Why is the sky blue?
# Student Answer: Because the ocean reflects the water into the sky.
# Analysis: The Context mentions 'Rayleigh scattering'. The student mentions 'ocean reflection'. These are different. The student is hallucinating outside facts.
# Score: 0/{max_marks}
# --- EXAMPLE 2 (CONTRADICTION) ---
# Context: One must efface one's own personality. Good prose is like a windowpane.
# Question: What does the author mean?
# Student Answer: It means we should see the author's personality clearly.
# Analysis: The text says 'efface' (remove) personality. The student says 'see' personality. This is a direct contradiction.
# Score: 0/{max_marks}
# --- EXAMPLE 3 (CORRECT) ---
# Context: Mitochondria is the powerhouse of the cell.
# Question: What is mitochondria?
# Student Answer: It is the cell's powerhouse.
# Analysis: Matches the text meaning exactly.
# Score: {max_marks}/{max_marks}
# """
# user_prompt = f"""
# --- YOUR TASK ---
# Context:
# {context}
# Question:
# {question}
# Student Answer:
# {student_answer}
# OUTPUT FORMAT:
# Analysis: [Compare Student Answer vs Context. List any hallucinations or contradictions.]
# Score: [X]/{max_marks}
# """
# messages = [
# {"role": "system", "content": system_prompt},
# {"role": "user", "content": user_prompt}
# ]
# input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# inputs = self.tokenizer(input_text, return_tensors="pt")
# # Lower temperature for strictness
# with torch.no_grad():
# outputs = self.model.generate(
# **inputs,
# max_new_tokens=150,
# temperature=0.1, # Strict logic, no creativity
# top_p=0.2, # Cut off unlikely tokens
# do_sample=True,
# repetition_penalty=1.2 # Penalize repetition
# )
# input_length = inputs['input_ids'].shape[1]
# response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
# return response
# # ---------------------------------------------------------
# # 3. Main Application Logic
# # ---------------------------------------------------------
# class VectorSystem:
# def __init__(self):
# self.vector_store = None
# self.embeddings = OnnxBgeEmbeddings()
# self.llm = LLMEvaluator()
# self.all_chunks = []
# self.total_chunks = 0
# def process_content(self, file_obj, raw_text):
# # LOGIC: Check for exclusivity (Cannot have both file and text)
# has_file = file_obj is not None
# has_text = raw_text is not None and len(raw_text.strip()) > 0
# if has_file and has_text:
# return "❌ Error: Please provide EITHER a file OR paste text, not both at the same time."
# if not has_file and not has_text:
# return "⚠️ No content provided. Please upload a file or paste text."
# try:
# text = ""
# # Case 1: Process File
# if has_file:
# if file_obj.name.endswith('.pdf'):
# doc = fitz.open(file_obj.name)
# for page in doc: text += page.get_text()
# elif file_obj.name.endswith('.txt'):
# with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
# else:
# return "❌ Error: Only .pdf and .txt supported."
# # Case 2: Process Raw Text
# else:
# text = raw_text
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
# self.all_chunks = text_splitter.split_text(text)
# self.total_chunks = len(self.all_chunks)
# if not self.all_chunks: return "Content empty."
# metadatas = [{"id": i} for i in range(self.total_chunks)]
# self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
# return f"βœ… Indexed {self.total_chunks} chunks."
# except Exception as e:
# return f"Error: {str(e)}"
# def process_query(self, question, student_answer, max_marks):
# if not self.vector_store: return "⚠️ Please upload a file or paste text first.", ""
# if not question: return "⚠️ Enter a question.", ""
# results = self.vector_store.similarity_search_with_score(question, k=1)
# top_doc, score = results[0]
# center_id = top_doc.metadata['id']
# start_id = max(0, center_id - 1)
# end_id = min(self.total_chunks - 1, center_id + 1)
# expanded_context = ""
# for i in range(start_id, end_id + 1):
# expanded_context += self.all_chunks[i] + "\n"
# evidence_display = f"### πŸ“š Expanded Context (Chunks {start_id} to {end_id}):\n"
# evidence_display += f"> ... {expanded_context} ..."
# llm_feedback = "Please enter a student answer to grade."
# if student_answer:
# llm_feedback = self.llm.evaluate(expanded_context, question, student_answer, max_marks)
# return evidence_display, llm_feedback
# system = VectorSystem()
# with gr.Blocks(title="EduGenius AI Grader") as demo:
# gr.Markdown("# ⚑ EduGenius: CPU Optimized RAG")
# gr.Markdown("Powered by **Qwen-2.5-0.5B** and **BGE-Small** (ONNX Optimized)")
# with gr.Row():
# with gr.Column(scale=1):
# gr.Markdown("### Source Input (Choose One)")
# pdf_input = gr.File(label="Option A: Upload Chapter (PDF/TXT)")
# gr.Markdown("**OR**")
# text_input = gr.Textbox(label="Option B: Paste Context", placeholder="Paste text here if you don't have a file...", lines=5)
# upload_btn = gr.Button("Index Content", variant="primary")
# status_msg = gr.Textbox(label="Status", interactive=False)
# with gr.Column(scale=2):
# with gr.Row():
# q_input = gr.Textbox(label="Question", scale=2)
# max_marks = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max Marks")
# a_input = gr.TextArea(label="Student Answer")
# run_btn = gr.Button("Retrieve & Grade", variant="secondary")
# with gr.Row():
# evidence_box = gr.Markdown(label="Context Used")
# grade_box = gr.Markdown(label="Grading Result")
# # Pass both inputs to the process_content function
# upload_btn.click(system.process_content, inputs=[pdf_input, text_input], outputs=[status_msg])
# run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])
# if __name__ == "__main__":
# demo.launch()
import gradio as gr
import fitz # PyMuPDF
import torch
import os
import numpy as np
# --- IMPORT SESSION OPTIONS ---
from onnxruntime import SessionOptions, GraphOptimizationLevel
# --- LANGCHAIN & RAG IMPORTS ---
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.embeddings import Embeddings
from langchain_core.documents import Document
# --- ONNX & MODEL IMPORTS ---
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM, ORTModelForSequenceClassification
from huggingface_hub import snapshot_download
# Force CPU Provider
PROVIDERS = ["CPUExecutionProvider"]
print(f"⚑ Running on: {PROVIDERS}")
# ---------------------------------------------------------
# 1. OPTIMIZED EMBEDDINGS (BGE-SMALL)
# ---------------------------------------------------------
class OnnxBgeEmbeddings(Embeddings):
    """LangChain-compatible embeddings backed by an ONNX BGE-small model on CPU.

    Pooling scheme is CLS-token + L2 normalization, the recommended setup for
    BGE models so FAISS inner-product search behaves like cosine similarity.
    """

    def __init__(self):
        model_name = "Xenova/bge-small-en-v1.5"
        print(f"πŸ”„ Loading Embeddings: {model_name}...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = ORTModelForFeatureExtraction.from_pretrained(
            model_name,
            export=False,
            provider=PROVIDERS[0]
        )

    def _process_batch(self, texts):
        """Embed a list of strings; returns list[list[float]], L2-normalized."""
        inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
        with torch.no_grad():
            outputs = self.model(**inputs)
        # CLS-token pooling (position 0), then L2-normalize each vector.
        embeddings = outputs.last_hidden_state[:, 0]
        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
        return embeddings.numpy().tolist()

    def embed_documents(self, texts, batch_size=32):
        # FIX: embed in fixed-size batches instead of one giant batch.
        # Tokenizing the whole corpus at once pads every chunk to the longest
        # one, which can exhaust CPU memory on large documents. Results are
        # identical because padding is masked out of the CLS embedding.
        results = []
        for start in range(0, len(texts), batch_size):
            results.extend(self._process_batch(texts[start:start + batch_size]))
        return results

    def embed_query(self, text):
        # BGE models expect this instruction prefix on the query side only.
        return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
# ---------------------------------------------------------
# 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
# ---------------------------------------------------------
class LLMEvaluator:
    """Grades a student answer against retrieved context using a small ONNX LLM.

    Downloads the FP16 ONNX export of Qwen2.5-1.5B-Instruct on first run and
    serves it on CPU via onnxruntime.
    """

    def __init__(self):
        # NOTE: this is the 1.5B Instruct model (the old comment said 0.5B).
        # Small instruct models need the few-shot examples in the system
        # prompt below to grade strictly instead of being agreeable.
        self.repo_id = "onnx-community/Qwen2.5-1.5B-Instruct"
        self.local_dir = "onnx_qwen_local"
        print(f"πŸ”„ Preparing CPU LLM: {self.repo_id}...")
        if not os.path.exists(self.local_dir):
            print(f"πŸ“₯ Downloading FP16 model to {self.local_dir}...")
            # Only fetch the files actually needed: config, tokenizer assets,
            # chat template, and the FP16 ONNX weights.
            snapshot_download(
                repo_id=self.repo_id,
                local_dir=self.local_dir,
                allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
            )
            print("βœ… Download complete.")
        self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
        sess_options = SessionOptions()
        # Graph optimization disabled: optimizing a 1.5B FP16 graph at session
        # creation is slow and memory-hungry on a small CPU machine.
        sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
        self.model = ORTModelForCausalLM.from_pretrained(
            self.local_dir,
            subfolder="onnx",
            file_name="model_fp16.onnx",
            use_cache=True,
            use_io_binding=False,
            provider=PROVIDERS[0],
            session_options=sess_options
        )

    def evaluate(self, context, question, student_answer, max_marks):
        """Return the model's 'Analysis: ... Score: X/max_marks' text for one answer.

        Few-shot prompt strategy: the persona is a strict "Logic Validator",
        with worked examples of hallucination / contradiction / correct
        answers so the small model learns the grading pattern in-context.
        """
        system_prompt = f"""You are a strict Logic Validator. You are NOT a helpful assistant.
Your job is to check if the Student Answer is FACTUALLY present in the Context.
GRADING ALGORITHM:
1. IF the Student Answer mentions things NOT in the Context -> PENALTY (-50% of the marks).
2. IF the Student Answer interprets the text opposite to its meaning -> PENALTY (-100% of the marks).
3. IF the Student Answer is generic fluff -> SCORE: 0.
--- EXAMPLE 1 (HALLUCINATION) ---
Context: The sky is blue due to Rayleigh scattering.
Question: Why is the sky blue?
Student Answer: Because the ocean reflects the water into the sky.
Analysis: The Context mentions 'Rayleigh scattering'. The student mentions 'ocean reflection'. These are different. The student is hallucinating outside facts.
Score: 0/{max_marks}
--- EXAMPLE 2 (CONTRADICTION) ---
Context: One must efface one's own personality. Good prose is like a windowpane.
Question: What does the author mean?
Student Answer: It means we should see the author's personality clearly.
Analysis: The text says 'efface' (remove) personality. The student says 'see' personality. This is a direct contradiction.
Score: 0/{max_marks}
--- EXAMPLE 3 (CORRECT) ---
Context: Mitochondria is the powerhouse of the cell.
Question: What is mitochondria?
Student Answer: It is the cell's powerhouse.
Analysis: Matches the text meaning exactly.
Score: {max_marks}/{max_marks}
"""
        user_prompt = f"""
--- YOUR TASK ---
Context:
{context}
Question:
{question}
Student Answer:
{student_answer}
OUTPUT FORMAT:
Analysis: [Compare Student Answer vs Context. List any hallucinations or contradictions.]
Score: [X]/{max_marks}
"""
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
        input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = self.tokenizer(input_text, return_tensors="pt")
        # FIX: greedy decoding instead of sampling. Grading must be
        # reproducible — the same answer should always get the same score.
        # The old do_sample=True (temperature=0.1, top_p=0.2) made scores
        # nondeterministic across runs.
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=150,
                do_sample=False,
                repetition_penalty=1.2  # discourage the model looping on itself
            )
        # Decode only the newly generated tokens (strip the prompt echo).
        input_length = inputs['input_ids'].shape[1]
        response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
        return response
# ---------------------------------------------------------
# 3. NEW: ONNX RERANKER (Cross-Encoder)
# Uses existing 'optimum' & 'transformers' libs (No new deps)
# ---------------------------------------------------------
class OnnxReranker:
    """Second-stage retrieval: a cross-encoder that re-scores candidate chunks.

    Uses existing 'optimum' & 'transformers' libs (no new dependencies).
    """

    def __init__(self):
        # TinyBERT is ~17MB — cheap enough to score every candidate per query on CPU.
        self.model_name = "Xenova/ms-marco-TinyBERT-L-2-v2"
        print(f"πŸ”„ Loading Reranker: {self.model_name}...")
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = ORTModelForSequenceClassification.from_pretrained(
            self.model_name,
            export=False,
            provider=PROVIDERS[0]
        )

    def rank(self, query, docs, top_k=3):
        """Return the top_k documents most relevant to `query`, best first."""
        if not docs:
            return []
        # Cross-encoders score (query, passage) pairs jointly.
        encoded = self.tokenizer(
            [[query, candidate.page_content] for candidate in docs],
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt"
        )
        with torch.no_grad():
            logits = self.model(**encoded).logits
        # MS-MARCO heads either emit [irrelevant, relevant] (take column 1)
        # or a single relevance logit (flatten it).
        relevance = logits[:, 1] if logits.shape[1] == 2 else logits.flatten()
        relevance = relevance.numpy().tolist()
        # Stable index sort, highest relevance first; ties keep original order.
        order = sorted(range(len(docs)), key=lambda i: relevance[i], reverse=True)
        return [docs[i] for i in order[:top_k]]
# ---------------------------------------------------------
# 4. Main Application Logic
# ---------------------------------------------------------
class VectorSystem:
    """Wires together embeddings, FAISS retrieval, reranking, and LLM grading."""

    def __init__(self):
        self.vector_store = None
        self.embeddings = OnnxBgeEmbeddings()
        self.llm = LLMEvaluator()
        self.reranker = OnnxReranker()  # Initialize Reranker
        self.all_chunks = []
        self.total_chunks = 0

    def process_content(self, file_obj, raw_text):
        """Index content from EITHER an uploaded file OR pasted text.

        Returns a status message string for the UI.
        """
        has_file = file_obj is not None
        has_text = raw_text is not None and len(raw_text.strip()) > 0
        if has_file and has_text:
            return "❌ Error: Please provide EITHER a file OR paste text, not both at the same time."
        if not has_file and not has_text:
            return "⚠️ No content provided. Please upload a file or paste text."
        try:
            text = ""
            if has_file:
                # FIX: case-insensitive extension check so .PDF/.TXT uploads work.
                name = file_obj.name.lower()
                if name.endswith('.pdf'):
                    # FIX: close the PyMuPDF document (context manager) —
                    # previously the file handle leaked on every upload.
                    with fitz.open(file_obj.name) as doc:
                        for page in doc:
                            text += page.get_text()
                elif name.endswith('.txt'):
                    with open(file_obj.name, 'r', encoding='utf-8') as f:
                        text = f.read()
                else:
                    return "❌ Error: Only .pdf and .txt supported."
            else:
                text = raw_text
            # Smaller chunks (500 chars) improve reranking precision.
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
            texts = text_splitter.split_text(text)
            self.all_chunks = texts  # Keep plain text list for reference
            # Attach a sequential id so chunks can be traced back later.
            docs = [Document(page_content=t, metadata={"id": i}) for i, t in enumerate(texts)]
            self.total_chunks = len(docs)
            if not docs:
                return "Content empty."
            self.vector_store = FAISS.from_documents(docs, self.embeddings)
            return f"βœ… Indexed {self.total_chunks} chunks."
        except Exception as e:
            return f"Error: {str(e)}"

    def process_query(self, question, student_answer, max_marks):
        """Retrieve + rerank context for `question`, then grade `student_answer`.

        Returns (evidence_markdown, grading_feedback).
        """
        if not self.vector_store:
            return "⚠️ Please upload a file or paste text first.", ""
        if not question:
            return "⚠️ Enter a question.", ""
        # Step A: wide-net retrieval — fetch more candidates than needed so the
        # correct passage is almost certainly in the pool.
        initial_docs = self.vector_store.similarity_search(question, k=15)
        # Step B: the cross-encoder strictly re-judges relevance; keep top 3.
        top_docs = self.reranker.rank(question, initial_docs, top_k=3)
        # Step C: merge the surviving chunks into one context string.
        expanded_context = "\n\n---\n\n".join([d.page_content for d in top_docs])
        evidence_display = f"### πŸ“š Optimized Context (Top {len(top_docs)} chunks after Reranking):\n"
        evidence_display += f"> {expanded_context} ..."
        llm_feedback = "Please enter a student answer to grade."
        if student_answer:
            llm_feedback = self.llm.evaluate(expanded_context, question, student_answer, max_marks)
        return evidence_display, llm_feedback
# Build the shared pipeline once at startup (loads all three ONNX models).
system = VectorSystem()

# --- Gradio UI ---
with gr.Blocks(title="EduGenius AI Grader") as demo:
    gr.Markdown("# ⚑ EduGenius: CPU Optimized RAG")
    # FIX: caption said "0.5B" but LLMEvaluator loads Qwen2.5-1.5B-Instruct.
    gr.Markdown("Powered by **Qwen-2.5-1.5B**, **BGE-Small** & **TinyBERT Reranker**")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Source Input (Choose One)")
            pdf_input = gr.File(label="Option A: Upload Chapter (PDF/TXT)")
            gr.Markdown("**OR**")
            text_input = gr.Textbox(label="Option B: Paste Context", placeholder="Paste text here if you don't have a file...", lines=5)
            upload_btn = gr.Button("Index Content", variant="primary")
            status_msg = gr.Textbox(label="Status", interactive=False)
        with gr.Column(scale=2):
            with gr.Row():
                q_input = gr.Textbox(label="Question", scale=2)
                max_marks = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max Marks")
            a_input = gr.TextArea(label="Student Answer")
            run_btn = gr.Button("Retrieve & Grade", variant="secondary")
    with gr.Row():
        evidence_box = gr.Markdown(label="Context Used")
        grade_box = gr.Markdown(label="Grading Result")
    # Pass both inputs to the process_content function
    upload_btn.click(system.process_content, inputs=[pdf_input, text_input], outputs=[status_msg])
    run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])

if __name__ == "__main__":
    demo.launch()