try_answer / app.py
heerjtdev's picture
Update app.py
0b67337 verified
raw
history blame
24.7 kB
# import gradio as gr
# import fitz # PyMuPDF
# import torch
# import os
# import onnxruntime as ort
# # --- IMPORT SESSION OPTIONS ---
# from onnxruntime import SessionOptions, GraphOptimizationLevel
# # --- LANGCHAIN & RAG IMPORTS ---
# from langchain_text_splitters import RecursiveCharacterTextSplitter
# from langchain_community.vectorstores import FAISS
# from langchain_core.embeddings import Embeddings
# # --- ONNX & MODEL IMPORTS ---
# from transformers import AutoTokenizer
# from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM
# from huggingface_hub import snapshot_download
# # Force CPU Provider
# PROVIDERS = ["CPUExecutionProvider"]
# print(f"⚑ Running on: {PROVIDERS}")
# # ---------------------------------------------------------
# # 1. OPTIMIZED EMBEDDINGS (BGE-SMALL)
# # ---------------------------------------------------------
# class OnnxBgeEmbeddings(Embeddings):
# def __init__(self):
# model_name = "Xenova/bge-small-en-v1.5"
# print(f"πŸ”„ Loading Embeddings: {model_name}...")
# self.tokenizer = AutoTokenizer.from_pretrained(model_name)
# self.model = ORTModelForFeatureExtraction.from_pretrained(
# model_name,
# export=False,
# provider=PROVIDERS[0]
# )
# def _process_batch(self, texts):
# inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
# with torch.no_grad():
# outputs = self.model(**inputs)
# embeddings = outputs.last_hidden_state[:, 0]
# embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
# return embeddings.numpy().tolist()
# def embed_documents(self, texts):
# return self._process_batch(texts)
# def embed_query(self, text):
# return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
# # ---------------------------------------------------------
# # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
# # ---------------------------------------------------------
# class LLMEvaluator:
# def __init__(self):
# # Qwen 2.5 0.5B is fast but needs "Few-Shot" examples to be strict.
# self.repo_id = "onnx-community/Qwen2.5-1.5B-Instruct"
# self.local_dir = "onnx_qwen_local"
# print(f"πŸ”„ Preparing CPU LLM: {self.repo_id}...")
# if not os.path.exists(self.local_dir):
# print(f"πŸ“₯ Downloading FP16 model to {self.local_dir}...")
# snapshot_download(
# repo_id=self.repo_id,
# local_dir=self.local_dir,
# allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
# )
# print("βœ… Download complete.")
# self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
# sess_options = SessionOptions()
# sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
# self.model = ORTModelForCausalLM.from_pretrained(
# self.local_dir,
# subfolder="onnx",
# file_name="model_fp16.onnx",
# use_cache=True,
# use_io_binding=False,
# provider=PROVIDERS[0],
# session_options=sess_options
# )
# def evaluate(self, context, question, student_answer, max_marks):
# # --- IMPROVED PROMPT STRATEGY ---
# # 1. Role: We set the persona to a "Strict Logical Validator" not a "Teacher".
# # 2. Few-Shot: We give examples of HALLUCINATIONS getting 0 marks.
# system_prompt = f"""You are a strict Logic Validator. You are NOT a helpful assistant.
# Your job is to check if the Student Answer is FACTUALLY present in the Context.
# GRADING ALGORITHM:
# 1. IF the Student Answer mentions things NOT in the Context -> PENALTY (-50% of the marks).
# 2. IF the Student Answer interprets the text opposite to its meaning -> PENALTY (-100% of the marks).
# 3. IF the Student Answer is generic fluff -> SCORE: 0.
# --- EXAMPLE 1 (HALLUCINATION) ---
# Context: The sky is blue due to Rayleigh scattering.
# Question: Why is the sky blue?
# Student Answer: Because the ocean reflects the water into the sky.
# Analysis: The Context mentions 'Rayleigh scattering'. The student mentions 'ocean reflection'. These are different. The student is hallucinating outside facts.
# Score: 0/{max_marks}
# --- EXAMPLE 2 (CONTRADICTION) ---
# Context: One must efface one's own personality. Good prose is like a windowpane.
# Question: What does the author mean?
# Student Answer: It means we should see the author's personality clearly.
# Analysis: The text says 'efface' (remove) personality. The student says 'see' personality. This is a direct contradiction.
# Score: 0/{max_marks}
# --- EXAMPLE 3 (CORRECT) ---
# Context: Mitochondria is the powerhouse of the cell.
# Question: What is mitochondria?
# Student Answer: It is the cell's powerhouse.
# Analysis: Matches the text meaning exactly.
# Score: {max_marks}/{max_marks}
# """
# user_prompt = f"""
# --- YOUR TASK ---
# Context:
# {context}
# Question:
# {question}
# Student Answer:
# {student_answer}
# OUTPUT FORMAT:
# Analysis: [Compare Student Answer vs Context. List any hallucinations or contradictions.]
# Score: [X]/{max_marks}
# """
# messages = [
# {"role": "system", "content": system_prompt},
# {"role": "user", "content": user_prompt}
# ]
# input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# inputs = self.tokenizer(input_text, return_tensors="pt")
# # Lower temperature for strictness
# with torch.no_grad():
# outputs = self.model.generate(
# **inputs,
# max_new_tokens=150,
# temperature=0.1, # Strict logic, no creativity
# top_p=0.2, # Cut off unlikely tokens
# do_sample=True,
# repetition_penalty=1.2 # Penalize repetition
# )
# input_length = inputs['input_ids'].shape[1]
# response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
# return response
# # ---------------------------------------------------------
# # 3. Main Application Logic
# # ---------------------------------------------------------
# class VectorSystem:
# def __init__(self):
# self.vector_store = None
# self.embeddings = OnnxBgeEmbeddings()
# self.llm = LLMEvaluator()
# self.all_chunks = []
# self.total_chunks = 0
# def process_content(self, file_obj, raw_text):
# # LOGIC: Check for exclusivity (Cannot have both file and text)
# has_file = file_obj is not None
# has_text = raw_text is not None and len(raw_text.strip()) > 0
# if has_file and has_text:
# return "❌ Error: Please provide EITHER a file OR paste text, not both at the same time."
# if not has_file and not has_text:
# return "⚠️ No content provided. Please upload a file or paste text."
# try:
# text = ""
# # Case 1: Process File
# if has_file:
# if file_obj.name.endswith('.pdf'):
# doc = fitz.open(file_obj.name)
# for page in doc: text += page.get_text()
# elif file_obj.name.endswith('.txt'):
# with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
# else:
# return "❌ Error: Only .pdf and .txt supported."
# # Case 2: Process Raw Text
# else:
# text = raw_text
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
# self.all_chunks = text_splitter.split_text(text)
# self.total_chunks = len(self.all_chunks)
# if not self.all_chunks: return "Content empty."
# metadatas = [{"id": i} for i in range(self.total_chunks)]
# self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
# return f"βœ… Indexed {self.total_chunks} chunks."
# except Exception as e:
# return f"Error: {str(e)}"
# def process_query(self, question, student_answer, max_marks):
# if not self.vector_store: return "⚠️ Please upload a file or paste text first.", ""
# if not question: return "⚠️ Enter a question.", ""
# results = self.vector_store.similarity_search_with_score(question, k=1)
# top_doc, score = results[0]
# center_id = top_doc.metadata['id']
# start_id = max(0, center_id - 1)
# end_id = min(self.total_chunks - 1, center_id + 1)
# expanded_context = ""
# for i in range(start_id, end_id + 1):
# expanded_context += self.all_chunks[i] + "\n"
# evidence_display = f"### πŸ“š Expanded Context (Chunks {start_id} to {end_id}):\n"
# evidence_display += f"> ... {expanded_context} ..."
# llm_feedback = "Please enter a student answer to grade."
# if student_answer:
# llm_feedback = self.llm.evaluate(expanded_context, question, student_answer, max_marks)
# return evidence_display, llm_feedback
# system = VectorSystem()
# with gr.Blocks(title="EduGenius AI Grader") as demo:
# gr.Markdown("# ⚑ EduGenius: CPU Optimized RAG")
# gr.Markdown("Powered by **Qwen-2.5-0.5B** and **BGE-Small** (ONNX Optimized)")
# with gr.Row():
# with gr.Column(scale=1):
# gr.Markdown("### Source Input (Choose One)")
# pdf_input = gr.File(label="Option A: Upload Chapter (PDF/TXT)")
# gr.Markdown("**OR**")
# text_input = gr.Textbox(label="Option B: Paste Context", placeholder="Paste text here if you don't have a file...", lines=5)
# upload_btn = gr.Button("Index Content", variant="primary")
# status_msg = gr.Textbox(label="Status", interactive=False)
# with gr.Column(scale=2):
# with gr.Row():
# q_input = gr.Textbox(label="Question", scale=2)
# max_marks = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max Marks")
# a_input = gr.TextArea(label="Student Answer")
# run_btn = gr.Button("Retrieve & Grade", variant="secondary")
# with gr.Row():
# evidence_box = gr.Markdown(label="Context Used")
# grade_box = gr.Markdown(label="Grading Result")
# # Pass both inputs to the process_content function
# upload_btn.click(system.process_content, inputs=[pdf_input, text_input], outputs=[status_msg])
# run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])
# if __name__ == "__main__":
# demo.launch()
import gradio as gr
import fitz # PyMuPDF
import torch
import os
import numpy as np
# --- IMPORT SESSION OPTIONS ---
from onnxruntime import SessionOptions, GraphOptimizationLevel
# --- LANGCHAIN & RAG IMPORTS ---
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.embeddings import Embeddings
from langchain_core.documents import Document
# --- ONNX & MODEL IMPORTS ---
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM, ORTModelForSequenceClassification
from huggingface_hub import snapshot_download
# Force CPU Provider
PROVIDERS = ["CPUExecutionProvider"]
print(f"⚑ Running on: {PROVIDERS}")
# ---------------------------------------------------------
# 1. OPTIMIZED EMBEDDINGS (BGE-SMALL)
# ---------------------------------------------------------
class OnnxBgeEmbeddings(Embeddings):
    """LangChain-compatible embeddings backed by an ONNX BGE-small model on CPU.

    Pooling scheme is CLS-token + L2 normalization, the recommended setup for
    BGE models so FAISS inner-product search behaves like cosine similarity.
    """

    def __init__(self):
        model_name = "Xenova/bge-small-en-v1.5"
        print(f"πŸ”„ Loading Embeddings: {model_name}...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = ORTModelForFeatureExtraction.from_pretrained(
            model_name,
            export=False,
            provider=PROVIDERS[0]
        )

    def _process_batch(self, texts):
        """Embed a list of strings; returns list[list[float]], L2-normalized."""
        inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
        with torch.no_grad():
            outputs = self.model(**inputs)
        # CLS-token pooling (position 0), then L2-normalize each vector.
        embeddings = outputs.last_hidden_state[:, 0]
        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
        return embeddings.numpy().tolist()

    def embed_documents(self, texts, batch_size=32):
        # FIX: embed in fixed-size batches instead of one giant batch.
        # Tokenizing the whole corpus at once pads every chunk to the longest
        # one, which can exhaust CPU memory on large documents. Results are
        # identical because padding is masked out of the CLS embedding.
        results = []
        for start in range(0, len(texts), batch_size):
            results.extend(self._process_batch(texts[start:start + batch_size]))
        return results

    def embed_query(self, text):
        # BGE models expect this instruction prefix on the query side only.
        return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
# ---------------------------------------------------------
# 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
# ---------------------------------------------------------
class LLMEvaluator:
    """Grades a student answer against retrieved context using a small ONNX LLM.

    Downloads the FP16 ONNX export of Qwen2.5-1.5B-Instruct on first run and
    serves it on CPU via onnxruntime.
    """

    def __init__(self):
        # NOTE: this is the 1.5B Instruct model (the old comment said 0.5B).
        # Small instruct models need the few-shot examples in the system
        # prompt below to grade strictly instead of being agreeable.
        self.repo_id = "onnx-community/Qwen2.5-1.5B-Instruct"
        self.local_dir = "onnx_qwen_local"
        print(f"πŸ”„ Preparing CPU LLM: {self.repo_id}...")
        if not os.path.exists(self.local_dir):
            print(f"πŸ“₯ Downloading FP16 model to {self.local_dir}...")
            # Only fetch the files actually needed: config, tokenizer assets,
            # chat template, and the FP16 ONNX weights.
            snapshot_download(
                repo_id=self.repo_id,
                local_dir=self.local_dir,
                allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_fp16.onnx*"]
            )
            print("βœ… Download complete.")
        self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
        sess_options = SessionOptions()
        # Graph optimization disabled: optimizing a 1.5B FP16 graph at session
        # creation is slow and memory-hungry on a small CPU machine.
        sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
        self.model = ORTModelForCausalLM.from_pretrained(
            self.local_dir,
            subfolder="onnx",
            file_name="model_fp16.onnx",
            use_cache=True,
            use_io_binding=False,
            provider=PROVIDERS[0],
            session_options=sess_options
        )

    def evaluate(self, context, question, student_answer, max_marks):
        """Return the model's 'Analysis: ... Score: X/max_marks' text for one answer.

        Few-shot prompt strategy: the persona is a strict "Logic Validator",
        with worked examples of hallucination / contradiction / correct
        answers so the small model learns the grading pattern in-context.
        """
        system_prompt = f"""You are a strict Logic Validator. You are NOT a helpful assistant.
Your job is to check if the Student Answer is FACTUALLY present in the Context.
GRADING ALGORITHM:
1. IF the Student Answer mentions things NOT in the Context -> PENALTY (-50% of the marks).
2. IF the Student Answer interprets the text opposite to its meaning -> PENALTY (-100% of the marks).
3. IF the Student Answer is generic fluff -> SCORE: 0.
--- EXAMPLE 1 (HALLUCINATION) ---
Context: The sky is blue due to Rayleigh scattering.
Question: Why is the sky blue?
Student Answer: Because the ocean reflects the water into the sky.
Analysis: The Context mentions 'Rayleigh scattering'. The student mentions 'ocean reflection'. These are different. The student is hallucinating outside facts.
Score: 0/{max_marks}
--- EXAMPLE 2 (CONTRADICTION) ---
Context: One must efface one's own personality. Good prose is like a windowpane.
Question: What does the author mean?
Student Answer: It means we should see the author's personality clearly.
Analysis: The text says 'efface' (remove) personality. The student says 'see' personality. This is a direct contradiction.
Score: 0/{max_marks}
--- EXAMPLE 3 (CORRECT) ---
Context: Mitochondria is the powerhouse of the cell.
Question: What is mitochondria?
Student Answer: It is the cell's powerhouse.
Analysis: Matches the text meaning exactly.
Score: {max_marks}/{max_marks}
"""
        user_prompt = f"""
--- YOUR TASK ---
Context:
{context}
Question:
{question}
Student Answer:
{student_answer}
OUTPUT FORMAT:
Analysis: [Compare Student Answer vs Context. List any hallucinations or contradictions.]
Score: [X]/{max_marks}
"""
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
        input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = self.tokenizer(input_text, return_tensors="pt")
        # FIX: greedy decoding instead of sampling. Grading must be
        # reproducible — the same answer should always get the same score.
        # The old do_sample=True (temperature=0.1, top_p=0.2) made scores
        # nondeterministic across runs.
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=150,
                do_sample=False,
                repetition_penalty=1.2  # discourage the model looping on itself
            )
        # Decode only the newly generated tokens (strip the prompt echo).
        input_length = inputs['input_ids'].shape[1]
        response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
        return response
# ---------------------------------------------------------
# 3. NEW: ONNX RERANKER (Cross-Encoder)
# Uses existing 'optimum' & 'transformers' libs (No new deps)
# ---------------------------------------------------------
class OnnxReranker:
    """Second-stage retrieval: a cross-encoder that re-scores candidate chunks.

    Uses existing 'optimum' & 'transformers' libs (no new dependencies).
    """

    def __init__(self):
        # TinyBERT is ~17MB — cheap enough to score every candidate per query on CPU.
        self.model_name = "Xenova/ms-marco-TinyBERT-L-2-v2"
        print(f"πŸ”„ Loading Reranker: {self.model_name}...")
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = ORTModelForSequenceClassification.from_pretrained(
            self.model_name,
            export=False,
            provider=PROVIDERS[0]
        )

    def rank(self, query, docs, top_k=3):
        """Return the top_k documents most relevant to `query`, best first."""
        if not docs:
            return []
        # Cross-encoders score (query, passage) pairs jointly.
        encoded = self.tokenizer(
            [[query, candidate.page_content] for candidate in docs],
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt"
        )
        with torch.no_grad():
            logits = self.model(**encoded).logits
        # MS-MARCO heads either emit [irrelevant, relevant] (take column 1)
        # or a single relevance logit (flatten it).
        relevance = logits[:, 1] if logits.shape[1] == 2 else logits.flatten()
        relevance = relevance.numpy().tolist()
        # Stable index sort, highest relevance first; ties keep original order.
        order = sorted(range(len(docs)), key=lambda i: relevance[i], reverse=True)
        return [docs[i] for i in order[:top_k]]
# ---------------------------------------------------------
# 4. Main Application Logic
# ---------------------------------------------------------
class VectorSystem:
    """Wires together embeddings, FAISS retrieval, reranking, and LLM grading."""

    def __init__(self):
        self.vector_store = None
        self.embeddings = OnnxBgeEmbeddings()
        self.llm = LLMEvaluator()
        self.reranker = OnnxReranker()  # Initialize Reranker
        self.all_chunks = []
        self.total_chunks = 0

    def process_content(self, file_obj, raw_text):
        """Index content from EITHER an uploaded file OR pasted text.

        Returns a status message string for the UI.
        """
        has_file = file_obj is not None
        has_text = raw_text is not None and len(raw_text.strip()) > 0
        if has_file and has_text:
            return "❌ Error: Please provide EITHER a file OR paste text, not both at the same time."
        if not has_file and not has_text:
            return "⚠️ No content provided. Please upload a file or paste text."
        try:
            text = ""
            if has_file:
                # FIX: case-insensitive extension check so .PDF/.TXT uploads work.
                name = file_obj.name.lower()
                if name.endswith('.pdf'):
                    # FIX: close the PyMuPDF document (context manager) —
                    # previously the file handle leaked on every upload.
                    with fitz.open(file_obj.name) as doc:
                        for page in doc:
                            text += page.get_text()
                elif name.endswith('.txt'):
                    with open(file_obj.name, 'r', encoding='utf-8') as f:
                        text = f.read()
                else:
                    return "❌ Error: Only .pdf and .txt supported."
            else:
                text = raw_text
            # Smaller chunks (500 chars) improve reranking precision.
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
            texts = text_splitter.split_text(text)
            self.all_chunks = texts  # Keep plain text list for reference
            # Attach a sequential id so chunks can be traced back later.
            docs = [Document(page_content=t, metadata={"id": i}) for i, t in enumerate(texts)]
            self.total_chunks = len(docs)
            if not docs:
                return "Content empty."
            self.vector_store = FAISS.from_documents(docs, self.embeddings)
            return f"βœ… Indexed {self.total_chunks} chunks."
        except Exception as e:
            return f"Error: {str(e)}"

    def process_query(self, question, student_answer, max_marks):
        """Retrieve + rerank context for `question`, then grade `student_answer`.

        Returns (evidence_markdown, grading_feedback).
        """
        if not self.vector_store:
            return "⚠️ Please upload a file or paste text first.", ""
        if not question:
            return "⚠️ Enter a question.", ""
        # Step A: wide-net retrieval — fetch more candidates than needed so the
        # correct passage is almost certainly in the pool.
        initial_docs = self.vector_store.similarity_search(question, k=15)
        # Step B: the cross-encoder strictly re-judges relevance; keep top 3.
        top_docs = self.reranker.rank(question, initial_docs, top_k=3)
        # Step C: merge the surviving chunks into one context string.
        expanded_context = "\n\n---\n\n".join([d.page_content for d in top_docs])
        evidence_display = f"### πŸ“š Optimized Context (Top {len(top_docs)} chunks after Reranking):\n"
        evidence_display += f"> {expanded_context} ..."
        llm_feedback = "Please enter a student answer to grade."
        if student_answer:
            llm_feedback = self.llm.evaluate(expanded_context, question, student_answer, max_marks)
        return evidence_display, llm_feedback
# Build the shared pipeline once at startup (loads all three ONNX models).
system = VectorSystem()

# --- Gradio UI ---
with gr.Blocks(title="EduGenius AI Grader") as demo:
    gr.Markdown("# ⚑ EduGenius: CPU Optimized RAG")
    # FIX: caption said "0.5B" but LLMEvaluator loads Qwen2.5-1.5B-Instruct.
    gr.Markdown("Powered by **Qwen-2.5-1.5B**, **BGE-Small** & **TinyBERT Reranker**")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Source Input (Choose One)")
            pdf_input = gr.File(label="Option A: Upload Chapter (PDF/TXT)")
            gr.Markdown("**OR**")
            text_input = gr.Textbox(label="Option B: Paste Context", placeholder="Paste text here if you don't have a file...", lines=5)
            upload_btn = gr.Button("Index Content", variant="primary")
            status_msg = gr.Textbox(label="Status", interactive=False)
        with gr.Column(scale=2):
            with gr.Row():
                q_input = gr.Textbox(label="Question", scale=2)
                max_marks = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max Marks")
            a_input = gr.TextArea(label="Student Answer")
            run_btn = gr.Button("Retrieve & Grade", variant="secondary")
    with gr.Row():
        evidence_box = gr.Markdown(label="Context Used")
        grade_box = gr.Markdown(label="Grading Result")
    # Pass both inputs to the process_content function
    upload_btn.click(system.process_content, inputs=[pdf_input, text_input], outputs=[status_msg])
    run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])

if __name__ == "__main__":
    demo.launch()