# (Hugging Face Spaces page banner captured by the scrape — "Spaces: Running" —
# not part of the application code.)
| import gradio as gr | |
| import fitz # PyMuPDF | |
| import torch | |
| import os | |
| # --- LANGCHAIN & RAG IMPORTS --- | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_core.embeddings import Embeddings | |
| # --- ONNX & MODEL IMPORTS --- | |
| from transformers import AutoTokenizer | |
| from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM | |
| from huggingface_hub import snapshot_download | |
| import onnxruntime as ort | |
# Check available hardware accelerators. ONNX Runtime lists execution
# providers in priority order (e.g. CUDAExecutionProvider before
# CPUExecutionProvider), so PROVIDERS[0] used below is the best backend
# available on this machine.
PROVIDERS = ort.get_available_providers()
print(f"β‘ Hardware Acceleration Providers: {PROVIDERS}")
| # --------------------------------------------------------- | |
| # 1. OPTIMIZED EMBEDDINGS (BGE-SMALL) | |
| # --------------------------------------------------------- | |
class OnnxBgeEmbeddings(Embeddings):
    """LangChain-compatible embeddings backed by an ONNX-exported BGE model.

    Uses BAAI/bge-small-en-v1.5 via Optimum's ONNX Runtime wrapper with
    CLS-token pooling + L2 normalisation (the pooling scheme BGE models
    are trained with), on the best available execution provider.
    """

    def __init__(self, model_name: str = "BAAI/bge-small-en-v1.5"):
        print(f"π Loading Faster Embeddings: {model_name}...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = ORTModelForFeatureExtraction.from_pretrained(
            model_name,
            export=False,
            provider=PROVIDERS[0],  # auto-select best hardware (CUDA/CoreML/CPU)
        )

    def _process_batch(self, texts: list[str]) -> list[list[float]]:
        """Tokenize, embed, CLS-pool and L2-normalise a batch of texts."""
        # FIX: the tokenizer raises on an empty batch; return early instead.
        if not texts:
            return []
        inputs = self.tokenizer(
            texts, padding=True, truncation=True, max_length=512, return_tensors="pt"
        )
        # Move inputs to the model's device (mostly handled by Optimum already).
        device = self.model.device
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = self.model(**inputs)
        # CLS-token pooling, then unit-length normalisation.
        embeddings = outputs.last_hidden_state[:, 0]
        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
        # .cpu() detaches from any device before the numpy conversion.
        return embeddings.cpu().numpy().tolist()

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed raw document chunks (no instruction prefix, per BGE usage)."""
        return self._process_batch(texts)

    def embed_query(self, text: str) -> list[float]:
        """Embed a query with the BGE retrieval instruction prefix."""
        prefix = "Represent this sentence for searching relevant passages: "
        return self._process_batch([prefix + text])[0]
| # --------------------------------------------------------- | |
| # 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) | |
| # --------------------------------------------------------- | |
class LLMEvaluator:
    """Strict answer grader built on ONNX-exported Qwen2.5-0.5B-Instruct.

    Downloads the model snapshot once into a local directory, then runs
    greedy decoding through Optimum's ONNX Runtime causal-LM wrapper.
    """

    def __init__(self):
        self.repo_id = "Xenova/Qwen2.5-0.5B-Instruct"
        self.local_dir = "onnx_qwen_local"
        print(f"π Preparing Ultra-Fast LLM: {self.repo_id}...")
        if not os.path.exists(self.local_dir):
            print(f"π₯ Downloading Model to {self.local_dir}...")
            # Xenova repos ship ready-made ONNX weights; no file filter needed.
            snapshot_download(repo_id=self.repo_id, local_dir=self.local_dir)
            print("β Download complete.")
        self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
        self.model = ORTModelForCausalLM.from_pretrained(
            self.local_dir,
            use_cache=True,
            use_io_binding=True,  # major speedup on GPU providers
            provider=PROVIDERS[0],
        )

    def evaluate(self, context: str, question: str, student_answer: str, max_marks) -> str:
        """Grade `student_answer` against `context`; returns 'Score: X/N ...' text."""
        # Qwen uses the ChatML format implicitly via its chat template.
        messages = [
            {"role": "system", "content": "You are a strict academic grader. Verify the student answer against the context. Be harsh. Do not hallucinate."},
            {"role": "user", "content": f"""
CONTEXT: {context}
QUESTION: {question}
ANSWER: {student_answer}
TASK: Grade out of {max_marks}.
RULES:
1. If wrong, 0 marks.
2. Be strict.
3. Format: 'Score: X/{max_marks} \n Feedback: ...'
"""}
        ]
        input_text = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs = self.tokenizer(input_text, return_tensors="pt")
        # Move tensors to the model's device for IO binding.
        device = self.model.device
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=75,  # only a short score + feedback is needed
                do_sample=False,    # greedy decoding; temperature dropped (ignored when not sampling)
            )
        # FIX: `inputs` is a plain dict after the device-move comprehension, so
        # attribute access (`inputs.input_ids`) raised AttributeError — index it.
        prompt_len = inputs["input_ids"].shape[1]
        # Decode only the newly generated tokens, not the echoed prompt.
        return self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
| # --------------------------------------------------------- | |
| # 3. Main Application Logic (Unchanged but uses new classes) | |
| # --------------------------------------------------------- | |
class VectorSystem:
    """End-to-end RAG pipeline: index a document, retrieve context, grade answers."""

    def __init__(self):
        self.vector_store = None                # FAISS index, built by process_file()
        self.embeddings = OnnxBgeEmbeddings()   # BGE-small ONNX embeddings
        self.llm = LLMEvaluator()               # Qwen2.5-0.5B grader
        self.all_chunks = []                    # raw chunk texts, addressable by id
        self.total_chunks = 0

    def process_file(self, file_obj):
        """Extract text from an uploaded .pdf/.txt file and build the FAISS index.

        Returns a human-readable status string (success or error message).
        """
        if file_obj is None:
            return "No file uploaded."
        try:
            path = file_obj.name
            if path.endswith('.pdf'):
                # FIX: close the PyMuPDF document when done (it was leaked).
                with fitz.open(path) as doc:
                    text = "".join(page.get_text() for page in doc)
            elif path.endswith('.txt'):
                with open(path, 'r', encoding='utf-8') as f:
                    text = f.read()
            else:
                return "β Error: Only .pdf and .txt supported."
            splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
            self.all_chunks = splitter.split_text(text)
            self.total_chunks = len(self.all_chunks)
            if not self.all_chunks:
                return "File empty."
            # Sequential ids let process_query() expand to neighbouring chunks.
            metadatas = [{"id": i} for i in range(self.total_chunks)]
            self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
            return f"β Indexed {self.total_chunks} chunks."
        except Exception as e:
            return f"Error: {str(e)}"

    def process_query(self, question, student_answer, max_marks):
        """Retrieve context around the best-matching chunk and optionally grade.

        Returns (evidence_markdown, llm_feedback) strings.
        """
        if not self.vector_store:
            return "β οΈ Please upload a file first.", ""
        if not question:
            return "β οΈ Enter a question.", ""
        results = self.vector_store.similarity_search_with_score(question, k=1)
        top_doc, _score = results[0]  # FAISS distance unused for display
        center_id = top_doc.metadata['id']
        # Expand to the neighbouring chunks (±1) for fuller context.
        start_id = max(0, center_id - 1)
        end_id = min(self.total_chunks - 1, center_id + 1)
        expanded_context = "".join(
            self.all_chunks[i] + "\n" for i in range(start_id, end_id + 1)
        )
        evidence_display = f"### π Expanded Context (Chunks {start_id} to {end_id}):\n"
        evidence_display += f"> ... {expanded_context} ..."
        llm_feedback = "Please enter a student answer to grade."
        if student_answer:
            llm_feedback = self.llm.evaluate(expanded_context, question, student_answer, max_marks)
        return evidence_display, llm_feedback
# --- Gradio UI wiring (component creation order defines the layout) ---
# Single shared pipeline instance used by both button handlers.
system = VectorSystem()
with gr.Blocks(title="EduGenius AI Grader") as demo:
    gr.Markdown("# β‘ EduGenius: Ultra-Fast RAG")
    gr.Markdown("Powered by **Qwen-2.5-0.5B** and **BGE-Small** (ONNX Optimized)")
    with gr.Row():
        # Left column: document upload + indexing status.
        with gr.Column(scale=1):
            pdf_input = gr.File(label="1. Upload Chapter")
            upload_btn = gr.Button("Index Content", variant="primary")
            status_msg = gr.Textbox(label="Status", interactive=False)
        # Right column: question, marks slider, student answer, grade action.
        with gr.Column(scale=2):
            with gr.Row():
                q_input = gr.Textbox(label="Question", scale=2)
                max_marks = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max Marks")
            a_input = gr.TextArea(label="Student Answer")
            run_btn = gr.Button("Retrieve & Grade", variant="secondary")
    # NOTE(review): indentation was lost in extraction — this Row is assumed to
    # sit at the top Blocks level (outputs below the input columns); confirm.
    with gr.Row():
        # Output panes: retrieved context and the LLM's grading verdict.
        evidence_box = gr.Markdown(label="Context Used")
        grade_box = gr.Markdown(label="Grading Result")
    # Event handlers: buttons delegate to the shared VectorSystem instance.
    upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
    run_btn.click(system.process_query, inputs=[q_input, a_input, max_marks], outputs=[evidence_box, grade_box])
if __name__ == "__main__":
    demo.launch()