heerjtdev committed on
Commit
31baf0a
·
verified ·
1 Parent(s): 4cc40b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -284
app.py CHANGED
@@ -179,229 +179,6 @@
179
 
180
 
181
 
182
- # import gradio as gr
183
- # import fitz # PyMuPDF
184
- # import torch
185
- # import os
186
-
187
- # # --- LANGCHAIN & RAG IMPORTS ---
188
- # from langchain_text_splitters import RecursiveCharacterTextSplitter
189
- # from langchain_community.vectorstores import FAISS
190
- # from langchain_core.embeddings import Embeddings
191
-
192
- # # --- ONNX & MODEL IMPORTS ---
193
- # from transformers import AutoTokenizer
194
- # from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM
195
- # from huggingface_hub import snapshot_download
196
-
197
- # # ---------------------------------------------------------
198
- # # 1. Custom ONNX Embedding Class (BGE-Large)
199
- # # ---------------------------------------------------------
200
- # class OnnxBgeEmbeddings(Embeddings):
201
- # def __init__(self, model_name="BAAI/bge-large-en-v1.5"):
202
- # print(f"πŸ”„ Loading Embeddings: {model_name}...")
203
- # self.tokenizer = AutoTokenizer.from_pretrained(model_name)
204
- # # Note: export=True will re-convert on every restart.
205
- # # For production, you'd want to save this permanently, but this works for now.
206
- # self.model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
207
-
208
- # def _process_batch(self, texts):
209
- # inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
210
- # with torch.no_grad():
211
- # outputs = self.model(**inputs)
212
- # # CLS pooling for BGE
213
- # embeddings = outputs.last_hidden_state[:, 0]
214
- # embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
215
- # return embeddings.numpy().tolist()
216
-
217
- # def embed_documents(self, texts):
218
- # return self._process_batch(texts)
219
-
220
- # def embed_query(self, text):
221
- # return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
222
-
223
- # # ---------------------------------------------------------
224
- # # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
225
- # # ---------------------------------------------------------
226
-
227
- # class LLMEvaluator:
228
- # def __init__(self):
229
- # self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
230
- # self.local_dir = "onnx_llama_local"
231
-
232
- # print(f"πŸ”„ Preparing LLM: {self.repo_id}...")
233
-
234
- # # [FIXED DOWNLOADER]
235
- # print(f"πŸ“₯ Downloading FP16 model + data to {self.local_dir}...")
236
- # snapshot_download(
237
- # repo_id=self.repo_id,
238
- # local_dir=self.local_dir,
239
- # local_dir_use_symlinks=False,
240
- # allow_patterns=[
241
- # "config.json",
242
- # "generation_config.json",
243
- # "tokenizer*",
244
- # "special_tokens_map.json",
245
- # "*.jinja",
246
- # "onnx/model_fp16.onnx*" # WILDCARD '*' ensures we get .onnx AND .onnx_data
247
- # ]
248
- # )
249
- # print("βœ… Download complete.")
250
-
251
- # self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
252
-
253
- # # [CRITICAL FIX]
254
- # # Separating 'subfolder' and 'file_name' is required by Optimum
255
- # self.model = ORTModelForCausalLM.from_pretrained(
256
- # self.local_dir,
257
- # subfolder="onnx", # Point to the subfolder
258
- # file_name="model_fp16.onnx", # Just the filename
259
- # use_cache=True,
260
- # use_io_binding=False
261
- # )
262
-
263
- # def evaluate(self, context, question, student_answer):
264
- # # Prompt Engineering for Llama 3
265
- # messages = [
266
- # {"role": "system", "content": "You are a helpful academic grader. Grade the student answer based ONLY on the provided context."},
267
- # {"role": "user", "content": f"""
268
- # ### CONTEXT:
269
- # {context}
270
-
271
- # ### QUESTION:
272
- # {question}
273
-
274
- # ### STUDENT ANSWER:
275
- # {student_answer}
276
-
277
- # ### INSTRUCTIONS:
278
- # 1. Is the answer correct?
279
- # 2. Score out of 10.
280
- # 3. Explanation.
281
- # """}
282
- # ]
283
-
284
- # # Format input using the chat template
285
- # input_text = self.tokenizer.apply_chat_template(
286
- # messages,
287
- # tokenize=False,
288
- # add_generation_prompt=True
289
- # )
290
-
291
- # inputs = self.tokenizer(input_text, return_tensors="pt")
292
-
293
- # # Generate response
294
- # with torch.no_grad():
295
- # outputs = self.model.generate(
296
- # **inputs,
297
- # max_new_tokens=256,
298
- # temperature=0.3,
299
- # do_sample=True,
300
- # top_p=0.9
301
- # )
302
-
303
- # # Decode response
304
- # response = self.tokenizer.decode(
305
- # outputs[0][inputs.input_ids.shape[1]:],
306
- # skip_special_tokens=True
307
- # )
308
- # return response
309
-
310
- # # ---------------------------------------------------------
311
- # # 3. Main Application Logic
312
- # # ---------------------------------------------------------
313
- # class VectorSystem:
314
- # def __init__(self):
315
- # self.vector_store = None
316
- # self.embeddings = OnnxBgeEmbeddings()
317
- # self.llm = LLMEvaluator() # Initialize LLM
318
- # self.all_chunks = []
319
-
320
- # def process_file(self, file_obj):
321
- # if file_obj is None: return "No file uploaded."
322
- # try:
323
- # text = ""
324
- # if file_obj.name.endswith('.pdf'):
325
- # doc = fitz.open(file_obj.name)
326
- # for page in doc: text += page.get_text()
327
- # elif file_obj.name.endswith('.txt'):
328
- # with open(file_obj.name, 'r', encoding='utf-8') as f: text = f.read()
329
- # else:
330
- # return "❌ Error: Only .pdf and .txt supported."
331
-
332
- # text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
333
- # self.all_chunks = text_splitter.split_text(text)
334
-
335
- # if not self.all_chunks: return "File empty."
336
-
337
- # metadatas = [{"id": i} for i in range(len(self.all_chunks))]
338
- # self.vector_store = FAISS.from_texts(self.all_chunks, self.embeddings, metadatas=metadatas)
339
- # return f"βœ… Indexed {len(self.all_chunks)} chunks."
340
- # except Exception as e:
341
- # return f"Error: {str(e)}"
342
-
343
- # def process_query(self, question, student_answer):
344
- # if not self.vector_store: return "⚠️ Please upload a file first.", ""
345
- # if not question: return "⚠️ Enter a question.", ""
346
-
347
- # # 1. Retrieve
348
- # results = self.vector_store.similarity_search_with_score(question, k=3)
349
-
350
- # # Prepare context for LLM
351
- # context_text = "\n\n".join([doc.page_content for doc, _ in results])
352
-
353
- # # Prepare Evidence Output for UI
354
- # evidence_display = "### πŸ“š Retrieved Context:\n"
355
- # for i, (doc, score) in enumerate(results):
356
- # evidence_display += f"**Chunk {i+1}** (Score: {score:.4f}):\n> {doc.page_content}\n\n"
357
-
358
- # # 2. Evaluate (if answer provided)
359
- # llm_feedback = "Please enter a student answer to grade."
360
- # if student_answer:
361
- # llm_feedback = self.llm.evaluate(context_text, question, student_answer)
362
-
363
- # return evidence_display, llm_feedback
364
-
365
- # # Initialize
366
- # system = VectorSystem()
367
-
368
- # # --- GRADIO UI ---
369
- # with gr.Blocks(title="EduGenius AI Grader") as demo:
370
- # gr.Markdown("# 🧠 EduGenius: RAG + LLM Grading")
371
- # gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B** (Evaluation) - All ONNX Optimized.")
372
-
373
- # with gr.Row():
374
- # with gr.Column(scale=1):
375
- # pdf_input = gr.File(label="1. Upload Chapter (PDF/TXT)")
376
- # upload_btn = gr.Button("Index Content", variant="primary")
377
- # status_msg = gr.Textbox(label="System Status", interactive=False)
378
-
379
- # with gr.Column(scale=2):
380
- # q_input = gr.Textbox(label="2. Question")
381
- # a_input = gr.Textbox(label="3. Student Answer")
382
- # run_btn = gr.Button("Retrieve & Grade", variant="secondary")
383
-
384
- # with gr.Row():
385
- # evidence_box = gr.Markdown(label="Context")
386
- # grade_box = gr.Markdown(label="LLM Evaluation")
387
-
388
- # upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
389
- # run_btn.click(system.process_query, inputs=[q_input, a_input], outputs=[evidence_box, grade_box])
390
-
391
- # if __name__ == "__main__":
392
- # demo.launch()
393
-
394
-
395
-
396
-
397
-
398
-
399
-
400
-
401
-
402
-
403
-
404
-
405
  import gradio as gr
406
  import fitz # PyMuPDF
407
  import torch
@@ -424,12 +201,15 @@ class OnnxBgeEmbeddings(Embeddings):
424
  def __init__(self, model_name="BAAI/bge-large-en-v1.5"):
425
  print(f"πŸ”„ Loading Embeddings: {model_name}...")
426
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
427
  self.model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
428
 
429
  def _process_batch(self, texts):
430
  inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
431
  with torch.no_grad():
432
  outputs = self.model(**inputs)
 
433
  embeddings = outputs.last_hidden_state[:, 0]
434
  embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
435
  return embeddings.numpy().tolist()
@@ -441,79 +221,92 @@ class OnnxBgeEmbeddings(Embeddings):
441
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
442
 
443
  # ---------------------------------------------------------
444
- # 2. LLM Evaluator Class (Llama-3.2-1B ONNX - INT8)
445
  # ---------------------------------------------------------
 
446
  class LLMEvaluator:
447
  def __init__(self):
448
  self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
449
  self.local_dir = "onnx_llama_local"
450
 
451
- print(f"πŸ”„ Preparing LLM: {self.repo_id} (Int8 Quantized)...")
 
 
 
452
  snapshot_download(
453
  repo_id=self.repo_id,
454
  local_dir=self.local_dir,
455
  local_dir_use_symlinks=False,
456
- allow_patterns=["config.json", "generation_config.json", "tokenizer*", "special_tokens_map.json", "*.jinja", "onnx/model_quantized.onnx"]
 
 
 
 
 
 
 
457
  )
 
 
458
  self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
 
 
 
459
  self.model = ORTModelForCausalLM.from_pretrained(
460
  self.local_dir,
461
- subfolder="onnx",
462
- file_name="model_quantized.onnx",
463
  use_cache=True,
464
  use_io_binding=False
465
  )
466
 
467
  def evaluate(self, context, question, student_answer):
468
- # [STRATEGY: FEW-SHOT PROMPTING]
469
- # We give the model an example so it knows exactly what format to output.
470
- # This prevents it from hallucinating dates or XML tags.
471
-
472
  messages = [
473
- {"role": "system", "content": "You are a grading assistant. Output only the requested format."},
474
  {"role": "user", "content": f"""
475
- Task: Grade the student answer based ONLY on the provided text.
476
-
477
- ---
478
- EXAMPLE:
479
- Text: "Photosynthesis is how plants make food using sunlight."
480
- Question: "How do plants eat?"
481
- Answer: "They use sunlight."
482
-
483
- Grade: 10/10
484
- Verdict: Correct
485
- Explanation: The text confirms plants use sunlight to make food.
486
- ---
487
-
488
- YOUR TURN:
489
-
490
- Text: "{context}"
491
- Question: "{question}"
492
- Answer: "{student_answer}"
493
-
494
- Output the Grade, Verdict, and Explanation:
495
  """}
496
  ]
497
 
498
- input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
 
 
 
 
 
499
  inputs = self.tokenizer(input_text, return_tensors="pt")
500
 
501
- # [GENERATION SETTINGS FOR STABILITY]
502
  with torch.no_grad():
503
  outputs = self.model.generate(
504
  **inputs,
505
- max_new_tokens=150,
506
-
507
- # We use Sampling with low temp instead of Greedy
508
- # This helps the model get "unstuck" from bad loops without being too creative.
509
- do_sample=True,
510
- temperature=0.2,
511
- top_p=0.9,
512
- repetition_penalty=1.1
513
  )
514
 
515
- return self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
516
-
 
 
 
 
 
517
  # ---------------------------------------------------------
518
  # 3. Main Application Logic
519
  # ---------------------------------------------------------
@@ -521,7 +314,7 @@ class VectorSystem:
521
  def __init__(self):
522
  self.vector_store = None
523
  self.embeddings = OnnxBgeEmbeddings()
524
- self.llm = LLMEvaluator()
525
  self.all_chunks = []
526
 
527
  def process_file(self, file_obj):
@@ -551,24 +344,22 @@ class VectorSystem:
551
  if not self.vector_store: return "⚠️ Please upload a file first.", ""
552
  if not question: return "⚠️ Enter a question.", ""
553
 
554
- # 1. RAG RETRIEVAL (Top 1 Only)
555
- # We removed the Pre/Next chunk expansion because it was creating duplicate text
556
- # that confused the model.
557
- results = self.vector_store.similarity_search_with_score(question, k=1)
558
- if not results: return "No relevant text found.", ""
559
-
560
- best_doc, score = results[0]
561
- context_text = best_doc.page_content
 
 
562
 
563
- # 2. LLM EVALUATION
564
  llm_feedback = "Please enter a student answer to grade."
565
  if student_answer:
566
  llm_feedback = self.llm.evaluate(context_text, question, student_answer)
567
 
568
- # UI Display
569
- evidence_display = f"### 🎯 Best Match (Score: {score:.4f})\n"
570
- evidence_display += f"> **{context_text}**\n"
571
-
572
  return evidence_display, llm_feedback
573
 
574
  # Initialize
@@ -576,8 +367,8 @@ system = VectorSystem()
576
 
577
  # --- GRADIO UI ---
578
  with gr.Blocks(title="EduGenius AI Grader") as demo:
579
- gr.Markdown("# 🧠 EduGenius: AI Grader")
580
- gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B-Int8** (Evaluation).")
581
 
582
  with gr.Row():
583
  with gr.Column(scale=1):
@@ -591,11 +382,12 @@ with gr.Blocks(title="EduGenius AI Grader") as demo:
591
  run_btn = gr.Button("Retrieve & Grade", variant="secondary")
592
 
593
  with gr.Row():
594
- evidence_box = gr.Markdown(label="Context Used")
595
- grade_box = gr.Markdown(label="LLM Result")
596
 
597
  upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
598
  run_btn.click(system.process_query, inputs=[q_input, a_input], outputs=[evidence_box, grade_box])
599
 
600
  if __name__ == "__main__":
601
- demo.launch()
 
 
179
 
180
 
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  import gradio as gr
183
  import fitz # PyMuPDF
184
  import torch
 
201
  def __init__(self, model_name="BAAI/bge-large-en-v1.5"):
202
  print(f"πŸ”„ Loading Embeddings: {model_name}...")
203
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
204
+ # Note: export=True will re-convert on every restart.
205
+ # For production, you'd want to save this permanently, but this works for now.
206
  self.model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
207
 
208
  def _process_batch(self, texts):
209
  inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
210
  with torch.no_grad():
211
  outputs = self.model(**inputs)
212
+ # CLS pooling for BGE
213
  embeddings = outputs.last_hidden_state[:, 0]
214
  embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
215
  return embeddings.numpy().tolist()
 
221
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
222
 
223
  # ---------------------------------------------------------
224
+ # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
225
  # ---------------------------------------------------------
226
+
227
  class LLMEvaluator:
228
  def __init__(self):
229
  self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
230
  self.local_dir = "onnx_llama_local"
231
 
232
+ print(f"πŸ”„ Preparing LLM: {self.repo_id}...")
233
+
234
+ # [FIXED DOWNLOADER]
235
+ print(f"πŸ“₯ Downloading FP16 model + data to {self.local_dir}...")
236
  snapshot_download(
237
  repo_id=self.repo_id,
238
  local_dir=self.local_dir,
239
  local_dir_use_symlinks=False,
240
+ allow_patterns=[
241
+ "config.json",
242
+ "generation_config.json",
243
+ "tokenizer*",
244
+ "special_tokens_map.json",
245
+ "*.jinja",
246
+ "onnx/model_fp16.onnx*" # WILDCARD '*' ensures we get .onnx AND .onnx_data
247
+ ]
248
  )
249
+ print("βœ… Download complete.")
250
+
251
  self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
252
+
253
+ # [CRITICAL FIX]
254
+ # Separating 'subfolder' and 'file_name' is required by Optimum
255
  self.model = ORTModelForCausalLM.from_pretrained(
256
  self.local_dir,
257
+ subfolder="onnx", # Point to the subfolder
258
+ file_name="model_fp16.onnx", # Just the filename
259
  use_cache=True,
260
  use_io_binding=False
261
  )
262
 
263
  def evaluate(self, context, question, student_answer):
264
+ # Prompt Engineering for Llama 3
 
 
 
265
  messages = [
266
+ {"role": "system", "content": "You are a strict academic. Grade the student answer based ONLY on the provided context."},
267
  {"role": "user", "content": f"""
268
+ ### CONTEXT:
269
+ {context}
270
+
271
+ ### QUESTION:
272
+ {question}
273
+
274
+ ### STUDENT ANSWER:
275
+ {student_answer}
276
+
277
+ ### INSTRUCTIONS:
278
+ 1. Is the answer correct?
279
+ 2. Score out of 10.
280
+ 3. Explanation.
 
 
 
 
 
 
 
281
  """}
282
  ]
283
 
284
+ # Format input using the chat template
285
+ input_text = self.tokenizer.apply_chat_template(
286
+ messages,
287
+ tokenize=False,
288
+ add_generation_prompt=True
289
+ )
290
+
291
  inputs = self.tokenizer(input_text, return_tensors="pt")
292
 
293
+ # Generate response
294
  with torch.no_grad():
295
  outputs = self.model.generate(
296
  **inputs,
297
+ max_new_tokens=256,
298
+ temperature=0.3,
299
+ do_sample=True,
300
+ top_p=0.9
 
 
 
 
301
  )
302
 
303
+ # Decode response
304
+ response = self.tokenizer.decode(
305
+ outputs[0][inputs.input_ids.shape[1]:],
306
+ skip_special_tokens=True
307
+ )
308
+ return response
309
+
310
  # ---------------------------------------------------------
311
  # 3. Main Application Logic
312
  # ---------------------------------------------------------
 
314
  def __init__(self):
315
  self.vector_store = None
316
  self.embeddings = OnnxBgeEmbeddings()
317
+ self.llm = LLMEvaluator() # Initialize LLM
318
  self.all_chunks = []
319
 
320
  def process_file(self, file_obj):
 
344
  if not self.vector_store: return "⚠️ Please upload a file first.", ""
345
  if not question: return "⚠️ Enter a question.", ""
346
 
347
+ # 1. Retrieve
348
+ results = self.vector_store.similarity_search_with_score(question, k=3)
349
+
350
+ # Prepare context for LLM
351
+ context_text = "\n\n".join([doc.page_content for doc, _ in results])
352
+
353
+ # Prepare Evidence Output for UI
354
+ evidence_display = "### πŸ“š Retrieved Context:\n"
355
+ for i, (doc, score) in enumerate(results):
356
+ evidence_display += f"**Chunk {i+1}** (Score: {score:.4f}):\n> {doc.page_content}\n\n"
357
 
358
+ # 2. Evaluate (if answer provided)
359
  llm_feedback = "Please enter a student answer to grade."
360
  if student_answer:
361
  llm_feedback = self.llm.evaluate(context_text, question, student_answer)
362
 
 
 
 
 
363
  return evidence_display, llm_feedback
364
 
365
  # Initialize
 
367
 
368
  # --- GRADIO UI ---
369
  with gr.Blocks(title="EduGenius AI Grader") as demo:
370
+ gr.Markdown("# 🧠 EduGenius: RAG + LLM Grading")
371
+ gr.Markdown("Powered by **BGE-Large** (Retrieval) and **Llama-3.2-1B** (Evaluation) - All ONNX Optimized.")
372
 
373
  with gr.Row():
374
  with gr.Column(scale=1):
 
382
  run_btn = gr.Button("Retrieve & Grade", variant="secondary")
383
 
384
  with gr.Row():
385
+ evidence_box = gr.Markdown(label="Context")
386
+ grade_box = gr.Markdown(label="LLM Evaluation")
387
 
388
  upload_btn.click(system.process_file, inputs=[pdf_input], outputs=[status_msg])
389
  run_btn.click(system.process_query, inputs=[q_input, a_input], outputs=[evidence_box, grade_box])
390
 
391
  if __name__ == "__main__":
392
+ demo.launch()
393
+