Upload 3 files
- app.py +104 -0
- generate_index.py +67 -0
- requirements.txt +8 -0
app.py
ADDED
@@ -0,0 +1,104 @@
import streamlit as st
import pdfplumber
import pickle
import faiss
import numpy as np
import re
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Load FAISS index and metadata
INDEX_NAME = "index"
API_KEY = "AIzaSyArdn9_Uabo9q0aYmm4dxybVEb0tj7dlrk"

def load_faiss_index(api_key):
    """Loads the FAISS index and its question-number metadata."""
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)

    try:
        index = faiss.read_index(f"{INDEX_NAME}.faiss")
        with open(f"{INDEX_NAME}.pkl", "rb") as f:
            question_numbers = pickle.load(f)
        return index, question_numbers, embeddings
    except Exception as e:
        st.error(f"Error loading FAISS index: {e}")
        return None, None, None

def extract_student_answers(pdf_file):
    """Extracts question-wise answers from the student PDF."""
    text = ""
    with pdfplumber.open(pdf_file) as pdf_reader:
        for page in pdf_reader.pages:
            text += page.extract_text() or ""  # Handle pages with no extractable text

    # Extract answers based on the ###QuestionNumber format
    answers = {}
    pattern = r"(###\d+[A-Z])\s*(.+?)(?=###|\Z)"  # Matches "###1A" followed by the answer
    matches = re.findall(pattern, text, re.DOTALL)

    for question, answer in matches:
        answers[question.upper()] = answer.strip()

    return answers

def compute_similarity(student_answer, index, question_numbers, embeddings):
    """Finds the most similar template answer and returns a similarity percentage."""
    if not student_answer:
        return "No answer provided.", 0.0

    student_embedding = np.array(embeddings.embed_query(student_answer)).astype('float32').reshape(1, -1)
    distances, closest_idx = index.search(student_embedding, 1)  # Retrieve nearest neighbour

    matched_question = question_numbers[closest_idx[0][0]]
    return matched_question, (1 / (1 + distances[0][0])) * 100  # Convert L2 distance to similarity %

def evaluate_answers(student_answers, index, question_numbers, embeddings, max_marks=5):
    """Scores every extracted answer against the template index."""
    results = {}

    for question, student_answer in student_answers.items():
        matched_question, similarity = compute_similarity(student_answer, index, question_numbers, embeddings)

        # Calculate marks as a percentage of max_marks
        marks_obtained = (similarity * max_marks) / 100

        # Round marks to the nearest integer or half mark
        decimal_part = marks_obtained - int(marks_obtained)
        if decimal_part < 0.25:
            marks_obtained = int(marks_obtained)
        elif 0.25 <= decimal_part < 0.75:
            marks_obtained = int(marks_obtained) + 0.5
        else:
            marks_obtained = int(marks_obtained) + 1

        results[question] = {
            "similarity": f"{round(similarity, 2)}%",  # Format similarity as a percentage
            "marks_obtained": marks_obtained,
            "max_marks": max_marks
        }

    return results

# Streamlit UI
st.title("Automated Answer Evaluation System")

index, question_numbers, embeddings = load_faiss_index(API_KEY)
if index is None:
    st.stop()  # load_faiss_index has already reported the error

uploaded_file = st.file_uploader("Upload Student Answer Sheet (PDF)", type="pdf")

if uploaded_file:
    with st.spinner("Extracting text from student answer sheet..."):
        student_answers = extract_student_answers(uploaded_file)

    if student_answers:
        st.text_area("Extracted Student Answers:", "\n".join(f"{q}: {a}" for q, a in student_answers.items()), height=150)

        if st.button("Evaluate Answers"):
            with st.spinner("Comparing answers with templates..."):
                results = evaluate_answers(student_answers, index, question_numbers, embeddings)

            st.subheader("Score Breakdown:")
            st.json(results)

            total_marks = sum(v["marks_obtained"] for v in results.values())
            st.subheader(f"Total Score: {total_marks} Marks")
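For reference, extract_student_answers() only captures text that follows a ###<question-number><letter> marker, so student answer sheets must be formatted that way. A minimal sketch of the expected layout, using hypothetical question IDs and answer text:

import re

# Hypothetical answer-sheet text in the format the app's regex expects.
sample_text = """###1A Photosynthesis converts light energy into chemical energy.
###1B The mitochondrion is the site of cellular respiration."""

pattern = r"(###\d+[A-Z])\s*(.+?)(?=###|\Z)"  # same pattern used in app.py
for question, answer in re.findall(pattern, sample_text, re.DOTALL):
    print(question, "->", answer.strip())
# ###1A -> Photosynthesis converts light energy into chemical energy.
# ###1B -> The mitochondrion is the site of cellular respiration.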
generate_index.py
ADDED
@@ -0,0 +1,67 @@
import os
import pdfplumber
import pickle
import faiss
import numpy as np
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS

# Configuration
TEMPLATE_DIR = "dataset"  # Folder containing template answer PDFs
INDEX_NAME = "index"      # Prefix for FAISS index files
API_KEY = "AIzaSyArdn9_Uabo9q0aYmm4dxybVEb0tj7dlrk"

def extract_text_from_pdf(pdf_path):
    """Extracts text from a single PDF file."""
    text = ""
    with pdfplumber.open(pdf_path) as pdf_reader:
        for page in pdf_reader.pages:
            text += page.extract_text() or ""  # Handle pages with no extractable text
    return text.strip()

def process_template_answers():
    """Extracts answers from the template PDFs, keyed by question ID."""
    template_answers = {}

    for file in os.listdir(TEMPLATE_DIR):
        if file.endswith(".pdf"):
            question_number = file.replace(".pdf", "").upper()  # Question ID from filename (e.g., 1A)
            file_path = os.path.join(TEMPLATE_DIR, file)
            extracted_text = extract_text_from_pdf(file_path)
            if extracted_text:
                template_answers[question_number] = extracted_text

    return template_answers

def generate_faiss_index(api_key):
    """Creates the FAISS index with Google AI embeddings."""
    print("Extracting template answers...")
    template_answers = process_template_answers()

    if not template_answers:
        print("No valid template answers found.")
        return

    print("Generating embeddings...")
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)

    texts = list(template_answers.values())
    question_numbers = list(template_answers.keys())

    text_embeddings = np.array([embeddings.embed_query(text) for text in texts]).astype('float32')

    print("Creating FAISS index...")
    dimension = text_embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(text_embeddings)

    print("Saving FAISS index...")
    faiss.write_index(index, f"{INDEX_NAME}.faiss")

    with open(f"{INDEX_NAME}.pkl", "wb") as f:
        pickle.dump(question_numbers, f)

    print("Indexing complete!")

if __name__ == "__main__":
    generate_faiss_index(API_KEY)
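generate_index.py is the offline step that produces the index.faiss and index.pkl files app.py loads at startup. It assumes a dataset/ folder with one template-answer PDF per question, named after the question ID; the layout below is a hypothetical example, not part of the commit:

dataset/
    1A.pdf    # model answer for question 1A
    1B.pdf
    2A.pdf
index.faiss   # vector index written by generate_faiss_index()
index.pkl     # pickled list of question IDs, in the same row order as the index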
requirements.txt
ADDED
@@ -0,0 +1,8 @@
streamlit
pdfplumber
langchain
langchain_google_genai
faiss-cpu
langchain-community
pickle5
fitz
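A likely way to run the project, assuming the dataset/ folder described above and a valid Google API key: install the dependencies with pip install -r requirements.txt, build the index once with python generate_index.py, then launch the evaluation UI with streamlit run app.py.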