PranavRatnalikar committed on
Commit
0cc90be
·
verified ·
1 Parent(s): 4d6c301

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +104 -0
  2. generate_index.py +67 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pdfplumber
3
+ import pickle
4
+ import faiss
5
+ import numpy as np
6
+ import re
7
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
8
+
9
import os

# Configuration
INDEX_NAME = "index"  # Prefix shared by the "<prefix>.faiss" and "<prefix>.pkl" files

# Secrets must not live in source control: read the Google API key from the
# environment instead. NOTE(review): the key that was previously committed
# here should be treated as compromised and rotated.
API_KEY = os.environ.get("GOOGLE_API_KEY")
12
+
13
def load_faiss_index(api_key):
    """Load the FAISS index, its question-number metadata and the embedding client.

    Args:
        api_key: Google Generative AI API key handed to the embedding client.

    Returns:
        A tuple ``(index, question_numbers, embeddings)``. If anything fails,
        the error is shown with ``st.error`` and ``(None, None, None)`` is
        returned, so callers must check for ``None``.
    """
    try:
        # Built inside the try so a failure while creating the client (for
        # example a missing key) is reported in the UI like any other load
        # error instead of escaping as an unhandled exception.
        embeddings = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001", google_api_key=api_key
        )
        index = faiss.read_index(f"{INDEX_NAME}.faiss")

        # NOTE: unpickling can execute arbitrary code; only load a metadata
        # file that was produced by the project's own indexing script.
        with open(f"{INDEX_NAME}.pkl", "rb") as f:
            question_numbers = pickle.load(f)

        # Every vector needs a label, otherwise lookups by search position
        # would raise or return the wrong question later on.
        if index.ntotal != len(question_numbers):
            raise ValueError(
                f"index holds {index.ntotal} vectors but metadata lists "
                f"{len(question_numbers)} questions"
            )
    except Exception as e:  # UI boundary: surface every failure to the user
        st.error(f"❌ Error loading FAISS index: {e}")
        return None, None, None

    return index, question_numbers, embeddings
25
+
26
def extract_student_answers(pdf_file):
    """Extract question-wise answers from a student's answer-sheet PDF.

    Each answer is expected to be introduced by a marker of the form
    ``###<digits><letter>`` (for example ``###1A``); the answer runs until the
    next ``###`` or the end of the document.

    Args:
        pdf_file: Anything ``pdfplumber.open`` accepts (the app passes the
            uploaded file object).

    Returns:
        A dict mapping the upper-cased marker (e.g. ``"###1A"``) to the
        stripped answer text. If a marker occurs more than once, the last
        occurrence wins. An answer may be the empty string.
    """
    with pdfplumber.open(pdf_file) as pdf_reader:
        # extract_text() may return None for a page without text. Pages are
        # joined with a newline so the last word of one page does not fuse
        # with the first word of the next.
        text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

    # IGNORECASE: the key is upper-cased below, so "###1a" is meant to be
    # accepted as well. ".*?" (not ".+?") lets an empty answer stop right at
    # the following marker instead of swallowing it.
    pattern = re.compile(
        r"(###\d+[A-Z])\s*(.*?)(?=###|\Z)", re.DOTALL | re.IGNORECASE
    )

    return {
        marker.upper(): answer.strip()
        for marker, answer in pattern.findall(text)
    }
43
+
44
def compute_similarity(student_answer, index, question_numbers, embeddings):
    """Find the template answer nearest to a student answer and score the match.

    Args:
        student_answer: Text of the student's answer.
        index: Object with a FAISS-style ``search(vectors, k)`` method that
            returns ``(distances, labels)``.
        question_numbers: Sequence giving the question ID for each index
            position.
        embeddings: Object with an ``embed_query(text)`` method returning a
            vector.

    Returns:
        A tuple ``(matched_question, similarity_percent)``.
        ``similarity_percent`` is a Python float computed as
        ``100 / (1 + distance)``. For an empty or whitespace-only answer the
        result is ``("No answer provided.", 0.0)``; if the index has no
        neighbour to return it is ``("No matching template found.", 0.0)``.
    """
    if not student_answer or not student_answer.strip():
        return "No answer provided.", 0.0

    query = np.asarray(
        embeddings.embed_query(student_answer), dtype="float32"
    ).reshape(1, -1)
    distances, labels = index.search(query, 1)  # nearest neighbour only

    nearest = int(labels[0][0])
    if nearest < 0:
        # FAISS reports -1 when it has no result (e.g. an empty index);
        # indexing with it would silently pick the *last* question.
        return "No matching template found.", 0.0

    # Convert to plain Python floats so the score serialises cleanly.
    distance = float(distances[0][0])
    return question_numbers[nearest], 100.0 / (1.0 + distance)
54
+
55
def evaluate_answers(student_answers, index, question_numbers, embeddings, max_marks=5):
    """Score every extracted answer against its nearest template answer.

    Args:
        student_answers: Dict mapping a question marker to the answer text.
        index, question_numbers, embeddings: Passed through unchanged to
            ``compute_similarity``.
        max_marks: Marks available per question (default 5).

    Returns:
        A dict keyed like ``student_answers``. Each value holds
        ``"matched_question"`` (the template that was closest),
        ``"similarity"`` (percentage string), ``"marks_obtained"`` (rounded to
        a whole or half mark) and ``"max_marks"``.
    """
    results = {}

    for question, student_answer in student_answers.items():
        matched_question, similarity = compute_similarity(
            student_answer, index, question_numbers, embeddings
        )
        similarity = float(similarity)  # keep numpy scalars out of the results

        # Marks are the similarity percentage applied to max_marks.
        raw_marks = (similarity * max_marks) / 100

        # Round to the nearest whole or half mark:
        # fraction < .25 -> down, .25 <= fraction < .75 -> .5, otherwise up.
        whole = int(raw_marks)
        fraction = raw_marks - whole
        if fraction < 0.25:
            marks_obtained = whole
        elif fraction < 0.75:
            marks_obtained = whole + 0.5
        else:
            marks_obtained = whole + 1

        results[question] = {
            # NOTE(review): the search runs over *all* templates, so the
            # nearest one may belong to a different question than the one
            # being marked. It is reported here so that case is visible.
            "matched_question": matched_question,
            "similarity": f"{round(similarity, 2)}%",
            "marks_obtained": marks_obtained,
            "max_marks": max_marks,
        }

    return results
81
+
82
# Streamlit UI
st.title("πŸ“„ Automated Answer Evaluation System")

index, question_numbers, embeddings = load_faiss_index(API_KEY)
if index is None:
    # load_faiss_index already displayed the error. Without an index there is
    # nothing to compare against, so end this script run here instead of
    # failing later on a None index.
    st.stop()

uploaded_file = st.file_uploader("πŸ“‚ Upload Student Answer Sheet (PDF)", type="pdf")

if uploaded_file:
    with st.spinner("Extracting text from student answer sheet..."):
        student_answers = extract_student_answers(uploaded_file)

    if student_answers:
        st.text_area(
            "πŸ“œ Extracted Student Answers:",
            "\n".join(f"{q}: {a}" for q, a in student_answers.items()),
            height=150,
        )

        if st.button("πŸ” Evaluate Answers"):
            with st.spinner("Comparing answers with templates..."):
                results = evaluate_answers(
                    student_answers, index, question_numbers, embeddings
                )

            st.subheader("πŸ“Š Score Breakdown:")
            st.json(results)

            total_marks = sum(v["marks_obtained"] for v in results.values())
            st.subheader(f"πŸ† Total Score: {total_marks} Marks")
    else:
        # Tell the user why nothing is shown rather than leaving a blank page.
        st.warning("No answers in the ###<number><letter> format were found in the uploaded PDF.")
generate_index.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pdfplumber
3
+ import pickle
4
+ import faiss
5
+ import numpy as np
6
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+
9
# Configuration
TEMPLATE_DIR = "dataset"  # Folder containing template answer PDFs
INDEX_NAME = "index"  # Prefix for FAISS index files

# Secrets must not live in source control: read the Google API key from the
# environment instead. NOTE(review): the key that was previously committed
# here should be treated as compromised and rotated.
API_KEY = os.environ.get("GOOGLE_API_KEY")
13
+
14
def extract_text_from_pdf(pdf_path):
    """Extract the text of a single PDF file.

    Args:
        pdf_path: Path of the PDF to read.

    Returns:
        The text of all pages with surrounding whitespace stripped; an empty
        string if no page yields text.
    """
    with pdfplumber.open(pdf_path) as pdf_reader:
        # extract_text() may return None for a page without text. Pages are
        # joined with a newline so words at a page break do not run together.
        text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
    return text.strip()
21
+
22
def process_template_answers():
    """Collect the template answer text of every PDF in ``TEMPLATE_DIR``.

    The question ID is the file name without its extension, upper-cased
    (``1a.pdf`` -> ``"1A"``). PDFs that yield no text are skipped.

    Returns:
        A dict mapping question ID to the extracted template answer text.
    """
    template_answers = {}

    # Sorted so the index is built in the same order on every run;
    # os.listdir() makes no ordering promise.
    for file_name in sorted(os.listdir(TEMPLATE_DIR)):
        # splitext removes only the real extension; str.replace(".pdf", "")
        # would also delete ".pdf" occurring inside the name.
        stem, extension = os.path.splitext(file_name)
        if extension.lower() != ".pdf":
            continue

        extracted_text = extract_text_from_pdf(os.path.join(TEMPLATE_DIR, file_name))
        if extracted_text:
            template_answers[stem.upper()] = extracted_text

    return template_answers
35
+
36
def generate_faiss_index(api_key):
    """Build the FAISS index over all template answers and write it to disk.

    Writes ``<INDEX_NAME>.faiss`` (the vectors) and ``<INDEX_NAME>.pkl`` (the
    question IDs, in the same order as the vectors). Returns ``None``; if no
    template answers are found, nothing is written.

    Args:
        api_key: Google Generative AI API key for the embedding client.
    """
    print("πŸ”„ Extracting template answers...")
    answers_by_question = process_template_answers()

    if not answers_by_question:
        print("❌ No valid template answers found.")
        return

    print("πŸ” Generating embeddings...")
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=api_key,
    )

    # Keys and values of a dict iterate in the same order, so vector i
    # belongs to question_numbers[i].
    question_numbers = list(answers_by_question.keys())
    vectors = [
        embedder.embed_query(answer_text)
        for answer_text in answers_by_question.values()
    ]
    matrix = np.array(vectors).astype("float32")

    print("πŸ“ Creating FAISS index...")
    flat_index = faiss.IndexFlatL2(matrix.shape[1])  # one column per embedding dimension
    flat_index.add(matrix)

    print("πŸ’Ύ Saving FAISS index...")
    faiss.write_index(flat_index, f"{INDEX_NAME}.faiss")

    with open(f"{INDEX_NAME}.pkl", "wb") as metadata_file:
        pickle.dump(question_numbers, metadata_file)

    print("βœ… Indexing complete!")
65
+
66
# Build the index only when this file is run as a script, not on import.
if __name__ == "__main__":
    generate_faiss_index(api_key=API_KEY)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pdfplumber
3
+ langchain
4
+ langchain_google_genai
5
+ faiss-cpu
6
+ langchain-community
7
+ pickle5
8
+ fitz