# Streamlit app: OCR uploaded student answer sheets and grade them against an
# answer key using fuzzy string matching, then offer the results as an Excel file.
import io

import pandas as pd
import pytesseract
import streamlit as st
from PIL import Image
from rapidfuzz import fuzz, utils

# --- CONFIGURATION ---
st.set_page_config(page_title="AI Student Grader", layout="wide")
st.title("📝 AI Student Answer Grader")
st.markdown("Upload answer sheets and an answer key to automatically calculate marks.")

# --- SIDEBAR SETTINGS ---
st.sidebar.header("Grading Settings")
accuracy_threshold = st.sidebar.slider("Minimum Accuracy Threshold (%)", 0, 100, 70)
marks_per_question = st.sidebar.number_input("Marks per correct answer", value=1.0)


# --- HELPER FUNCTIONS ---
def perform_ocr(image):
    """Extract text from an uploaded image.

    Args:
        image: A path or binary file-like object accepted by ``PIL.Image.open``.

    Returns:
        The text recognised by Tesseract, with surrounding whitespace stripped.
    """
    # The context manager releases the image's resources once OCR is done.
    # Optional: add image preprocessing here (grayscale, thresholding).
    with Image.open(image) as img:
        text = pytesseract.image_to_string(img)
    return text.strip()


def compare_answers(student_text, answer_key, threshold, marks_each=None):
    """Grade OCR'd student text against an answer key using fuzzy matching.

    Every non-blank line of ``answer_key`` is treated as one expected answer.
    Each expected answer is compared against every non-blank line of
    ``student_text``; the best similarity decides whether it counts as correct.
    For complex papers the text would need splitting by question number instead.

    Args:
        student_text: Text extracted from the student's sheet.
        answer_key: Expected answers, one per line.
        threshold: Minimum similarity (0-100) for an answer to count as correct.
        marks_each: Marks awarded per correct answer. When omitted, the
            module-level ``marks_per_question`` sidebar value is used.

    Returns:
        A ``(score, details)`` tuple. ``details`` holds one dict per expected
        answer with the keys ``"Question"``, ``"Match %"`` and ``"Status"``.
    """
    if marks_each is None:
        marks_each = marks_per_question

    key_lines = [line.strip() for line in answer_key.split('\n') if line.strip()]
    student_lines = [line.strip() for line in student_text.split('\n') if line.strip()]

    details = []
    correct_count = 0
    for number, correct_ans in enumerate(key_lines, start=1):
        # default_process lowercases and strips non-alphanumerics so OCR
        # capitalisation/punctuation noise does not defeat the match; RapidFuzz
        # 3.x applies no preprocessing unless a processor is passed explicitly.
        best_match = max(
            (
                fuzz.token_set_ratio(correct_ans, s_line, processor=utils.default_process)
                for s_line in student_lines
            ),
            default=0,  # no student lines at all -> nothing matched
        )
        is_correct = best_match >= threshold
        if is_correct:
            correct_count += 1
        details.append({
            "Question": number,
            "Match %": round(best_match, 2),
            "Status": "Correct" if is_correct else "Incorrect",
        })

    return correct_count * marks_each, details


# --- UI LAYOUT ---
col1, col2 = st.columns(2)

# Defined up front so the "Calculate Marks" handler never hits an undefined
# name when the image option is selected but no key image has been uploaded.
answer_key_text = ""

with col1:
    st.subheader("1. Reference Answer Key")
    key_input_type = st.radio("Key Format", ["Text Input", "Upload Image"])
    if key_input_type == "Text Input":
        answer_key_text = st.text_area("Paste the correct answers (one per line):")
    else:
        key_img = st.file_uploader("Upload Answer Key Image", type=['png', 'jpg', 'jpeg'])
        if key_img:
            extracted_key = perform_ocr(key_img)
            # Keep the widget's return value so manual corrections are actually used.
            answer_key_text = st.text_area("Extracted Key (Edit if needed):", value=extracted_key)

with col2:
    st.subheader("2. Student Answer Sheets")
    # NOTE(review): the label advertises a limit of 5 files but nothing enforces it.
    student_images = st.file_uploader(
        "Upload Student Images (Max 5)",
        type=['png', 'jpg', 'jpeg'],
        accept_multiple_files=True,
    )

# --- PROCESSING ---
if st.button("Calculate Marks"):
    if not answer_key_text.strip() or not student_images:
        st.error("Please provide both the answer key and student images.")
    else:
        results = []
        progress_bar = st.progress(0)
        total_files = len(student_images)

        for position, img_file in enumerate(student_images, start=1):
            # 1. OCR
            extracted_text = perform_ocr(img_file)

            # 2. Compare
            score, details = compare_answers(
                extracted_text, answer_key_text, accuracy_threshold, marks_per_question
            )

            # One detail row exists per non-blank key line, so this denominator
            # agrees with what compare_answers actually graded.
            max_marks = len(details) * marks_per_question
            average_match = (
                sum(row["Match %"] for row in details) / len(details) if details else 0.0
            )

            # 3. Store Results
            results.append({
                # Filename without its final extension; dots inside the name are kept.
                "Student Name": img_file.name.rsplit('.', 1)[0],
                "Raw Score": score,
                "Final Marks": f"{score}/{max_marks}",
                "Match Percentage": f"{average_match:.2f}%",
            })
            progress_bar.progress(position / total_files)

        # --- DISPLAY RESULTS ---
        df = pd.DataFrame(results)
        st.subheader("📊 Results Overview")
        st.table(df)

        # --- EXCEL EXPORT ---
        output = io.BytesIO()
        with pd.ExcelWriter(output, engine='openpyxl') as writer:
            df.to_excel(writer, index=False, sheet_name='Grades')

        st.download_button(
            label="📥 Download Excel Sheet",
            data=output.getvalue(),
            file_name="student_grades.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )