Create app.py

c500600 verified 3 days ago

4.46 kB

	import streamlit as st
	import pandas as pd
	import pytesseract
	from PIL import Image
	from rapidfuzz import fuzz, utils
	import io

	# --- CONFIGURATION ---
	# Page-level setup: browser tab title, wide layout, then the visible
	# heading and a one-line description of what the app does.
	st.set_page_config(layout="wide", page_title="AI Student Grader")
	st.title("📝 AI Student Answer Grader")
	st.markdown("Upload answer sheets and an answer key to automatically calculate marks.")

	# --- SIDEBAR SETTINGS ---
	# Both values are module-level names read later by the grading code.
	st.sidebar.header("Grading Settings")
	accuracy_threshold = st.sidebar.slider(
	    "Minimum Accuracy Threshold (%)",
	    min_value=0,
	    max_value=100,
	    value=70,
	)
	marks_per_question = st.sidebar.number_input(
	    "Marks per correct answer",
	    value=1.0,
	)

	# --- HELPER FUNCTIONS ---
	def perform_ocr(image):
	    """Return the text Tesseract recognises in ``image``.

	    ``image`` is passed unchanged to ``PIL.Image.open``; the OCR output is
	    returned with leading and trailing whitespace removed.
	    """
	    # Optional: Add image preprocessing here (grayscale, thresholding)
	    return pytesseract.image_to_string(Image.open(image)).strip()

	def compare_answers(student_text, answer_key, threshold, marks=None):
	    """Grade OCR'd student text against an answer key using fuzzy matching.

	    Every non-blank line of ``answer_key`` is treated as one expected answer.
	    For each expected answer, the best ``fuzz.token_set_ratio`` similarity
	    against any non-blank line of ``student_text`` is taken; the answer is
	    counted as correct when that best similarity reaches ``threshold``.

	    Args:
	        student_text: Text extracted from the student's sheet.
	        answer_key: Reference answers, one per line.
	        threshold: Minimum similarity (0-100) for an answer to count.
	        marks: Marks awarded per correct answer. When omitted, the
	            module-level ``marks_per_question`` sidebar value is used.

	    Returns:
	        A ``(score, details)`` tuple. ``details`` holds one dict per expected
	        answer with the keys ``"Question"`` (1-based), ``"Match %"`` and
	        ``"Status"`` (``"Correct"`` or ``"Incorrect"``).
	    """
	    if marks is None:
	        marks = marks_per_question

	    # Simple line-by-line comparison (assuming 1 question per line or similar
	    # structure). For complex papers, you'd split by question numbers.
	    key_lines = [line.strip() for line in answer_key.split('\n') if line.strip()]
	    student_lines = [line.strip() for line in student_text.split('\n') if line.strip()]

	    score = 0
	    details = []

	    for number, correct_ans in enumerate(key_lines, start=1):
	        # Normalise both strings (lower-case, strip non-alphanumerics) before
	        # scoring: OCR output routinely differs from the key in case and
	        # punctuation, which would otherwise drag the similarity down.
	        # ``default=0`` covers an empty student sheet.
	        highest_match = max(
	            (
	                fuzz.token_set_ratio(
	                    correct_ans, s_line, processor=utils.default_process
	                )
	                for s_line in student_lines
	            ),
	            default=0,
	        )

	        is_correct = highest_match >= threshold
	        if is_correct:
	            score += marks

	        details.append({
	            "Question": number,
	            "Match %": round(highest_match, 2),
	            "Status": "Correct" if is_correct else "Incorrect",
	        })

	    return score, details

	# --- UI LAYOUT ---
	col1, col2 = st.columns(2)

	# Start with an empty key so the validation below still works when
	# "Upload Image" is selected but no image has been uploaded yet; without
	# this default the name would be undefined and raise NameError.
	answer_key_text = ""

	with col1:
	    st.subheader("1. Reference Answer Key")
	    key_input_type = st.radio("Key Format", ["Text Input", "Upload Image"])

	    if key_input_type == "Text Input":
	        answer_key_text = st.text_area("Paste the correct answers (one per line):")
	    else:
	        key_img = st.file_uploader("Upload Answer Key Image", type=['png', 'jpg', 'jpeg'])
	        if key_img:
	            # Use the text area's return value so that manual corrections
	            # to the OCR output are what actually gets graded against.
	            answer_key_text = st.text_area(
	                "Extracted Key (Edit if needed):",
	                value=perform_ocr(key_img),
	            )

	with col2:
	    st.subheader("2. Student Answer Sheets")
	    student_images = st.file_uploader(
	        "Upload Student Images (Max 5)",
	        type=['png', 'jpg', 'jpeg'],
	        accept_multiple_files=True,
	    )

	# --- PROCESSING ---
	if st.button("Calculate Marks"):
	    # Count answers the same way compare_answers does (blank lines ignored)
	    # so the "out of" total matches the number of questions actually graded.
	    question_count = sum(
	        1 for line in answer_key_text.split('\n') if line.strip()
	    )

	    if not question_count or not student_images:
	        st.error("Please provide both the answer key and student images.")
	    else:
	        results = []
	        total_marks = question_count * marks_per_question

	        progress_bar = st.progress(0)
	        for idx, img_file in enumerate(student_images):
	            # 1. OCR
	            extracted_text = perform_ocr(img_file)

	            # 2. Compare
	            score, details = compare_answers(extracted_text, answer_key_text, accuracy_threshold)

	            # Report the student's own average best-match similarity rather
	            # than echoing the configured threshold for every row.
	            if details:
	                average_match = sum(d["Match %"] for d in details) / len(details)
	            else:
	                average_match = 0.0

	            # 3. Store Results
	            results.append({
	                # Filename without its extension is used as the student name;
	                # rsplit keeps dots that are part of the name itself.
	                "Student Name": img_file.name.rsplit('.', 1)[0],
	                "Raw Score": score,
	                "Final Marks": f"{score}/{total_marks}",
	                "Match Percentage": f"{average_match:.1f}%",
	            })
	            progress_bar.progress((idx + 1) / len(student_images))

	        # --- DISPLAY RESULTS ---
	        df = pd.DataFrame(results)
	        st.subheader("📊 Results Overview")
	        st.table(df)

	        # --- EXCEL EXPORT ---
	        # Write the workbook into memory so it can be offered as a download.
	        output = io.BytesIO()
	        with pd.ExcelWriter(output, engine='openpyxl') as writer:
	            df.to_excel(writer, index=False, sheet_name='Grades')

	        st.download_button(
	            label="📥 Download Excel Sheet",
	            data=output.getvalue(),
	            file_name="student_grades.xlsx",
	            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	        )