"""Streamlit app that OCRs a handwritten answer sheet and scores its questions.

Pipeline: upload image -> binarize -> OCR (Tesseract, falling back to TrOCR)
-> pull out student name / roll number / questions -> run each question
through an extractive question-answering model against the OCR text.
"""

import re

import cv2
import numpy as np
import pytesseract
import streamlit as st
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel, pipeline

TROCR_MODEL_NAME = "microsoft/trocr-base-handwritten"
QA_MODEL_NAME = "distilbert-base-uncased-distilled-squad"

# Tesseract output with this many characters or fewer (after stripping) is
# treated as a failed read and triggers the TrOCR fallback.
MIN_TESSERACT_CHARS = 10

# QA scores strictly above this value are reported as "Correct".
PASS_THRESHOLD = 0.5

# Only horizontal whitespace is allowed around and inside the name so the
# match cannot run past the end of the line into the following field.
_NAME_PATTERN = re.compile(r"NAME[ \t]*=[ \t]*(\w[\w \t]*)", re.IGNORECASE)
_ROLL_PATTERN = re.compile(r"Roll\s*NO\s*=\s*(\d+)", re.IGNORECASE)
# Everything on a line up to and including its last question mark.
_QUESTION_PATTERN = re.compile(r"(?:[^\n]*\?)")


@st.cache_resource(show_spinner="Loading handwriting recognition model...")
def _load_trocr():
    """Load the TrOCR processor and model once per server process."""
    trocr_processor = TrOCRProcessor.from_pretrained(TROCR_MODEL_NAME)
    trocr_model = VisionEncoderDecoderModel.from_pretrained(TROCR_MODEL_NAME)
    return trocr_processor, trocr_model


@st.cache_resource(show_spinner="Loading question-answering model...")
def _load_qa_pipeline():
    """Load the question-answering pipeline once per server process."""
    return pipeline("question-answering", model=QA_MODEL_NAME)


# Streamlit re-executes this script on every interaction; the cached loaders
# keep the models from being re-instantiated on each rerun.
# Load TrOCR model for handwriting recognition
processor, model = _load_trocr()

# Load pre-trained QA model for grading
qa_pipeline = _load_qa_pipeline()


def preprocess_image(image_file) -> Image.Image:
    """Binarize an uploaded image to prepare it for OCR.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur,
    thresholded with Otsu's method, and converted back to a 3-channel image.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts (path or file object).

    Returns:
        The thresholded image as an RGB ``PIL.Image.Image``.
    """
    image = np.array(Image.open(image_file).convert("RGB"))
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    # cv2.threshold returns (threshold_value, image); only the image is needed.
    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return Image.fromarray(cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB))


def extract_text_with_tesseract(image) -> str:
    """Return the text Tesseract OCR reads from ``image``."""
    return pytesseract.image_to_string(image)


def extract_text_with_trocr(image) -> str:
    """Return the text the TrOCR handwriting model generates for ``image``.

    Only the first decoded sequence is returned.
    """
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


def extract_student_info(text):
    """Extract the student name and roll number from OCR text.

    Looks for ``NAME = <name>`` and ``Roll NO = <digits>`` (case-insensitive).
    The name is limited to the line it starts on.

    Args:
        text: OCR output to search.

    Returns:
        ``(student_name, roll_number)`` as strings; each is ``"Unknown"``
        when the corresponding field is not found.
    """
    name_match = _NAME_PATTERN.search(text)
    roll_match = _ROLL_PATTERN.search(text)
    student_name = name_match.group(1).strip() if name_match else "Unknown"
    roll_number = roll_match.group(1).strip() if roll_match else "Unknown"
    return student_name, roll_number


def extract_questions_from_text(text):
    """Return every line fragment of ``text`` that ends in a question mark.

    Each entry runs from the start of a line through the last ``?`` on that
    line; lines without a ``?`` contribute nothing.
    """
    return _QUESTION_PATTERN.findall(text)


def grade_answer(question, context, threshold=PASS_THRESHOLD):
    """Score ``question`` against ``context`` with the QA pipeline.

    Args:
        question: Question text passed to the QA model.
        context: Text in which the QA model searches for an answer.
        threshold: Scores strictly above this value count as correct.

    Returns:
        ``(score, feedback)`` where ``score`` is the ``'score'`` entry of the
        pipeline result and ``feedback`` is ``"Correct"`` or ``"Incorrect"``.
    """
    score = qa_pipeline(question=question, context=context)["score"]
    feedback = "Correct" if score > threshold else "Incorrect"
    return score, feedback


# Streamlit App
st.title("Handwritten Answer Sheet Grading System")
st.write("Upload an image or handwritten file to process.")

# Upload image
uploaded_image = st.file_uploader("Upload Handwritten Image", type=["png", "jpg", "jpeg"])

if uploaded_image:
    st.image(uploaded_image, caption="Original Image", use_container_width=True)

    # Preprocess the image
    preprocessed_image = preprocess_image(uploaded_image)
    st.image(preprocessed_image, caption="Preprocessed Image", use_container_width=True)

    # Attempt text extraction with Tesseract
    st.subheader("Extracted Text:")
    tesseract_text = extract_text_with_tesseract(preprocessed_image)
    if len(tesseract_text.strip()) > MIN_TESSERACT_CHARS:
        extracted_text = tesseract_text  # Use Tesseract output if it seems valid
    else:
        extracted_text = extract_text_with_trocr(preprocessed_image)  # Use TrOCR fallback
    st.text(extracted_text)

    # Extract student info
    student_name, roll_number = extract_student_info(extracted_text)
    st.subheader(f"Student Name: {student_name}")
    st.subheader(f"Roll No: {roll_number}")

    # Extract questions
    questions = extract_questions_from_text(extracted_text)
    st.subheader("Extracted Questions")
    if not questions:
        # Make the empty case explicit instead of rendering blank sections.
        st.info("No questions were detected in the extracted text.")
    for i, question in enumerate(questions, start=1):
        st.write(f"Q{i}: {question}")

    # Grade the answers
    st.subheader("Grading Results")
    for question in questions:
        score, feedback = grade_answer(question, extracted_text)
        st.write(f"**Question:** {question}")
        st.write(f"**Score:** {score:.2f}")
        st.write(f"**Feedback:** {feedback}")
        st.write("---")