File size: 3,759 Bytes
0133ca6
421fc43
 
0133ca6
4e00f7b
bfbd7e6
0133ca6
c020812
4e00f7b
bfbd7e6
 
0133ca6
4e00f7b
0133ca6
 
4e00f7b
421fc43
 
4e00f7b
 
 
5b20f3a
4e00f7b
421fc43
4e00f7b
 
 
 
 
 
bfbd7e6
 
 
 
0133ca6
4e00f7b
0133ca6
 
 
 
 
 
 
4e00f7b
bfbd7e6
421fc43
bfbd7e6
 
421fc43
0133ca6
 
 
 
 
 
 
 
 
 
 
 
421fc43
4e00f7b
 
421fc43
 
0133ca6
4e00f7b
 
 
 
 
 
 
 
0133ca6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import streamlit as st
import cv2
import numpy as np
from PIL import Image
import pytesseract
from transformers import TrOCRProcessor, VisionEncoderDecoderModel, pipeline
import re

# Streamlit re-executes this script from top to bottom on every widget
# interaction. Wrapping the loaders in st.cache_resource keeps one shared copy
# of each model in memory instead of re-instantiating it on every rerun.
@st.cache_resource
def _load_trocr():
    """Load the TrOCR handwriting processor and its encoder-decoder model.

    Returns:
        A ``(processor, model)`` tuple built from the
        ``microsoft/trocr-base-handwritten`` checkpoint.
    """
    trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
    trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
    return trocr_processor, trocr_model


@st.cache_resource
def _load_qa_pipeline():
    """Load the question-answering pipeline used for grading."""
    return pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")


# Load TrOCR model for handwriting recognition
processor, model = _load_trocr()

# Load pre-trained QA model for grading
qa_pipeline = _load_qa_pipeline()

# Function to preprocess the image
def preprocess_image(image_file):
    """Turn an image into a black-and-white version for OCR.

    The picture is decoded as RGB, reduced to grayscale, smoothed with a 5x5
    Gaussian blur, binarized with Otsu thresholding and finally expanded back
    to three channels.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts (path or file-like
            object).

    Returns:
        A ``PIL.Image.Image`` holding the thresholded picture in RGB layout.
    """
    rgb_pixels = np.array(Image.open(image_file).convert("RGB"))
    grayscale = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return Image.fromarray(cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB))

# Function to extract text using Tesseract OCR
def extract_text_with_tesseract(image):
    """Run Tesseract OCR on ``image`` and return the recognized text.

    Args:
        image: The image handed straight to ``pytesseract.image_to_string``.

    Returns:
        Whatever string ``pytesseract.image_to_string`` produces for the image.
    """
    recognized = pytesseract.image_to_string(image)
    return recognized

# Function to extract text using TrOCR
def extract_text_with_trocr(image):
    """Recognize handwriting in ``image`` with the module-level TrOCR model.

    Args:
        image: The image passed to the TrOCR ``processor``.

    Returns:
        The first decoded string of the generated batch, with special tokens
        removed.
    """
    encoded = processor(images=image, return_tensors="pt")
    token_ids = model.generate(encoded.pixel_values)
    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    # A single image was encoded, so only the first batch entry is returned.
    return decoded[0]

# Extract student name and roll number
def extract_student_info(text):
    """Pull the student's name and roll number out of OCR text.

    The text is searched (case-insensitively) for ``NAME = <name>`` and
    ``Roll NO = <digits>``.

    Args:
        text: The recognized text of the answer sheet.

    Returns:
        A ``(student_name, roll_number)`` tuple of strings. Either element is
        ``"Unknown"`` when the corresponding field cannot be found.
    """
    # The captured name is limited to word characters, spaces and tabs so it
    # stops at the end of its own line; allowing any whitespace here would let
    # the match run on into the following line (e.g. the "Roll NO" label).
    # Requiring a leading word character keeps a non-word value such as "???"
    # from being reported as an empty name.
    name_match = re.search(r"NAME\s*=\s*(\w[\w \t]*)", text, re.IGNORECASE)
    roll_match = re.search(r"Roll\s*NO\s*=\s*(\d+)", text, re.IGNORECASE)

    student_name = name_match.group(1).strip() if name_match else "Unknown"
    roll_number = roll_match.group(1).strip() if roll_match else "Unknown"
    return student_name, roll_number

# Extract questions from the text
def extract_questions_from_text(text):
    """Collect every question found in ``text``.

    For each line containing a question mark, the portion from the start of
    the line through its last ``?`` is treated as one question. Lines without
    a ``?`` contribute nothing.

    Args:
        text: The text to scan, with lines separated by ``\\n``.

    Returns:
        A list of question strings in order of appearance.
    """
    found = []
    for line in text.split("\n"):
        head, mark, _ = line.rpartition("?")
        if mark:
            found.append(head + mark)
    return found

# Grade answers
def grade_answer(question, context, threshold=0.5):
    """Grade one question by running the QA pipeline over ``context``.

    Args:
        question: The question text passed to the QA pipeline.
        context: The text the QA pipeline searches for an answer.
        threshold: The score must be strictly greater than this value to be
            labelled ``"Correct"``. Defaults to 0.5.

    Returns:
        A ``(score, feedback)`` tuple, where ``score`` is the ``"score"`` entry
        of the pipeline result and ``feedback`` is ``"Correct"`` or
        ``"Incorrect"``.
    """
    # NOTE(review): the score is presumably the model's confidence in the
    # answer span it extracted, not a check against a reference answer --
    # confirm this is the intended grading signal.
    result = qa_pipeline(question=question, context=context)
    score = result["score"]
    feedback = "Correct" if score > threshold else "Incorrect"
    return score, feedback

# Streamlit App
# Page flow: upload an image, show it next to its preprocessed version, run
# OCR, then display the student details, the detected questions and a grade
# for each question.
st.title("Handwritten Answer Sheet Grading System")
st.write("Upload an image or handwritten file to process.")

# Upload image
uploaded_image = st.file_uploader("Upload Handwritten Image", type=["png", "jpg", "jpeg"])

if uploaded_image:
    # Show the raw upload followed by the preprocessed image used for OCR.
    st.image(uploaded_image, caption="Original Image", use_container_width=True)
    preprocessed_image = preprocess_image(uploaded_image)
    st.image(preprocessed_image, caption="Preprocessed Image", use_container_width=True)

    # OCR: Tesseract is tried first; TrOCR is only invoked when Tesseract
    # returns 10 or fewer non-whitespace-trimmed characters.
    st.subheader("Extracted Text:")
    tesseract_text = extract_text_with_tesseract(preprocessed_image)
    tesseract_looks_valid = len(tesseract_text.strip()) > 10
    extracted_text = (
        tesseract_text
        if tesseract_looks_valid
        else extract_text_with_trocr(preprocessed_image)
    )
    st.text(extracted_text)

    # Student details parsed from the recognized text.
    student_name, roll_number = extract_student_info(extracted_text)
    st.subheader(f"Student Name: {student_name}")
    st.subheader(f"Roll No: {roll_number}")

    # List every detected question, numbered from 1.
    questions = extract_questions_from_text(extracted_text)
    st.subheader("Extracted Questions")
    for number, question in enumerate(questions, start=1):
        st.write(f"Q{number}: {question}")

    # Grade each question using the full recognized text as context.
    st.subheader("Grading Results")
    for question in questions:
        score, feedback = grade_answer(question, extracted_text)
        report_lines = (
            f"**Question:** {question}",
            f"**Score:** {score:.2f}",
            f"**Feedback:** {feedback}",
            "---",
        )
        for report_line in report_lines:
            st.write(report_line)