# Zeeshan24 - "Update app.py" (revision 4e00f7b, verified)
import streamlit as st
import cv2
import numpy as np
from PIL import Image
import pytesseract
from transformers import TrOCRProcessor, VisionEncoderDecoderModel, pipeline
import re
# Streamlit re-executes this script from top to bottom on every user
# interaction. st.cache_resource keeps a single instance of each model alive
# across reruns instead of reloading the weights every time.
@st.cache_resource
def _load_trocr():
    """Load the TrOCR processor and model for handwriting recognition.

    Returns:
        A ``(processor, model)`` tuple, both loaded from the
        ``microsoft/trocr-base-handwritten`` checkpoint.
    """
    trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
    trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
    return trocr_processor, trocr_model


@st.cache_resource
def _load_qa_pipeline():
    """Load the pre-trained question-answering pipeline used for grading."""
    return pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")


# Load TrOCR model for handwriting recognition
processor, model = _load_trocr()
# Load pre-trained QA model for grading
qa_pipeline = _load_qa_pipeline()
# Function to preprocess the image
def preprocess_image(image_file):
    """Binarize an image and return it as an RGB PIL image.

    The input is opened with PIL and forced to RGB, converted to grayscale,
    smoothed with a 5x5 Gaussian blur, thresholded (binary + Otsu flags), and
    expanded back to three channels.

    Args:
        image_file: Whatever ``PIL.Image.open`` accepts for the image source.

    Returns:
        A ``PIL.Image.Image`` built from the thresholded RGB array.
    """
    rgb_pixels = np.array(Image.open(image_file).convert("RGB"))
    grayscale = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    # cv2.threshold returns a pair; only its second element (the image) is used.
    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return Image.fromarray(cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB))
# Function to extract text using Tesseract OCR
def extract_text_with_tesseract(image):
    """Run Tesseract OCR on *image* and return the recognized text.

    Args:
        image: Image handed straight to ``pytesseract.image_to_string``.

    Returns:
        The value produced by ``pytesseract.image_to_string`` for the image.
    """
    recognized_text = pytesseract.image_to_string(image)
    return recognized_text
# Function to extract text using TrOCR
def extract_text_with_trocr(image):
    """Recognize text in *image* with the module-level TrOCR model.

    Args:
        image: Image passed to the TrOCR ``processor`` as ``images=``.

    Returns:
        The first decoded string from the generated token ids, with special
        tokens removed.
    """
    encoded = processor(images=image, return_tensors="pt")
    token_ids = model.generate(encoded.pixel_values)
    decoded_texts = processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded_texts[0]
# Extract student name and roll number
def extract_student_info(text):
    """Pull the student's name and roll number out of OCR text.

    Looks for ``NAME = <name>`` and ``Roll NO = <digits>`` (case-insensitive).

    Args:
        text: The extracted text to search.

    Returns:
        A ``(student_name, roll_number)`` tuple of strings. Each element is
        ``"Unknown"`` when its field is not found.
    """
    # The name capture is limited to word characters, spaces and tabs, and
    # only spaces/tabs may follow the "=". A pattern that allows newlines
    # there would swallow the following line (e.g. "John Doe\nRoll NO").
    # Requiring a leading word character makes a blank name field a non-match
    # instead of capturing whitespace.
    name_match = re.search(r"NAME\s*=[ \t]*(\w[\w \t]*)", text, re.IGNORECASE)
    roll_match = re.search(r"Roll\s*NO\s*=\s*(\d+)", text, re.IGNORECASE)

    student_name = name_match.group(1).strip() if name_match else "Unknown"
    roll_number = roll_match.group(1).strip() if roll_match else "Unknown"
    return student_name, roll_number
# Extract questions from the text
def extract_questions_from_text(text):
    """Collect the question fragments found in *text*, at most one per line.

    For each newline-separated line containing a ``?``, the fragment running
    from the start of the line through its last ``?`` is kept. Lines without
    a ``?`` contribute nothing, and text after the final ``?`` is dropped.

    Args:
        text: The text to scan.

    Returns:
        A list of fragments in order of appearance.
    """
    # rpartition yields an empty separator when the line has no "?".
    pieces = (line.rpartition("?") for line in text.split("\n"))
    return [head + mark for head, mark, _ in pieces if mark]
# Grade answers
def grade_answer(question, context, threshold=0.5):
    """Score a question against a context with the QA model and label it.

    Args:
        question: Question string passed to the module-level ``qa_pipeline``.
        context: Context string the pipeline searches for an answer.
        threshold: Score that must be strictly exceeded for the result to be
            labelled ``"Correct"``. Defaults to 0.5.

    Returns:
        A ``(score, feedback)`` tuple. ``score`` is the pipeline's ``"score"``
        value and ``feedback`` is ``"Correct"`` or ``"Incorrect"``.
    """
    result = qa_pipeline(question=question, context=context)
    score = result["score"]
    feedback = "Correct" if score > threshold else "Incorrect"
    return score, feedback
# Streamlit App
st.title("Handwritten Answer Sheet Grading System")
st.write("Upload an image or handwritten file to process.")

# Upload image
uploaded_image = st.file_uploader("Upload Handwritten Image", type=["png", "jpg", "jpeg"])

if uploaded_image is not None:
    st.image(uploaded_image, caption="Original Image", use_container_width=True)

    # Preprocess the image
    preprocessed_image = preprocess_image(uploaded_image)
    st.image(preprocessed_image, caption="Preprocessed Image", use_container_width=True)

    # Attempt text extraction with Tesseract
    st.subheader("Extracted Text:")
    try:
        tesseract_text = extract_text_with_tesseract(preprocessed_image)
    except (pytesseract.TesseractNotFoundError, pytesseract.TesseractError) as ocr_error:
        # A missing or failing Tesseract binary should not take the app down:
        # treat it as "no usable output" so the TrOCR fallback below runs.
        st.warning(f"Tesseract OCR failed ({ocr_error}); falling back to TrOCR.")
        tesseract_text = ""

    if len(tesseract_text.strip()) > 10:
        extracted_text = tesseract_text  # Use Tesseract output if it seems valid
    else:
        extracted_text = extract_text_with_trocr(preprocessed_image)  # Use TrOCR fallback
    st.text(extracted_text)

    # Extract student info
    student_name, roll_number = extract_student_info(extracted_text)
    st.subheader(f"Student Name: {student_name}")
    st.subheader(f"Roll No: {roll_number}")

    # Extract questions
    questions = extract_questions_from_text(extracted_text)
    st.subheader("Extracted Questions")
    if not questions:
        # Say so explicitly instead of rendering empty sections.
        st.info("No questions were found in the extracted text, so there is nothing to grade.")
    else:
        for number, question in enumerate(questions, start=1):
            st.write(f"Q{number}: {question}")

        # Grade the answers; the whole extracted text serves as the context.
        st.subheader("Grading Results")
        for question in questions:
            score, feedback = grade_answer(question, extracted_text)
            st.write(f"**Question:** {question}")
            st.write(f"**Score:** {score:.2f}")
            st.write(f"**Feedback:** {feedback}")
            st.write("---")