Spaces:

Zeeshan24
/

Question-Checker

Build error

App Files Files Community

Question-Checker / app.py

Zeeshan24

Update app.py

4e00f7b verified about 1 year ago

raw

history blame contribute delete

3.76 kB

	import streamlit as st
	import cv2
	import numpy as np
	from PIL import Image
	import pytesseract
	from transformers import TrOCRProcessor, VisionEncoderDecoderModel, pipeline
	import re

	# Load the OCR and QA models once and reuse them across reruns.
	# Streamlit re-executes this script on every widget interaction, so without
	# caching each rerun would construct all three models again.
	@st.cache_resource
	def _load_models():
	    """Build the TrOCR processor, the TrOCR model, and the QA pipeline.

	    Returns:
	        A ``(processor, model, qa_pipeline)`` tuple.
	    """
	    # TrOCR checkpoint used for handwriting recognition.
	    trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
	    trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
	    # Pre-trained extractive QA model used for grading.
	    qa = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
	    return trocr_processor, trocr_model, qa


	processor, model, qa_pipeline = _load_models()

	# Function to preprocess the image
	def preprocess_image(image_file):
	    """Binarize an uploaded image before it is handed to OCR.

	    The image is opened with PIL, forced to RGB, converted to grayscale,
	    smoothed with a 5x5 Gaussian blur, and thresholded with Otsu's method.

	    Args:
	        image_file: Anything ``PIL.Image.open`` accepts (path or file object).

	    Returns:
	        A ``PIL.Image.Image`` holding the thresholded image, expanded back
	        to three RGB channels.
	    """
	    rgb_pixels = np.array(Image.open(image_file).convert("RGB"))
	    grayscale = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)
	    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
	    # cv2.threshold returns (computed_threshold, image); only the image is used.
	    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
	    return Image.fromarray(cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB))

	# Function to extract text using Tesseract OCR
	def extract_text_with_tesseract(image):
	    """Run Tesseract OCR on *image* and return the recognized text."""
	    recognized_text = pytesseract.image_to_string(image)
	    return recognized_text

	# Function to extract text using TrOCR
	def extract_text_with_trocr(image):
	    """Transcribe *image* with the module-level TrOCR processor and model.

	    Returns:
	        The first decoded sequence, with special tokens removed.
	    """
	    encoded = processor(images=image, return_tensors="pt")
	    token_ids = model.generate(encoded.pixel_values)
	    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
	    return decoded[0]

	# Extract student name and roll number
	# The name capture is restricted to a single line ([^\S\r\n] is "whitespace
	# except line breaks") so it cannot swallow the following "Roll NO" line.
	_STUDENT_NAME_RE = re.compile(
	    r"NAME[^\S\r\n]*[=:][^\S\r\n]*(\w+(?:[^\S\r\n]+\w+)*)",
	    re.IGNORECASE,
	)
	# Spacing around the separator is optional because OCR output is irregular.
	_ROLL_NUMBER_RE = re.compile(r"Roll\s*NO\.?\s*[=:]\s*(\d+)", re.IGNORECASE)


	def extract_student_info(text):
	    """Pull the student's name and roll number out of OCR text.

	    Recognizes ``NAME = <words>`` and ``Roll NO = <digits>`` case-insensitively,
	    with ``=`` or ``:`` as the separator and any amount of spacing around it.

	    Args:
	        text: The text extracted from the answer sheet.

	    Returns:
	        A ``(student_name, roll_number)`` tuple of strings. Each element is
	        ``"Unknown"`` when the corresponding field is not found.
	    """
	    name_match = _STUDENT_NAME_RE.search(text)
	    roll_match = _ROLL_NUMBER_RE.search(text)
	    student_name = name_match.group(1).strip() if name_match else "Unknown"
	    roll_number = roll_match.group(1).strip() if roll_match else "Unknown"
	    return student_name, roll_number

	# Extract questions from the text
	def extract_questions_from_text(text):
	    """Collect the question portion of every line that contains a ``?``.

	    For each line, everything from the start of the line up to and including
	    its last question mark is returned; leading whitespace is kept and any
	    text after the last ``?`` is dropped.

	    Args:
	        text: The text extracted from the answer sheet.

	    Returns:
	        A list of question strings in order of appearance (possibly empty).
	    """
	    found = []
	    for line in text.split("\n"):
	        last_mark = line.rfind("?")
	        if last_mark != -1:
	            found.append(line[:last_mark + 1])
	    return found

	# Grade answers
	def grade_answer(question, context, threshold=0.5):
	    """Grade a question using the confidence of the module-level QA pipeline.

	    Args:
	        question: The question text passed to the pipeline.
	        context: The text the pipeline searches for an answer.
	        threshold: Score that must be strictly exceeded for the result to be
	            labelled "Correct". Defaults to 0.5.

	    Returns:
	        A ``(score, feedback)`` tuple, where ``score`` is the pipeline's
	        ``'score'`` value and ``feedback`` is "Correct" or "Incorrect".
	    """
	    result = qa_pipeline(question=question, context=context)
	    score = result['score']
	    feedback = "Correct" if score > threshold else "Incorrect"
	    return score, feedback

	# Streamlit App
	# Streamlit runs this script top to bottom; everything below the uploader
	# only executes once a file has been provided.
	st.title("Handwritten Answer Sheet Grading System")
	st.write("Upload an image or handwritten file to process.")

	# Upload image
	uploaded_image = st.file_uploader("Upload Handwritten Image", type=["png", "jpg", "jpeg"])

	if uploaded_image:
	    st.image(uploaded_image, caption="Original Image", use_container_width=True)

	    # Preprocess the image
	    preprocessed_image = preprocess_image(uploaded_image)
	    st.image(preprocessed_image, caption="Preprocessed Image", use_container_width=True)

	    # Attempt text extraction with Tesseract first; fall back to TrOCR when
	    # Tesseract returns 10 or fewer characters after stripping whitespace.
	    st.subheader("Extracted Text:")
	    tesseract_text = extract_text_with_tesseract(preprocessed_image)
	    if len(tesseract_text.strip()) > 10:
	        extracted_text = tesseract_text  # Use Tesseract output if it seems valid
	    else:
	        extracted_text = extract_text_with_trocr(preprocessed_image)  # Use TrOCR fallback

	    st.text(extracted_text)

	    # Extract student info
	    student_name, roll_number = extract_student_info(extracted_text)
	    st.subheader(f"Student Name: {student_name}")
	    st.subheader(f"Roll No: {roll_number}")

	    # Extract questions
	    questions = extract_questions_from_text(extracted_text)
	    st.subheader("Extracted Questions")
	    if not questions:
	        # Say so explicitly instead of rendering two empty sections.
	        st.info("No questions were detected in the extracted text.")
	    for number, question in enumerate(questions, start=1):
	        st.write(f"Q{number}: {question}")

	    # Grade the answers, using the full extracted text as the QA context.
	    st.subheader("Grading Results")
	    for question in questions:
	        score, feedback = grade_answer(question, extracted_text)
	        st.write(f"Question: {question}")
	        st.write(f"Score: {score:.2f}")
	        st.write(f"Feedback: {feedback}")
	        st.write("---")