Spaces:

Zeeshan24
/

Exam-Checker

Build error

App Files Files Community

Exam-Checker / app.py

Zeeshan24

Update app.py

41a5bcd verified about 1 year ago

raw

history blame contribute delete

3.61 kB

	import io
	import os
	import re
	import tempfile

	import PyPDF2
	import pytesseract
	import streamlit as st
	from fpdf import FPDF
	from pdf2image import convert_from_path
	from transformers import pipeline

	# Load the pre-trained question-answering model once and reuse it.
	# Streamlit re-executes this script on every widget interaction, so without
	# st.cache_resource the model would be rebuilt on each rerun.
	@st.cache_resource
	def _load_qa_pipeline():
	    """Build the DistilBERT (SQuAD-distilled) question-answering pipeline."""
	    return pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")


	qa_pipeline = _load_qa_pipeline()

	# Extract text from PDF (text-based and image-based)
	def extract_text_from_pdf(pdf_path):
	    """Return the embedded text of every page of a PDF, concatenated in page order.

	    Args:
	        pdf_path: Path of the PDF file to read.

	    Returns:
	        The joined page texts; an empty string when no page yields any text.
	    """
	    with open(pdf_path, 'rb') as file:
	        reader = PyPDF2.PdfReader(file)
	        # Defensive: a page without a text layer may yield None or '' — treat
	        # both as empty so the concatenation cannot raise TypeError.
	        return ''.join(page.extract_text() or '' for page in reader.pages)

	def extract_text_from_image_pdf(pdf_path):
	    """OCR a PDF: render its pages to images and return the recognized text.

	    The text recognized on each page image is concatenated in the order the
	    images are returned by ``convert_from_path``.
	    """
	    page_images = convert_from_path(pdf_path)
	    recognized = [pytesseract.image_to_string(page_image) for page_image in page_images]
	    return ''.join(recognized)

	# Process the extracted text
	def preprocess_text(text):
	    """Collapse every run of whitespace into one space and trim both ends.

	    Args:
	        text: The raw extracted text.

	    Returns:
	        The text on a single line with single-space separation.
	    """
	    # \s already matches newlines, so one substitution covers spaces, tabs
	    # and line breaks; a separate newline pass would never find anything.
	    return re.sub(r'\s+', ' ', text).strip()

	# Grading function using the question-answering model
	def grade_answer(question, student_answer, threshold=0.5):
	    """Grade an answer using the question-answering model's confidence score.

	    Args:
	        question: The question text.
	        student_answer: The text passed to the model as context.
	        threshold: Score that must be exceeded for the answer to be "Correct".
	            Defaults to 0.5.

	    Returns:
	        A ``(score, feedback)`` tuple where feedback is "Correct" or
	        "Incorrect". A blank question or answer yields ``(0.0, "Incorrect")``.
	    """
	    # A blank question or context cannot be graded; report it as incorrect
	    # rather than handing empty input to the model.
	    if not (question and question.strip()) or not (student_answer and student_answer.strip()):
	        return 0.0, "Incorrect"

	    result = qa_pipeline(question=question, context=student_answer)
	    answer_score = result['score']
	    feedback = "Correct" if answer_score > threshold else "Incorrect"
	    return answer_score, feedback

	# Function to extract student name from text
	def extract_student_name(text):
	    """Return the student's name found after a "Name" label in the text.

	    Accepts "Name: X", "Name : X", "Name - X", "Name | X" and "Name X". The
	    label match is case-sensitive. The name is read from the label's own line
	    only, so a following header line (for example "Roll No") is never
	    included.

	    Args:
	        text: The raw extracted text, with its line breaks intact.

	    Returns:
	        The stripped name, or "Unknown Student" when no label is found.
	    """
	    if not text:
	        return "Unknown Student"

	    # Whitespace around the optional separator is optional and limited to
	    # spaces/tabs; a hyphen counts as a separator only when followed by
	    # whitespace. The name must start with a letter and may contain word
	    # characters, spaces, dots, apostrophes and hyphens.
	    match = re.search(
	        r"\bName\b[ \t]*(?:[:|]|-(?=[ \t]))?[ \t]*([^\W\d_][\w .'\-]*)",
	        text,
	    )
	    if match:
	        return match.group(1).strip()
	    return "Unknown Student"

	# Function to extract questions from the text
	def extract_questions_from_text(text):
	    """Extract the questions contained in the text, in order of appearance.

	    Two heuristics are combined:
	      * text introduced by a "Question" label ("Question: ...",
	        "Question 1: ...", "Question - ..."), taken up to its "?" or, when
	        there is none, to the end of the line;
	      * any sentence ending in "?".

	    Lines carrying a metadata label ("Name:", "Roll No:", "School:") are
	    removed first, so a header is not absorbed into the first question.

	    Args:
	        text: The raw extracted text, with its line breaks intact.

	    Returns:
	        A list of unique, whitespace-normalized question strings in a
	        deterministic order; empty when the text is empty or has no questions.
	    """
	    if not text:
	        return []

	    # Only labelled metadata is dropped (keyword followed by a separator), so a
	    # genuine question that merely mentions e.g. "name" is kept.
	    metadata_label = re.compile(
	        r"\b(?:name|roll\s*no\.?|school)\s*(?:[:|]|-\s)", re.IGNORECASE
	    )
	    body = "\n".join(
	        line for line in text.splitlines() if not metadata_label.search(line)
	    )

	    # Labelled questions: prefer the span up to the first "?", otherwise take
	    # the rest of the line (imperative questions such as "Define ...").
	    candidates = re.findall(
	        r"\bQuestion\b(?:[ \t]*\d+)?[ \t]*(?:[:|]|-[ \t])[ \t]*"
	        r"(?:[^.?!]*\?|[^\s?][^\n?]*)",
	        body,
	    )
	    # Sentences ending in "?". Excluding "?" and "!" from the sentence body
	    # keeps consecutive questions ("A? B?") as separate entries.
	    candidates += re.findall(r"[^.?!]*\?", body)

	    # Normalize whitespace so equal questions compare equal, then de-duplicate
	    # with dict.fromkeys, which preserves first-seen order (a set would not).
	    normalized = (" ".join(candidate.split()) for candidate in candidates)
	    return list(dict.fromkeys(question for question in normalized if question))

	# Streamlit Interface
	st.title('Student Answer Grading System')
	st.write('Upload a PDF containing student details and their answers.')

	# Upload file
	uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

	if uploaded_file is not None:
	    # The extractors need a filesystem path. Write the upload to a unique
	    # temporary file: a fixed name in the working directory would be shared
	    # (and overwritten) by concurrent sessions and never cleaned up.
	    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_file:
	        tmp_file.write(uploaded_file.getbuffer())
	        pdf_path = tmp_file.name

	    try:
	        # Try the embedded text layer first; fall back to OCR when it is
	        # missing or contains only whitespace (typical for scanned PDFs).
	        text = extract_text_from_pdf(pdf_path)
	        if not text or not text.strip():
	            text = extract_text_from_image_pdf(pdf_path)
	    finally:
	        # Always remove the temporary copy, even if extraction fails.
	        os.remove(pdf_path)

	    # Show the extracted text so it can be checked manually
	    st.subheader("Extracted Text:")
	    st.text(text)

	    # Single-line version of the text, used as context for grading
	    preprocessed_text = preprocess_text(text)

	    # Name and questions are taken from the raw text (line breaks intact)
	    student_name = extract_student_name(text)
	    questions = extract_questions_from_text(text)

	    # Display student name
	    st.subheader(f"Student Name: {student_name}")

	    if not questions:
	        st.warning("No questions were found in the uploaded PDF.")

	    # Grade each question against the whole preprocessed document
	    results = {}
	    for question in questions:
	        score, feedback = grade_answer(question, preprocessed_text)
	        results[question] = {"score": score, "feedback": feedback}

	    # Display results
	    for question, result in results.items():
	        st.write(f"Question: {question}")
	        st.write(f"Score: {result['score']:.2f}")
	        st.write(f"Feedback: {result['feedback']}")
	        st.write("---")