Exam-Checker / app.py
Zeeshan24's picture
Update app.py
41a5bcd verified
import io
import os
import re
import tempfile

import PyPDF2
import pytesseract
import streamlit as st
from fpdf import FPDF
from pdf2image import convert_from_path
from transformers import pipeline
# Load pre-trained model for question-answering
@st.cache_resource
def _load_qa_pipeline():
    """Build the question-answering pipeline once per server process.

    Streamlit re-executes this script on every user interaction; caching the
    pipeline as a resource avoids rebuilding the model on each rerun.

    Returns:
        The object returned by ``pipeline("question-answering", ...)``.
    """
    return pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")


qa_pipeline = _load_qa_pipeline()
# Extract text from PDF (text-based and image-based)
def extract_text_from_pdf(pdf_path):
    """Return the extracted text of every page of a PDF, concatenated.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        The text of all pages joined with no separator. The result is an
        empty string when no page yields any text.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # `or ''` keeps a page that yields no text (None or '') from
        # breaking the concatenation.
        return ''.join(page.extract_text() or '' for page in reader.pages)
def extract_text_from_image_pdf(pdf_path):
    """Run OCR over a PDF and return the recognised text.

    The PDF at ``pdf_path`` is converted to images with ``convert_from_path``
    and each image is passed to ``pytesseract.image_to_string``.

    Args:
        pdf_path: Path of the PDF file to convert.

    Returns:
        The OCR output of all images, concatenated with no separator.
    """
    page_images = convert_from_path(pdf_path)
    return ''.join(
        pytesseract.image_to_string(page_image) for page_image in page_images
    )
# Process the extracted text
def preprocess_text(text):
    """Collapse every run of whitespace to a single space and trim the ends.

    Args:
        text: The raw text to normalise.

    Returns:
        ``text`` with each whitespace run (spaces, tabs, newlines, ...)
        replaced by one space and with leading/trailing whitespace removed.
    """
    # `\s` already matches newlines, so one pass is enough; a separate
    # newline substitution afterwards would never find anything to replace.
    return re.sub(r'\s+', ' ', text).strip()
# Grading function using the question-answering model
def grade_answer(question, student_answer, threshold=0.5):
    """Score a student's answer text against a question with the QA model.

    Args:
        question: The question text.
        student_answer: The text passed to the model as its context.
        threshold: Score above which the answer is labelled "Correct".
            Defaults to 0.5.

    Returns:
        A ``(score, feedback)`` tuple where ``score`` is the ``'score'`` entry
        of the pipeline result and ``feedback`` is ``"Correct"`` or
        ``"Incorrect"``. A blank question or answer yields
        ``(0.0, "Incorrect")`` without calling the model.
    """
    # There is nothing to grade without both a question and an answer, and
    # the model should not be invoked on blank input.
    if not question or not question.strip():
        return 0.0, "Incorrect"
    if not student_answer or not student_answer.strip():
        return 0.0, "Incorrect"

    result = qa_pipeline(question=question, context=student_answer)
    answer_score = result['score']
    feedback = "Correct" if answer_score > threshold else "Incorrect"
    return answer_score, feedback
# Function to extract student name from text
def extract_student_name(text):
    """Find the student name that follows a ``Name`` label in the text.

    The label may be followed by an optional ``:``, ``|`` or ``-`` separator.
    The name may start on the same line or the next one, but it ends at the
    end of its own line so that following lines are not swallowed into it.

    Args:
        text: The extracted document text (with line breaks preserved).

    Returns:
        The name with surrounding whitespace removed, or
        ``"Unknown Student"`` when no label followed by a name is found.
    """
    # The captured name must start with a word character and may only contain
    # word characters, spaces and tabs -- never a line break.
    match = re.search(r"Name\s*[:|-]?\s*(\w[\w \t]*)", text)
    if match:
        return match.group(1).strip()
    return "Unknown Student"
# Function to extract questions from the text
def extract_questions_from_text(text):
    """Extract candidate questions from the document text.

    Two kinds of candidates are collected: spans starting with a
    ``Question`` label, and any sentence that ends with ``?``. Candidates are
    stripped, de-duplicated in order of first appearance, and dropped when
    they mention metadata keywords (``name``, ``roll no``, ``school``).

    Args:
        text: The extracted document text.

    Returns:
        A list of unique question strings in a stable, document-based order.
    """
    # Labelled questions: stop at the first '?' so the text that follows the
    # question is not pulled into it.
    candidates = re.findall(r'Question\s*[:|-]?\s*[\w\s]+\??', text)
    # Sentences ending with '?': excluding '?' from the body keeps consecutive
    # questions separate instead of merging them into one match.
    candidates += re.findall(r'[^.?]*\?', text)

    excluded_keywords = ('name', 'roll no', 'school')

    # dict.fromkeys de-duplicates while preserving order (a set would not);
    # stripping first lets candidates differing only in padding collapse.
    unique_candidates = dict.fromkeys(candidate.strip() for candidate in candidates)

    questions = []
    for candidate in unique_candidates:
        # Skip fragments with no actual words (e.g. a stray '?').
        if not re.search(r'\w', candidate):
            continue
        lowered = candidate.lower()
        if any(keyword in lowered for keyword in excluded_keywords):
            continue
        questions.append(candidate)
    return questions
# Streamlit Interface
st.title('Student Answer Grading System')
st.write('Upload a PDF containing student details and their answers.')

# Upload file
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Save the upload to a uniquely named temporary file: a fixed file name
    # in the working directory would be shared by every concurrent session.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_file:
        tmp_file.write(uploaded_file.getbuffer())
        pdf_path = tmp_file.name

    try:
        # Extract text from the uploaded PDF
        text = extract_text_from_pdf(pdf_path)
        # Whitespace-only output counts as "no text extracted", so fall back to OCR
        if not text or not text.strip():
            text = extract_text_from_image_pdf(pdf_path)
    finally:
        # The temporary copy is only needed during extraction
        os.remove(pdf_path)

    # Show the extracted text so it can be checked manually
    st.subheader("Extracted Text:")
    st.text(text)

    # Preprocess text
    preprocessed_text = preprocess_text(text)

    # Extract student name and questions from the raw text (line breaks kept)
    student_name = extract_student_name(text)
    questions = extract_questions_from_text(text)

    # Display student name
    st.subheader(f"Student Name: {student_name}")

    if not questions:
        st.warning("No questions were found in the uploaded PDF.")

    # Grade every question against the whole preprocessed document text
    results = {}
    for question in questions:
        score, feedback = grade_answer(question, preprocessed_text)
        results[question] = {"score": score, "feedback": feedback}

    # Display results
    for question, result in results.items():
        st.write(f"**Question**: {question}")
        st.write(f"**Score**: {result['score']:.2f}")
        st.write(f"**Feedback**: {result['feedback']}")
        st.write("---")