Spaces:
Build error
Build error
import io
import os
import re
import tempfile

import PyPDF2
import pytesseract
import streamlit as st
from fpdf import FPDF
from pdf2image import convert_from_path
from transformers import pipeline
# Load pre-trained model for question-answering.
# Streamlit re-executes this whole script on every widget interaction, so the
# model is built through a cached loader to avoid re-instantiating it on each
# rerun.
@st.cache_resource
def _load_qa_pipeline():
    """Build the DistilBERT SQuAD question-answering pipeline."""
    return pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")


qa_pipeline = _load_qa_pipeline()
# Extract text from PDF (text-based and image-based)
def extract_text_from_pdf(pdf_path):
    """Return the embedded text of every page of a PDF, concatenated.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The text of all pages joined in page order. The result is an empty
        string when no page yields any text.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Defensive: treat a page that yields None (instead of a string) as
        # empty so one text-less page cannot abort the whole extraction.
        return ''.join(page.extract_text() or '' for page in reader.pages)
def extract_text_from_image_pdf(pdf_path):
    """Return the OCR text of a PDF by rasterising it and reading each image.

    Args:
        pdf_path: Filesystem path of the PDF to convert and OCR.

    Returns:
        The OCR output of every converted image, concatenated in order.
    """
    page_images = convert_from_path(pdf_path)
    return ''.join(pytesseract.image_to_string(page_image) for page_image in page_images)
# Process the extracted text
def preprocess_text(text):
    """Collapse every run of whitespace to one space and trim both ends.

    Args:
        text: Raw extracted text; may be empty or None.

    Returns:
        The normalised single-line string, or '' for empty/None input.
    """
    if not text:
        return ''
    # r'\s+' already covers newlines, so a separate newline pass is not needed.
    return re.sub(r'\s+', ' ', text).strip()
# Grading function using the question-answering model
def grade_answer(question, student_answer, threshold=0.5):
    """Score a student's answer text against a question with the QA model.

    Args:
        question: The question to ask the model.
        student_answer: Text used as the model's context.
        threshold: Score strictly above which the answer counts as
            "Correct". Defaults to 0.5.

    Returns:
        A ``(score, feedback)`` tuple where ``feedback`` is "Correct" or
        "Incorrect". Blank question or answer text yields
        ``(0.0, "Incorrect")`` without calling the model.
    """
    # The question-answering pipeline rejects empty question/context, so a
    # blank input is graded as incorrect instead of being passed through.
    if not (question and question.strip()):
        return 0.0, "Incorrect"
    if not (student_answer and student_answer.strip()):
        return 0.0, "Incorrect"

    result = qa_pipeline(question=question, context=student_answer)
    answer_score = result['score']
    feedback = "Correct" if answer_score > threshold else "Incorrect"
    return answer_score, feedback
# Function to extract student name from text
def extract_student_name(text):
    """Find the student's name after a "Name" label in the extracted text.

    The label may be followed by an optional ':', '|' or '-' separator. The
    name is captured up to the end of its own line so that following lines
    (for example a roll number) are not swallowed into the name.

    Args:
        text: Raw extracted text; may be empty or None.

    Returns:
        The trimmed name, or "Unknown Student" when no non-empty name is found.
    """
    if not text:
        return "Unknown Student"
    # Whitespace (including a line break) is allowed between label and name,
    # but the captured name itself is limited to word characters, spaces and
    # tabs on a single line.
    match = re.search(r"Name\s*[:|-]?\s*(\w[\w \t]*)", text)
    if match:
        name = match.group(1).strip()
        if name:
            return name
    return "Unknown Student"
# Function to extract questions from the text
def extract_questions_from_text(text):
    """Collect question strings from the extracted text.

    Two kinds of candidates are gathered: spans introduced by a "Question"
    label, and sentences that end with '?'. Candidates are
    whitespace-normalised, de-duplicated in order of first appearance, and
    dropped when they contain one of the metadata keywords.

    Args:
        text: Raw extracted text; may be empty or None.

    Returns:
        A list of unique question strings in document order.
    """
    if not text:
        return []

    # "Question: ..." spans, ending at the first '?' when there is one.
    candidates = re.findall(r'Question\s*[:|-]?\s*[\w\s]+\??', text)
    # Any sentence ending with '?'; '?' is excluded from the sentence body so
    # each question mark closes its own question.
    candidates += re.findall(r'[^.?]*\?', text)

    # Normalise whitespace so the same question found by both patterns
    # collapses to one entry; dict.fromkeys keeps first-seen order, so the
    # result is deterministic across runs.
    normalised = (' '.join(candidate.split()) for candidate in candidates)
    unique = dict.fromkeys(candidate for candidate in normalised if candidate)

    # Remove metadata like 'Name', 'Roll No', etc.
    # NOTE(review): this is a plain substring test, so a genuine question that
    # mentions e.g. "name" is dropped too -- confirm whether that is intended.
    metadata_keywords = ('name', 'roll no', 'school')
    return [
        question
        for question in unique
        if not any(keyword in question.lower() for keyword in metadata_keywords)
    ]
# Streamlit Interface
st.title('Student Answer Grading System')
st.write('Upload a PDF containing student details and their answers.')

# Upload file
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Save the upload to a unique temporary file: a fixed filename in the
    # working directory would be shared (and overwritten) by concurrent
    # sessions. The file is closed before being re-opened by the extractors
    # and is always removed afterwards.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_file:
        tmp_file.write(uploaded_file.getbuffer())
        pdf_path = tmp_file.name
    try:
        # Extract text from the uploaded PDF
        text = extract_text_from_pdf(pdf_path)
        # If no usable text was extracted (empty or whitespace only), try OCR
        if not text or not text.strip():
            text = extract_text_from_image_pdf(pdf_path)
    finally:
        os.remove(pdf_path)

    if not text or not text.strip():
        # Nothing to grade; grading against an empty context is meaningless.
        st.warning("No text could be extracted from the uploaded PDF.")
    else:
        # Show the extracted text so it can be checked manually
        st.subheader("Extracted Text:")
        st.text(text)

        # Preprocess text
        preprocessed_text = preprocess_text(text)

        # Extract student name and questions from the raw text
        student_name = extract_student_name(text)
        questions = extract_questions_from_text(text)

        # Display student name
        st.subheader(f"Student Name: {student_name}")

        # Grade every question against the whole preprocessed document
        results = {}
        for question in questions:
            score, feedback = grade_answer(question, preprocessed_text)
            results[question] = {"score": score, "feedback": feedback}

        # Display results
        for question, result in results.items():
            st.write(f"**Question**: {question}")
            st.write(f"**Score**: {result['score']:.2f}")
            st.write(f"**Feedback**: {result['feedback']}")
            st.write("---")