Spaces:

arif670
/

O_Level_exam

Sleeping

App Files Files Community

O_Level_exam / app.py

arif670

Update app.py

0fe6418 verified about 1 year ago

raw

history blame contribute delete

3.03 kB

	import streamlit as st
	from bs4 import BeautifulSoup
	import requests
	from transformers import pipeline
	import pandas as pd

	# Build the question-answering pipeline once, at module load, so the
	# extraction code below reuses a single instance. No model name is passed,
	# so whichever model the library selects by default for this task is used;
	# replace it with an explicitly chosen model if that default is unsuitable.
	qa_model = pipeline("question-answering")

	# NOTE(review): st.cache is reported as deprecated in newer Streamlit releases
	# in favour of st.cache_data -- confirm the pinned version before switching.
	@st.cache(allow_output_mutation=True)
	def fetch_past_papers(subject_code, exam_year, variant, session, timeout=10):
	    """Look up the link to one past paper on the Cambridge International site.

	    Downloads the subject's "past papers and mark schemes" page and scans its
	    anchors for one whose href sits under ``{exam_year}/{session}/`` and whose
	    link text is exactly ``"{variant} Paper {variant}"``.

	    Args:
	        subject_code: Subject identifier used as a path segment of the page URL.
	            Surrounding whitespace is ignored.
	        exam_year: Exam year as it appears in the link path.
	        variant: Variant label matched against the link text.
	        session: Session path segment as it appears in the link path.
	        timeout: Seconds to wait for the HTTP response before giving up.

	    Returns:
	        The matching anchor's ``href`` exactly as it appears in the page (a
	        site-relative path beginning with "/"), or ``None`` when the subject
	        code is blank, the page request is unsuccessful, or no anchor matches.

	    Raises:
	        requests.RequestException: If the connection fails or times out.
	    """
	    subject_code = (subject_code or "").strip()
	    if not subject_code:
	        # A blank code would yield a malformed URL; there is nothing to look up.
	        return None

	    base_path = f"/programmes-and-qualifications/cambridge-o-level/{subject_code}/past-papers-and-mark-schemes/"

	    # Without a timeout, requests waits indefinitely and would hang the app.
	    response = requests.get(f"https://www.cambridgeinternational.org{base_path}", timeout=timeout)
	    if not response.ok:
	        # An error page cannot contain the paper links, so skip parsing it.
	        return None

	    soup = BeautifulSoup(response.content, "html.parser")

	    # Built once here instead of being re-formatted for every anchor visited.
	    wanted_prefix = f"{base_path}{exam_year}/{session}/"
	    wanted_text = f"{variant} Paper {variant}"

	    # Extract the relevant past paper based on exam year, session and variant.
	    candidates = soup.find_all("a", href=lambda href: href and href.startswith(wanted_prefix))
	    for paper in candidates:
	        if paper.text.strip() == wanted_text:
	            return paper["href"]

	    return None

	def _ocr_process(past_paper_url):
	    """Return the plain text of the paper at *past_paper_url* (placeholder)."""
	    # The text-extraction step was only sketched in the original code. Fail with
	    # an explicit message instead of an accidental NameError further down.
	    raise NotImplementedError(
	        "Text extraction is not implemented: replace _ocr_process(past_paper_url) "
	        "with an OCR/PDF reader that returns the paper's text as a string."
	    )


	def extract_questions_and_answers(past_paper_url):
	    """Extract question/answer pairs from the text of a past paper.

	    The paper's text is obtained from ``_ocr_process`` and split into
	    paragraphs on blank lines. For each non-empty paragraph the
	    question-answering model is asked "What is the question?"; when it
	    returns a non-empty span, that span is used as the question and put to
	    the model again against the same paragraph to obtain the answer.

	    Args:
	        past_paper_url: Location of the past paper, passed through unchanged
	            to the text-extraction step.

	    Returns:
	        A list of ``{"question": ..., "answer": ...}`` dicts, one per
	        paragraph that yielded a question; empty if none did.

	    Raises:
	        NotImplementedError: Until ``_ocr_process`` is given a real
	            implementation.
	    """
	    ocr_result = _ocr_process(past_paper_url)

	    # Process the extracted text using the question-answering model.
	    questions_and_answers = []
	    for paragraph in ocr_result.split("\n\n"):
	        if not paragraph.strip():
	            # Blank chunks carry no text for the model to search.
	            continue
	        # The pipeline object is itself callable and its result exposes the
	        # extracted span under the "answer" key.
	        question = qa_model(question="What is the question?", context=paragraph)["answer"]
	        if question:
	            answer = qa_model(question=question, context=paragraph)["answer"]
	            questions_and_answers.append({"question": question, "answer": answer})

	    return questions_and_answers

	def main():
	    """Render the search form and, when requested, show the extracted Q&A table."""
	    st.title("Cambridge O-Level Exam Q&A Extractor")

	    # Gather the four search criteria from the user.
	    subject_code = st.text_input("Subject Code")
	    year_choices = list(map(str, range(2015, 2026)))
	    exam_year = st.selectbox("Exam Year", year_choices)
	    variant = st.selectbox("Variant", ["1", "2", "3"])
	    session = st.selectbox("Session", ["May-Jun", "Oct-Nov"])

	    # Nothing further happens until the user presses the button.
	    if not st.button("Search"):
	        return

	    paper_link = fetch_past_papers(subject_code, exam_year, variant, session)
	    if not paper_link:
	        st.error("Past paper not found for the specified criteria.")
	        return

	    qa_rows = extract_questions_and_answers(paper_link)
	    if not qa_rows:
	        st.error("No questions and answers found in the extracted text.")
	        return

	    # One row per extracted question/answer pair.
	    st.dataframe(pd.DataFrame(qa_rows))

	# Start the app only when this file is run as a script, not when it is imported.
	if __name__ == "__main__":
	    main()