import streamlit as st import fitz # PyMuPDF import os import re # Configure Streamlit page st.set_page_config(page_title="PDF Extractor", layout="centered") # Custom Styling st.markdown( """ """, unsafe_allow_html=True, ) # Page title st.markdown("

📄 PDF Extractor

", unsafe_allow_html=True) # File uploader uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"]) # Selection: Summarize or Generate MCQs & Key Points task = st.radio("Select Task:", ["Summarize PDF", "Generate MCQs, Key Points, and Important Questions"]) # Function to extract text from PDF def extract_text_from_pdf(pdf_path): doc = fitz.open(pdf_path) text = "" for page in doc: text += page.get_text("text") + "\n" return text.strip() # Function for simple text summarization (basic method) def simple_summarize(text): sentences = text.split(". ") summary = ". ".join(sentences[:5]) # Take the first 5 sentences as a simple summary return summary + "..." if len(sentences) > 5 else summary # Function to extract only key points, MCQs, and important questions def extract_relevant_info(text): key_points = [] mcqs = [] important_questions = [] # Define patterns mcq_pattern = r"^[A-D]\)" # Example: A) Option 1 question_pattern = r"^(What|Which|How|Why|When|Who|Where|Explain|Describe)\b" bullet_point_pattern = r"^(•|-|\*)\s" lines = text.split("\n") for line in lines: line = line.strip() # Extract MCQs if re.match(mcq_pattern, line): mcqs.append(line) # Extract Important Questions elif re.match(question_pattern, line, re.IGNORECASE): important_questions.append(line) # Extract Key Points (Bullets or Short Sentences) elif re.match(bullet_point_pattern, line) or (len(line) < 150 and "." in line): key_points.append(line) return key_points, mcqs, important_questions # Extract Data Button if uploaded_file: extract_button = st.button("🚀 Extract Data", use_container_width=True) if extract_button: with st.spinner("Processing your PDF..."): temp_path = "temp.pdf" with open(temp_path, "wb") as f: f.write(uploaded_file.getbuffer()) extracted_text = extract_text_from_pdf(temp_path) os.remove(temp_path) # Perform selected task if task == "Summarize PDF": st.subheader("📖 Summary") summary = simple_summarize(extracted_text) st.write(summary) elif task == "Generate MCQs, Key Points, and Important Questions": key_points, mcqs, important_questions = extract_relevant_info(extracted_text) col1, col2 = st.columns(2) with col1: if key_points: st.subheader("📌 Key Points") for point in key_points: st.write(f"- {point}") with col2: if mcqs: st.subheader("❓ MCQs") for question in mcqs: st.write(f"- {question}") if important_questions: st.subheader("❓ Important Questions") for question in important_questions: st.write(f"- {question}") else: st.warning("⚠️ Please upload a PDF file first.")