import logging
import random
import re
from io import BytesIO

import gradio as gr
import PyPDF2
from transformers import pipeline

logger = logging.getLogger(__name__)

# Initialize models (CPU-optimized); device=-1 selects the CPU for both pipelines.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=-1)
qa_generator = pipeline("text2text-generation", model="google/flan-t5-base", device=-1)


def extract_text_from_pdf(pdf_file, max_pages=10, max_chars=15000):
    """Extract text from an uploaded PDF.

    Args:
        pdf_file: Either a filesystem path (str / path-like) or an object
            exposing the path through a ``name`` attribute, which is what
            Gradio's file component hands to callbacks depending on its
            version and ``type`` setting.
        max_pages: Maximum number of leading pages to read (kept small for
            CPU performance).
        max_chars: Maximum number of characters returned.

    Returns:
        The extracted text, truncated to ``max_chars`` characters. On any
        failure a string of the form ``"Error reading PDF: <reason>"`` is
        returned instead of raising, because callers detect errors by that
        prefix.
    """
    # Accept both a plain path and a tempfile-like wrapper with ``.name``.
    path = getattr(pdf_file, "name", pdf_file)
    try:
        with open(path, "rb") as f:
            pdf_reader = PyPDF2.PdfReader(f)
            page_count = min(max_pages, len(pdf_reader.pages))
            # ``or ""`` keeps a page that yields no text from breaking the join.
            page_texts = [
                pdf_reader.pages[page_num].extract_text() or ""
                for page_num in range(page_count)
            ]
    except Exception as e:
        logger.warning("Could not read PDF %r", path, exc_info=True)
        return f"Error reading PDF: {str(e)}"
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next.
    return "\n".join(page_texts)[:max_chars]


def chunk_text(text, max_length=1000):
    """Split text into whitespace-delimited chunks of at most ``max_length`` characters.

    Words are never broken: a single word longer than ``max_length`` becomes
    a chunk of its own (and is the only case where a chunk exceeds the
    limit). Runs of whitespace are collapsed to single spaces.

    Args:
        text: The text to split.
        max_length: Maximum length, in characters, of each joined chunk.

    Returns:
        A list of non-empty chunk strings; empty when ``text`` has no words.
    """
    chunks = []
    current_chunk = []
    current_length = 0

    for word in text.split():
        # Cost of appending this word: the word plus a joining space unless
        # it is the first word of the chunk.
        added = len(word) + (1 if current_chunk else 0)
        # Only flush a non-empty chunk; otherwise an oversized first word
        # would emit an empty string as a chunk.
        if current_chunk and current_length + added > max_length:
            chunks.append(" ".join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length += added

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks


def generate_summary(text, max_chunks=3):
    """Generate a concise summary of ``text``.

    The text is split into 1000-character chunks and the first
    ``max_chunks`` of them are summarized independently with the
    module-level ``summarizer`` pipeline.

    Args:
        text: The text to summarize.
        max_chunks: Maximum number of chunks to summarize (limits CPU time).

    Returns:
        The chunk summaries joined by blank lines, or a short explanatory
        message when the text is under 100 characters or no chunk could be
        summarized.
    """
    if len(text) < 100:
        return "Text too short to summarize."

    summaries = []
    for chunk in chunk_text(text, 1000)[:max_chunks]:
        try:
            result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
            summaries.append(result[0]["summary_text"])
        except Exception:
            # Best effort: one failing chunk must not discard the others,
            # but the failure should be visible in the logs.
            logger.warning("Summarization failed for one chunk; skipping it.", exc_info=True)

    return "\n\n".join(summaries) if summaries else "Unable to generate summary."
def _split_sentences(text, min_length):
    """Return whitespace-normalized sentences of ``text`` longer than ``min_length`` characters."""
    # Break only after '.', '!' or '?' followed by whitespace, so a dot inside
    # a token (e.g. "3.14") does not end a sentence.
    pieces = re.split(r"(?<=[.!?])\s+", text)
    # Collapse line breaks and repeated spaces inside each sentence.
    normalized = (" ".join(piece.split()) for piece in pieces)
    return [sentence for sentence in normalized if len(sentence) > min_length]


def generate_flashcards(text, num_cards=5):
    """Generate flashcards from text.

    Each card asks what is explained by the first five words of a randomly
    chosen sentence and gives the full sentence as the answer.

    Args:
        text: Source material.
        num_cards: Maximum number of cards to produce.

    Returns:
        Markdown with the cards separated by horizontal rules, or
        ``"Unable to generate flashcards."`` when no sentence qualifies.
    """
    # Filter on word count BEFORE sampling so that short sentences neither
    # reduce the number of cards nor leave gaps in the Q-numbering.
    candidates = [s for s in _split_sentences(text, 20) if len(s.split()) > 5]
    count = max(0, min(num_cards, len(candidates)))
    selected = random.sample(candidates, count)

    flashcards = []
    for i, sentence in enumerate(selected, 1):
        lead = " ".join(sentence.split()[:5])
        question = f"Q{i}: What is explained by: '{lead}...'?"
        flashcards.append(f"**{question}**\n\nA: {sentence}\n")

    return "\n---\n\n".join(flashcards) if flashcards else "Unable to generate flashcards."


def generate_quiz(text, num_questions=3):
    """Generate quiz questions from text.

    Randomly chosen sentences are passed to the module-level ``qa_generator``
    pipeline; if generation fails for a sentence, a generic prompt quoting
    that sentence is used instead.

    Args:
        text: Source material.
        num_questions: Maximum number of questions to produce.

    Returns:
        Markdown with the questions separated by horizontal rules, or
        ``"Unable to generate quiz."`` when no sentence qualifies.
    """
    import logging

    sentences = _split_sentences(text, 30)
    count = max(0, min(num_questions, len(sentences)))
    selected = random.sample(sentences, count)

    quiz = []
    for i, sentence in enumerate(selected, 1):
        prompt = f"Create a multiple choice question about: {sentence[:200]}"
        try:
            result = qa_generator(prompt, max_length=100)
            quiz.append(f"**Question {i}:**\n{result[0]['generated_text']}\n")
        except Exception:
            # Deliberate best effort: fall back to a generic question, but
            # record why the model call failed.
            logging.getLogger(__name__).warning(
                "Question generation failed; using fallback prompt.", exc_info=True
            )
            quiz.append(
                f"**Question {i}:**\nBased on the text: {sentence[:150]}... (provide your answer)\n"
            )

    return "\n---\n\n".join(quiz) if quiz else "Unable to generate quiz."


def process_document(pdf_file, text_input, features):
    """Main processing function wired to the "Generate" button.

    Args:
        pdf_file: Uploaded PDF as delivered by the file component, or
            ``None``. Takes precedence over ``text_input`` when present.
        text_input: Pasted text; may be empty or ``None``.
        features: Collection of selected feature names among ``"Summary"``,
            ``"Flashcards"`` and ``"Quiz"``; may be empty or ``None``.

    Returns:
        A 4-tuple ``(preview, summary, flashcards, quiz)`` of strings. When
        no usable input is available, ``preview`` carries the explanation
        and the other three are empty.
    """
    features = features or []
    pasted = text_input or ""

    # Get text from PDF or text input
    if pdf_file is not None:
        text = extract_text_from_pdf(pdf_file)
        # extract_text_from_pdf reports failures as a string with this exact
        # prefix. Checking it only on the PDF path keeps pasted text that
        # happens to begin with "Error" from being treated as a failure.
        if text.startswith("Error reading PDF:"):
            return text, "", "", ""
        if not text.strip():
            return "No extractable text found in the PDF.", "", "", ""
    elif pasted.strip():
        text = pasted[:15000]
    else:
        return "Please provide a PDF file or paste text.", "", "", ""

    # Generate outputs based on selected features
    summary = generate_summary(text) if "Summary" in features else ""
    flashcards = generate_flashcards(text) if "Flashcards" in features else ""
    quiz = generate_quiz(text) if "Quiz" in features else ""

    preview = text[:500] + "..." if len(text) > 500 else text
    return preview, summary, flashcards, quiz


# Gradio Interface
with gr.Blocks(theme=gr.themes.Soft(), title="StudyForge AI") as demo:
    gr.Markdown("""
    # 📚 StudyForge AI - Your Intelligent Study Companion
    Transform any textbook chapter or notes into summaries, flashcards, and practice quizzes instantly!
    """)

    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF (Max 10 pages)", file_types=[".pdf"])
            text_input = gr.Textbox(
                label="Or Paste Text Here",
                lines=5,
                placeholder="Paste your study material...",
            )
            features = gr.CheckboxGroup(
                ["Summary", "Flashcards", "Quiz"],
                label="Select What You Need",
                value=["Summary", "Flashcards"],
            )
            generate_btn = gr.Button("🚀 Generate Study Materials", variant="primary")

        with gr.Column():
            text_preview = gr.Textbox(label="Text Preview", lines=3)
            summary_output = gr.Markdown(label="Summary")
            flashcards_output = gr.Markdown(label="Flashcards")
            quiz_output = gr.Markdown(label="Practice Quiz")

    generate_btn.click(
        fn=process_document,
        inputs=[pdf_input, text_input, features],
        outputs=[text_preview, summary_output, flashcards_output, quiz_output],
    )

    gr.Markdown("""
    ### Tips:
    - For best results, use clear, well-formatted text (10 pages max for CPU performance)
    - Flashcards work best with content that has clear concepts
    - Processing may take 30-60 seconds on CPU
    """)

if __name__ == "__main__":
    demo.launch()