Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import PyPDF2 | |
| from transformers import pipeline | |
| import random | |
| import re | |
| from io import BytesIO | |
# Initialize models (CPU-optimized)
# Both pipelines are built once at import time and shared by every request.
_CPU_DEVICE = -1  # device index passed to both pipelines (CPU-only setup)

summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=_CPU_DEVICE,
)
qa_generator = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    device=_CPU_DEVICE,
)
def extract_text_from_pdf(pdf_file):
    """Extract text from an uploaded PDF.

    Args:
        pdf_file: Either a filesystem path (``str``) or an upload object
            exposing the path through a ``.name`` attribute.

    Returns:
        Up to 15000 characters of text taken from the first 10 pages, with
        pages separated by a newline. On any failure, a string starting with
        ``"Error reading PDF: "`` is returned instead of raising.
    """
    try:
        # Accept both a plain path and a tempfile-like wrapper with ``.name``.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        with open(pdf_path, 'rb') as f:
            pdf_reader = PyPDF2.PdfReader(f)
            # Limit to first 10 pages for CPU performance
            max_pages = min(10, len(pdf_reader.pages))
            # ``or ""`` guards against a page that yields no text object.
            page_texts = [
                pdf_reader.pages[page_num].extract_text() or ""
                for page_num in range(max_pages)
            ]
        # Newline separator keeps the last word of one page from fusing
        # with the first word of the next.
        return "\n".join(page_texts)[:15000]  # Limit tokens
    except Exception as e:
        # Failures are reported in-band; callers detect them by this prefix.
        return f"Error reading PDF: {str(e)}"
def chunk_text(text, max_length=1000):
    """Split text into whitespace-delimited chunks of bounded length.

    Words are never broken. Each chunk is the words joined by single spaces
    and is at most ``max_length`` characters long, except when a single word
    is itself longer than ``max_length`` (that word then forms its own chunk).

    Args:
        text: The text to split; any run of whitespace separates words.
        max_length: Maximum number of characters per chunk.

    Returns:
        A list of non-empty chunk strings; empty for blank input.
    """
    chunks = []
    current_chunk = []
    current_length = 0  # always equals len(" ".join(current_chunk))
    for word in text.split():
        # Joining adds one separator before every word except the first.
        projected = current_length + len(word) + (1 if current_chunk else 0)
        if current_chunk and projected > max_length:
            chunks.append(" ".join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            # Also taken for an oversized first word, so no empty chunk
            # is ever emitted.
            current_chunk.append(word)
            current_length = projected
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
def generate_summary(text):
    """Generate a concise summary of the text.

    The text is split with ``chunk_text`` and only the first three chunks
    are summarised, to bound CPU time. Chunks whose summarisation fails are
    skipped.

    Args:
        text: The source text.

    Returns:
        The chunk summaries joined by blank lines, or a short explanatory
        message when the text is under 100 characters or no chunk could be
        summarised.
    """
    if len(text) < 100:
        return "Text too short to summarize."
    summaries = []
    for chunk in chunk_text(text, 1000)[:3]:  # Limit chunks for CPU
        try:
            result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
            summaries.append(result[0]['summary_text'])
        except Exception:
            # Best effort: drop a failing chunk, but let KeyboardInterrupt
            # and SystemExit propagate instead of being swallowed.
            continue
    return "\n\n".join(summaries) if summaries else "Unable to generate summary."
def generate_flashcards(text, num_cards=5):
    """Generate question/answer flashcards from text.

    The text is split on ``'.'``. A sentence is eligible when it is longer
    than 20 characters and has more than five words. Up to ``num_cards``
    eligible sentences are picked at random; each card asks about the
    sentence's first five words and uses the full sentence as the answer.

    Args:
        text: The source text.
        num_cards: Maximum number of cards to produce; values below 1
            produce no cards.

    Returns:
        Markdown with the cards separated by horizontal rules, numbered
        consecutively from Q1, or ``"Unable to generate flashcards."`` when
        no card could be built.
    """
    sentences = [s.strip() for s in text.split('.') if len(s.strip()) > 20]
    # Filter before sampling so short sentences neither use up the card
    # budget nor leave gaps in the numbering.
    candidates = [s for s in sentences if len(s.split()) > 5]
    # Clamp at zero: random.sample rejects a negative sample size.
    count = max(0, min(num_cards, len(candidates)))
    selected = random.sample(candidates, count)
    flashcards = []
    for i, sentence in enumerate(selected, 1):
        # Extract key concept (simple heuristic): the first five words.
        lead = ' '.join(sentence.split()[:5])
        question = f"Q{i}: What is explained by: '{lead}...'?"
        flashcards.append(f"**{question}**\n\nA: {sentence}\n")
    return "\n---\n\n".join(flashcards) if flashcards else "Unable to generate flashcards."
def generate_quiz(text, num_questions=3):
    """Generate quiz questions from text.

    The text is split on ``'.'`` and sentences longer than 30 characters are
    candidates. Up to ``num_questions`` of them are picked at random, and the
    text2text model is prompted with the first 200 characters of each. If
    generation fails for a sentence, a generic prompt built from its first
    150 characters is used instead.

    Args:
        text: The source text.
        num_questions: Maximum number of questions; values below 1 produce
            no questions.

    Returns:
        Markdown with the questions separated by horizontal rules, or
        ``"Unable to generate quiz."`` when there are no candidates.
    """
    sentences = [s.strip() for s in text.split('.') if len(s.strip()) > 30]
    # Clamp at zero: random.sample rejects a negative sample size.
    count = max(0, min(num_questions, len(sentences)))
    selected = random.sample(sentences, count)
    quiz = []
    for i, sentence in enumerate(selected, 1):
        prompt = f"Create a multiple choice question about: {sentence[:200]}"
        try:
            result = qa_generator(prompt, max_length=100)
            quiz.append(f"**Question {i}:**\n{result[0]['generated_text']}\n")
        except Exception:
            # Fall back to an open question; KeyboardInterrupt and
            # SystemExit are deliberately not caught here.
            quiz.append(f"**Question {i}:**\nBased on the text: {sentence[:150]}... (provide your answer)\n")
    return "\n---\n\n".join(quiz) if quiz else "Unable to generate quiz."
def process_document(pdf_file, text_input, features):
    """Main processing function: turn a PDF or pasted text into study aids.

    Args:
        pdf_file: Uploaded PDF, or ``None``. Takes precedence over
            ``text_input`` when present.
        text_input: Pasted text; only its first 15000 characters are used.
            ``None`` is treated like an empty string.
        features: Collection of selected feature names (``"Summary"``,
            ``"Flashcards"``, ``"Quiz"``). ``None`` is treated as empty.

    Returns:
        A 4-tuple ``(preview, summary, flashcards, quiz)``. ``preview`` is
        the first 500 characters of the text (followed by ``"..."`` when
        truncated), or a message when no input was given or the PDF could
        not be read. Outputs for unselected features are empty strings.
    """
    features = features or []
    # Get text from PDF or text input
    if pdf_file is not None:
        text = extract_text_from_pdf(pdf_file)
        # The extractor reports failure in-band with this exact prefix.
        # Checking only here keeps pasted text that merely starts with
        # "Error" from being mistaken for a failure.
        if text.startswith("Error reading PDF:"):
            return text, "", "", ""
    elif text_input and text_input.strip():
        text = text_input[:15000]
    else:
        return "Please provide a PDF file or paste text.", "", "", ""
    # Generate outputs based on selected features
    summary = generate_summary(text) if "Summary" in features else ""
    flashcards = generate_flashcards(text) if "Flashcards" in features else ""
    quiz = generate_quiz(text) if "Quiz" in features else ""
    preview = text[:500] + "..." if len(text) > 500 else text
    return preview, summary, flashcards, quiz
# Gradio Interface
# Layout: inputs and the trigger button in the left column, the four outputs
# in the right column, and a tips section below the row.
with gr.Blocks(theme=gr.themes.Soft(), title="StudyForge AI") as demo:
    gr.Markdown("""
    # StudyForge AI - Your Intelligent Study Companion
    Transform any textbook chapter or notes into summaries, flashcards, and practice quizzes instantly!
    """)
    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF (Max 10 pages)", file_types=[".pdf"])
            text_input = gr.Textbox(label="Or Paste Text Here", lines=5, placeholder="Paste your study material...")
            features = gr.CheckboxGroup(
                ["Summary", "Flashcards", "Quiz"],
                label="Select What You Need",
                value=["Summary", "Flashcards"]
            )
            generate_btn = gr.Button("Generate Study Materials", variant="primary")
        with gr.Column():
            text_preview = gr.Textbox(label="Text Preview", lines=3)
            summary_output = gr.Markdown(label="Summary")
            flashcards_output = gr.Markdown(label="Flashcards")
            quiz_output = gr.Markdown(label="Practice Quiz")
    # The output order must match the 4-tuple returned by process_document.
    generate_btn.click(
        fn=process_document,
        inputs=[pdf_input, text_input, features],
        outputs=[text_preview, summary_output, flashcards_output, quiz_output]
    )
    gr.Markdown("""
    ### Tips:
    - For best results, use clear, well-formatted text (10 pages max for CPU performance)
    - Flashcards work best with content that has clear concepts
    - Processing may take 30-60 seconds on CPU
    """)

if __name__ == "__main__":
    demo.launch()