shaheerawan3's picture
Update app.py
01dcc2d verified
import gradio as gr
import PyPDF2
from transformers import pipeline
import random
import re
from io import BytesIO
# Load both Hugging Face pipelines once, at import time, so every request
# reuses the same model instances. device=-1 is passed to run them on CPU.
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=-1,
)
qa_generator = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    device=-1,
)
def extract_text_from_pdf(pdf_file, max_pages=10, max_chars=15000):
    """Extract text from an uploaded PDF.

    Args:
        pdf_file: Either a filesystem path given as a ``str`` or an object
            that exposes the path as its ``.name`` attribute (for example a
            temporary-file wrapper).
        max_pages: Maximum number of leading pages to read; kept small for
            CPU performance.
        max_chars: Maximum number of characters of extracted text to return.

    Returns:
        The extracted text (pages separated by a newline, truncated to
        ``max_chars``), or a string starting with ``"Error reading PDF: "``
        if anything fails. This function never raises.
    """
    try:
        # Accept both a plain path string and an upload object with `.name`.
        path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
        with open(path, 'rb') as f:
            pdf_reader = PyPDF2.PdfReader(f)
            page_count = min(max_pages, len(pdf_reader.pages))
            # `or ""` guards against a page yielding no text object at all.
            page_texts = [
                pdf_reader.pages[page_num].extract_text() or ""
                for page_num in range(page_count)
            ]
        # Join with a newline so the last word of one page is not fused with
        # the first word of the next.
        return "\n".join(page_texts)[:max_chars]
    except Exception as e:
        # Errors are reported in-band; callers look for this prefix.
        return f"Error reading PDF: {str(e)}"
def chunk_text(text, max_length=1000):
    """Split text into whitespace-delimited chunks of bounded length.

    Words are never split. Each chunk is the words joined by single spaces
    and is at most ``max_length`` characters long, except that a single word
    longer than ``max_length`` becomes a chunk on its own.

    Args:
        text: The text to split.
        max_length: Maximum number of characters per chunk.

    Returns:
        A list of non-empty chunk strings; empty for blank input.
    """
    chunks = []
    current_chunk = []
    current_length = 0
    for word in text.split():
        # A joining space is only needed when the chunk already has a word.
        added = len(word) + (1 if current_chunk else 0)
        # Only flush a non-empty chunk, so an oversized first word cannot
        # produce an empty "" chunk.
        if current_chunk and current_length + added > max_length:
            chunks.append(" ".join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length += added
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
def generate_summary(text):
    """Generate a concise summary of the text.

    The text is split into chunks of about 1000 characters and only the
    first three chunks are summarized, to bound CPU time.

    Args:
        text: The text to summarize.

    Returns:
        The per-chunk summaries joined by blank lines,
        ``"Text too short to summarize."`` for input under 100 characters,
        or ``"Unable to generate summary."`` if no chunk could be summarized.
    """
    if len(text) < 100:
        return "Text too short to summarize."
    summaries = []
    for chunk in chunk_text(text, 1000)[:3]:  # Limit chunks for CPU
        try:
            summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
            summaries.append(summary[0]['summary_text'])
        except Exception:
            # Best effort: skip a chunk the model cannot handle, but do not
            # swallow KeyboardInterrupt/SystemExit as a bare except would.
            continue
    return "\n\n".join(summaries) if summaries else "Unable to generate summary."
def generate_flashcards(text, num_cards=5):
    """Generate question/answer flashcards from randomly chosen sentences.

    A sentence is any '.'-separated piece of the text. Only sentences longer
    than 20 characters and with more than 5 words are eligible. Eligibility
    is checked before sampling, so the requested number of cards is produced
    whenever enough eligible sentences exist, and cards are numbered
    consecutively from Q1.

    Args:
        text: The source text.
        num_cards: Maximum number of flashcards to produce.

    Returns:
        Markdown with the cards separated by horizontal rules, or
        ``"Unable to generate flashcards."`` when no card can be built.
    """
    stripped = (part.strip() for part in text.split('.'))
    candidates = [s for s in stripped if len(s) > 20 and len(s.split()) > 5]
    # Clamp at zero: random.sample rejects a negative sample size.
    count = max(0, min(num_cards, len(candidates)))
    selected = random.sample(candidates, count)
    flashcards = []
    for i, sentence in enumerate(selected, 1):
        # Simple heuristic: the first five words stand in for the concept.
        lead = ' '.join(sentence.split()[:5])
        question = f"Q{i}: What is explained by: '{lead}...'?"
        flashcards.append(f"**{question}**\n\nA: {sentence}\n")
    return "\n---\n\n".join(flashcards) if flashcards else "Unable to generate flashcards."
def generate_quiz(text, num_questions=3):
    """Generate quiz questions from randomly chosen sentences.

    A sentence is any '.'-separated piece of the text longer than 30
    characters. Each selected sentence (first 200 characters) is sent to the
    text2text model with a prompt asking for a multiple choice question. If
    the model call fails, a generic fallback question quoting the sentence
    is used instead.

    Args:
        text: The source text.
        num_questions: Maximum number of questions to produce.

    Returns:
        Markdown with the questions separated by horizontal rules, or
        ``"Unable to generate quiz."`` when no question can be built.
    """
    sentences = [s.strip() for s in text.split('.') if len(s.strip()) > 30]
    # Clamp at zero: random.sample rejects a negative sample size.
    count = max(0, min(num_questions, len(sentences)))
    selected = random.sample(sentences, count)
    quiz = []
    for i, sentence in enumerate(selected, 1):
        prompt = f"Create a multiple choice question about: {sentence[:200]}"
        try:
            result = qa_generator(prompt, max_length=100)
            quiz.append(f"**Question {i}:**\n{result[0]['generated_text']}\n")
        except Exception:
            # Best-effort fallback; unlike a bare except, this lets
            # KeyboardInterrupt/SystemExit propagate.
            quiz.append(f"**Question {i}:**\nBased on the text: {sentence[:150]}... (provide your answer)\n")
    return "\n---\n\n".join(quiz) if quiz else "Unable to generate quiz."
def process_document(pdf_file, text_input, features):
    """Build the requested study materials from a PDF or pasted text.

    The PDF takes precedence when both inputs are supplied.

    Args:
        pdf_file: Uploaded PDF handle, or ``None`` when no file was given.
        text_input: Pasted text; may be empty or ``None``.
        features: Collection of selected feature names ("Summary",
            "Flashcards", "Quiz"); ``None`` is treated as no selection.

    Returns:
        A 4-tuple ``(preview, summary, flashcards, quiz)``. ``preview`` is
        the first 500 characters of the text (followed by "..." when
        truncated), or a message when no input was given or the PDF could
        not be read. Unselected outputs are empty strings.
    """
    selected = features or []
    # Get text from PDF or text input
    if pdf_file is not None:
        text = extract_text_from_pdf(pdf_file)
        # Only PDF extraction reports errors in-band, so the check is limited
        # to this branch and to the exact prefix. Pasted text that merely
        # starts with the word "Error" must still be processed.
        if text.startswith("Error reading PDF:"):
            return text, "", "", ""
    elif text_input and text_input.strip():
        text = text_input[:15000]
    else:
        return "Please provide a PDF file or paste text.", "", "", ""
    # Generate outputs based on selected features
    summary = generate_summary(text) if "Summary" in selected else ""
    flashcards = generate_flashcards(text) if "Flashcards" in selected else ""
    quiz = generate_quiz(text) if "Quiz" in selected else ""
    preview = text[:500] + "..." if len(text) > 500 else text
    return preview, summary, flashcards, quiz
# Gradio Interface
# Two-column layout: inputs and the generate button on the left, the text
# preview and the three generated outputs on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="StudyForge AI") as demo:
    gr.Markdown("""
    # 📚 StudyForge AI - Your Intelligent Study Companion
    Transform any textbook chapter or notes into summaries, flashcards, and practice quizzes instantly!
    """)
    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF (Max 10 pages)", file_types=[".pdf"])
            text_input = gr.Textbox(label="Or Paste Text Here", lines=5, placeholder="Paste your study material...")
            features = gr.CheckboxGroup(
                ["Summary", "Flashcards", "Quiz"],
                label="Select What You Need",
                value=["Summary", "Flashcards"]
            )
            generate_btn = gr.Button("🚀 Generate Study Materials", variant="primary")
        with gr.Column():
            text_preview = gr.Textbox(label="Text Preview", lines=3)
            summary_output = gr.Markdown(label="Summary")
            flashcards_output = gr.Markdown(label="Flashcards")
            quiz_output = gr.Markdown(label="Practice Quiz")
    # The order of `outputs` matches the 4-tuple returned by process_document.
    generate_btn.click(
        fn=process_document,
        inputs=[pdf_input, text_input, features],
        outputs=[text_preview, summary_output, flashcards_output, quiz_output]
    )
    gr.Markdown("""
    ### Tips:
    - For best results, use clear, well-formatted text (10 pages max for CPU performance)
    - Flashcards work best with content that has clear concepts
    - Processing may take 30-60 seconds on CPU
    """)

if __name__ == "__main__":
    demo.launch()