# NOTE: header residue from the hosting site's file viewer (not program code).
# Spaces status: Sleeping | File size: 5,810 Bytes | Revisions: 7588311, 01dcc2d
import gradio as gr
import PyPDF2
from transformers import pipeline
import random
import re
from io import BytesIO
# Build both Hugging Face pipelines once at import time so every request
# reuses them; device=-1 keeps inference on the CPU.
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=-1,
)
qa_generator = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    device=-1,
)
def extract_text_from_pdf(pdf_file, max_pages=10, max_chars=15000):
    """Extract text from an uploaded PDF.

    Args:
        pdf_file: Either a filesystem path, or an upload object that exposes
            the path of the file as its ``name`` attribute.
        max_pages: Maximum number of leading pages to read; bounds CPU cost.
        max_chars: Maximum number of characters returned.

    Returns:
        The concatenated page text truncated to ``max_chars`` characters, or
        a message starting with ``"Error reading PDF: "`` when the file
        cannot be opened or parsed.
    """
    try:
        # Accept both a plain path and a wrapper object carrying ``.name``.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        with open(pdf_path, "rb") as f:
            pdf_reader = PyPDF2.PdfReader(f)
            page_count = min(max_pages, len(pdf_reader.pages))
            # ``or ""`` guards against pages that yield no text at all.
            page_texts = [
                pdf_reader.pages[page_num].extract_text() or ""
                for page_num in range(page_count)
            ]
        return "".join(page_texts)[:max_chars]
    except Exception as e:
        # Errors are reported as text because the caller shows the result
        # directly in the UI instead of handling exceptions.
        return f"Error reading PDF: {e}"
def chunk_text(text, max_length=1000):
    """Split text into whitespace-delimited chunks of bounded length.

    Words are never split. Each chunk is the words joined by single spaces
    and is at most ``max_length`` characters long, except when a single word
    is itself longer than ``max_length`` (it then forms its own chunk).

    Args:
        text: The text to split.
        max_length: Maximum number of characters per chunk.

    Returns:
        A list of non-empty chunk strings; empty when ``text`` has no words.
    """
    chunks = []
    current_chunk = []
    current_length = 0
    for word in text.split():
        # Length of the joined chunk if this word were added; a separating
        # space is only needed when the chunk already holds a word.
        projected = current_length + len(word) + (1 if current_chunk else 0)
        if current_chunk and projected > max_length:
            chunks.append(" ".join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            # Also taken for an oversized first word, so no empty chunk is
            # ever emitted.
            current_chunk.append(word)
            current_length = projected
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
def generate_summary(text):
    """Generate a concise summary of the text.

    The text is split into chunks of up to 1000 characters and at most the
    first three non-empty chunks are summarized, to keep CPU time bounded.

    Args:
        text: The source text.

    Returns:
        The chunk summaries separated by blank lines, or a short message
        when the text is under 100 characters or no chunk could be
        summarized.
    """
    if len(text) < 100:
        return "Text too short to summarize."
    # Drop blank chunks before applying the cap so they do not use up one of
    # the three summarization slots.
    chunks = [chunk for chunk in chunk_text(text, 1000) if chunk.strip()]
    summaries = []
    for chunk in chunks[:3]:  # Limit chunks for CPU
        try:
            result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
            summaries.append(result[0]["summary_text"])
        except Exception:
            # Best effort: skip a chunk the model cannot handle, but let
            # KeyboardInterrupt/SystemExit propagate.
            continue
    return "\n\n".join(summaries) if summaries else "Unable to generate summary."
def generate_flashcards(text, num_cards=5):
    """Generate question/answer flashcards from text.

    The text is split on ``.``; a sentence is usable when it is longer than
    20 characters and has more than 5 words. Up to ``num_cards`` usable
    sentences are picked at random. Each card asks about the first five
    words of the sentence and gives the full sentence as the answer.

    Args:
        text: The source text.
        num_cards: Maximum number of flashcards to produce.

    Returns:
        Markdown with consecutively numbered cards separated by horizontal
        rules, or a short message when no card could be produced.
    """
    # Filter on BOTH criteria before sampling, so the requested number of
    # cards is produced whenever enough usable sentences exist.
    candidates = []
    for fragment in text.split('.'):
        sentence = fragment.strip()
        if len(sentence) > 20 and len(sentence.split()) > 5:
            candidates.append(sentence)

    # Clamp at zero: random.sample rejects a negative sample size.
    count = max(0, min(num_cards, len(candidates)))
    selected = random.sample(candidates, count)

    flashcards = []
    for i, sentence in enumerate(selected, 1):
        # Extract key concept (simple heuristic): the first five words.
        preview = " ".join(sentence.split()[:5])
        question = f"Q{i}: What is explained by: '{preview}...'?"
        flashcards.append(f"**{question}**\n\nA: {sentence}\n")
    return "\n---\n\n".join(flashcards) if flashcards else "Unable to generate flashcards."
def generate_quiz(text, num_questions=3):
    """Generate quiz questions from text.

    The text is split on ``.`` and up to ``num_questions`` sentences longer
    than 30 characters are picked at random. Each one is sent to the
    text-generation model; if that call fails, a generic prompt quoting the
    sentence is used instead.

    Args:
        text: The source text.
        num_questions: Maximum number of questions to produce.

    Returns:
        Markdown with numbered questions separated by horizontal rules, or a
        short message when no question could be produced.
    """
    sentences = [s.strip() for s in text.split('.') if len(s.strip()) > 30]
    # Clamp at zero: random.sample rejects a negative sample size.
    count = max(0, min(num_questions, len(sentences)))
    selected = random.sample(sentences, count)

    quiz = []
    for i, sentence in enumerate(selected, 1):
        prompt = f"Create a multiple choice question about: {sentence[:200]}"
        try:
            result = qa_generator(prompt, max_length=100)
            entry = f"**Question {i}:**\n{result[0]['generated_text']}\n"
        except Exception:
            # Best effort: fall back to a hand-written prompt, but let
            # KeyboardInterrupt/SystemExit propagate.
            entry = f"**Question {i}:**\nBased on the text: {sentence[:150]}... (provide your answer)\n"
        quiz.append(entry)
    return "\n---\n\n".join(quiz) if quiz else "Unable to generate quiz."
def process_document(pdf_file, text_input, features):
    """Turn an uploaded PDF or pasted text into the selected study materials.

    Args:
        pdf_file: Uploaded PDF, or ``None``; takes precedence over the text.
        text_input: Pasted text, used when no PDF is given (capped at
            15000 characters).
        features: Collection of selected feature names among ``"Summary"``,
            ``"Flashcards"`` and ``"Quiz"``.

    Returns:
        A 4-tuple ``(preview, summary, flashcards, quiz)`` of strings. The
        preview is the first 500 characters of the text (with ``"..."``
        appended when truncated); outputs for unselected features are empty.
        On missing input or a PDF read failure the first element carries
        the message and the others are empty.
    """
    # Tolerate None from the UI layer for either optional input.
    features = features or []
    pasted = text_input or ""

    # Get text from PDF or text input
    if pdf_file is not None:
        text = extract_text_from_pdf(pdf_file)
        # Only PDF extraction reports failures in-band, and always with this
        # exact prefix; pasted text starting with "Error" is ordinary content.
        if text.startswith("Error reading PDF:"):
            return text, "", "", ""
    elif pasted.strip():
        text = pasted[:15000]
    else:
        return "Please provide a PDF file or paste text.", "", "", ""

    # Generate outputs based on selected features
    summary = generate_summary(text) if "Summary" in features else ""
    flashcards = generate_flashcards(text) if "Flashcards" in features else ""
    quiz = generate_quiz(text) if "Quiz" in features else ""

    preview = text[:500] + "..." if len(text) > 500 else text
    return preview, summary, flashcards, quiz
# Gradio Interface
#
# The Markdown texts live in module-level constants so their content stays
# flush-left and is not affected by the indentation of the layout code.
# NOTE(review): the two emoji replace a mis-encoded "π" character found in
# the heading and the button label; confirm they match the intended ones.
HEADER_MARKDOWN = """
# 📚 StudyForge AI - Your Intelligent Study Companion
Transform any textbook chapter or notes into summaries, flashcards, and practice quizzes instantly!
"""

TIPS_MARKDOWN = """
### Tips:
- For best results, use clear, well-formatted text (10 pages max for CPU performance)
- Flashcards work best with content that has clear concepts
- Processing may take 30-60 seconds on CPU
"""

with gr.Blocks(theme=gr.themes.Soft(), title="StudyForge AI") as demo:
    gr.Markdown(HEADER_MARKDOWN)

    with gr.Row():
        # Left column: inputs and feature selection.
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF (Max 10 pages)", file_types=[".pdf"])
            text_input = gr.Textbox(
                label="Or Paste Text Here",
                lines=5,
                placeholder="Paste your study material...",
            )
            features = gr.CheckboxGroup(
                ["Summary", "Flashcards", "Quiz"],
                label="Select What You Need",
                value=["Summary", "Flashcards"],
            )
            generate_btn = gr.Button("🚀 Generate Study Materials", variant="primary")

        # Right column: generated outputs.
        with gr.Column():
            text_preview = gr.Textbox(label="Text Preview", lines=3)
            summary_output = gr.Markdown(label="Summary")
            flashcards_output = gr.Markdown(label="Flashcards")
            quiz_output = gr.Markdown(label="Practice Quiz")

    # The output order must match the 4-tuple returned by process_document.
    generate_btn.click(
        fn=process_document,
        inputs=[pdf_input, text_input, features],
        outputs=[text_preview, summary_output, flashcards_output, quiz_output],
    )

    gr.Markdown(TIPS_MARKDOWN)

if __name__ == "__main__":
    demo.launch()