File size: 5,810 Bytes
7588311
 
 
 
 
 
 
 
 
 
 
 
 
 
01dcc2d
 
 
 
 
 
 
 
7588311
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import gradio as gr
import PyPDF2
from transformers import pipeline
import random
import re
from io import BytesIO

# Initialize models (CPU-optimized)
# Both pipelines are constructed once, at import time, and reused by every
# call: `summarizer` by generate_summary() and `qa_generator` by
# generate_quiz(). device=-1 is the transformers convention for "run on CPU".
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=-1)
qa_generator = pipeline("text2text-generation", model="google/flan-t5-base", device=-1)

def extract_text_from_pdf(pdf_file, max_pages=10, max_chars=15000):
    """Extract text from an uploaded PDF.

    Args:
        pdf_file: Either a filesystem path (``str``, ``bytes`` or path-like)
            or an object that exposes the path as ``.name`` (e.g. a
            temp-file wrapper). NOTE(review): which of the two Gradio's File
            component passes depends on the Gradio version/config -- both
            are accepted here so the function works either way.
        max_pages: Maximum number of leading pages to read (keeps CPU-only
            processing time bounded).
        max_chars: Maximum number of characters of extracted text to return.

    Returns:
        The extracted text (pages separated by a newline, truncated to
        ``max_chars`` characters), or a string starting with
        ``"Error reading PDF:"`` if the file cannot be opened or parsed.
    """
    try:
        # Path-like values are used as-is; anything else is expected to
        # carry its path in ``.name``. The explicit check matters because
        # ``pathlib.Path.name`` is only the basename, not the full path.
        if isinstance(pdf_file, (str, bytes)) or hasattr(pdf_file, "__fspath__"):
            pdf_path = pdf_file
        else:
            pdf_path = pdf_file.name

        with open(pdf_path, 'rb') as f:
            pdf_reader = PyPDF2.PdfReader(f)
            page_count = min(max_pages, len(pdf_reader.pages))
            # ``or ""`` guards against pages for which no text is returned.
            page_texts = [
                pdf_reader.pages[page_num].extract_text() or ""
                for page_num in range(page_count)
            ]

        # Join with a newline so the last word of one page does not fuse
        # with the first word of the next.
        text = "\n".join(page_texts)
        return text[:max_chars]  # Limit characters, not tokens
    except Exception as e:
        # Best-effort boundary: callers detect failure via the message prefix.
        return f"Error reading PDF: {str(e)}"

def chunk_text(text, max_length=1000):
    """Split text into whitespace-delimited chunks of bounded length.

    Words are never split. Each chunk is the words joined by single spaces,
    and its length is at most ``max_length`` characters unless a single word
    is itself longer than ``max_length`` (that word then forms its own
    chunk).

    Args:
        text: The text to split. Any run of whitespace separates words.
        max_length: Maximum number of characters per chunk.

    Returns:
        A list of non-empty chunk strings; an empty list if ``text`` has no
        words.
    """
    chunks = []
    current_words = []
    current_length = 0  # exact length of " ".join(current_words)

    for word in text.split():
        # A separating space is only needed when the chunk already has words.
        added = len(word) + 1 if current_words else len(word)
        # Only flush when there is something to flush, so an oversized first
        # word never produces an empty chunk.
        if current_words and current_length + added > max_length:
            chunks.append(" ".join(current_words))
            current_words = [word]
            current_length = len(word)
        else:
            current_words.append(word)
            current_length += added

    if current_words:
        chunks.append(" ".join(current_words))
    return chunks

def generate_summary(text, max_chunks=3):
    """Generate a concise summary of ``text``.

    The text is split into chunks of up to 1000 characters and the first
    ``max_chunks`` chunks are summarized independently with the module-level
    ``summarizer`` pipeline.

    Args:
        text: Source text to summarize.
        max_chunks: Maximum number of chunks to summarize (bounds CPU time).

    Returns:
        The per-chunk summaries joined by blank lines;
        ``"Text too short to summarize."`` if ``text`` is empty or shorter
        than 100 characters; ``"Unable to generate summary."`` if no chunk
        could be summarized.
    """
    if not text or len(text) < 100:
        return "Text too short to summarize."

    summaries = []
    for chunk in chunk_text(text, 1000)[:max_chunks]:  # Limit chunks for CPU
        try:
            result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
            summaries.append(result[0]['summary_text'])
        except Exception:
            # Best effort: skip a chunk the model fails on, but do not
            # swallow KeyboardInterrupt/SystemExit as a bare ``except`` would.
            continue

    return "\n\n".join(summaries) if summaries else "Unable to generate summary."

def generate_flashcards(text, num_cards=5):
    """Generate question/answer flashcards from text.

    Candidate sentences are obtained by splitting on ``'.'``; a sentence is
    eligible when it is longer than 20 characters and has more than five
    words. Up to ``num_cards`` eligible sentences are picked at random. The
    first five words of each form the question prompt and the full sentence
    is the answer.

    Args:
        text: Source text.
        num_cards: Maximum number of flashcards to produce.

    Returns:
        Markdown with the cards separated by horizontal rules and numbered
        consecutively from Q1, or ``"Unable to generate flashcards."`` when
        no sentence is eligible.
    """
    # Apply the word-count filter BEFORE sampling so that the requested
    # number of cards is honoured whenever enough eligible sentences exist
    # and card numbers have no gaps.
    candidates = []
    for fragment in text.split('.'):
        sentence = fragment.strip()
        if len(sentence) > 20 and len(sentence.split()) > 5:
            candidates.append(sentence)

    # Clamp at zero: random.sample raises ValueError on a negative count.
    count = max(0, min(num_cards, len(candidates)))
    selected = random.sample(candidates, count)

    flashcards = []
    for i, sentence in enumerate(selected, 1):
        # Extract key concept (simple heuristic): the first five words.
        preview = ' '.join(sentence.split()[:5])
        question = f"Q{i}: What is explained by: '{preview}...'?"
        flashcards.append(f"**{question}**\n\nA: {sentence}\n")

    return "\n---\n\n".join(flashcards) if flashcards else "Unable to generate flashcards."

def generate_quiz(text, num_questions=3):
    """Generate quiz questions from text.

    Sentences (split on ``'.'``) longer than 30 characters are candidates;
    up to ``num_questions`` are picked at random and each is turned into a
    question by prompting the module-level ``qa_generator`` pipeline with
    the first 200 characters of the sentence.

    Args:
        text: Source text.
        num_questions: Maximum number of questions to produce.

    Returns:
        Markdown with the questions separated by horizontal rules, or
        ``"Unable to generate quiz."`` when there is no candidate sentence.
        If the model call fails for a sentence, a generic open-ended question
        quoting the sentence is emitted in its place.
    """
    sentences = [s.strip() for s in text.split('.') if len(s.strip()) > 30]
    # Clamp at zero: random.sample raises ValueError on a negative count.
    count = max(0, min(num_questions, len(sentences)))
    selected = random.sample(sentences, count)

    quiz = []
    for i, sentence in enumerate(selected, 1):
        prompt = f"Create a multiple choice question about: {sentence[:200]}"
        try:
            result = qa_generator(prompt, max_length=100)
            generated = result[0]['generated_text']
        except Exception:
            # Deliberate fallback on model failure; ``Exception`` rather than
            # a bare ``except`` so KeyboardInterrupt/SystemExit propagate.
            quiz.append(f"**Question {i}:**\nBased on the text: {sentence[:150]}... (provide your answer)\n")
        else:
            quiz.append(f"**Question {i}:**\n{generated}\n")

    return "\n---\n\n".join(quiz) if quiz else "Unable to generate quiz."

def process_document(pdf_file, text_input, features):
    """Main processing function wired to the "Generate" button.

    Args:
        pdf_file: Uploaded PDF (or ``None``). Takes priority over
            ``text_input`` when both are supplied.
        text_input: Pasted text (may be empty or ``None``); only the first
            15000 characters are used.
        features: Collection of selected feature names among ``"Summary"``,
            ``"Flashcards"`` and ``"Quiz"`` (``None`` is treated as empty).

    Returns:
        A 4-tuple ``(preview, summary, flashcards, quiz)``. ``preview`` is
        the first 500 characters of the text (with ``"..."`` appended when
        truncated) or a user-facing message when no input was given or the
        PDF could not be read. Outputs for unselected features are ``""``.
    """
    features = features or []

    # Get text from PDF or text input
    if pdf_file is not None:
        text = extract_text_from_pdf(pdf_file)
        # Only PDF extraction can fail, and it signals failure with this
        # exact prefix. Checking it here (rather than on every input, and
        # rather than just "Error") keeps pasted text that happens to start
        # with "Error" from being misreported as a failure.
        if text.startswith("Error reading PDF:"):
            return text, "", "", ""
    elif text_input and text_input.strip():
        text = text_input[:15000]
    else:
        return "Please provide a PDF file or paste text.", "", "", ""

    # Generate outputs based on selected features
    summary = generate_summary(text) if "Summary" in features else ""
    flashcards = generate_flashcards(text) if "Flashcards" in features else ""
    quiz = generate_quiz(text) if "Quiz" in features else ""

    preview = text[:500] + "..." if len(text) > 500 else text
    return preview, summary, flashcards, quiz

# Gradio Interface
# Two-column layout: inputs (PDF upload, pasted text, feature selection and
# the generate button) on the left, generated outputs on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="StudyForge AI") as demo:
    gr.Markdown("""
    # 📚 StudyForge AI - Your Intelligent Study Companion
    Transform any textbook chapter or notes into summaries, flashcards, and practice quizzes instantly!
    """)
    
    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF (Max 10 pages)", file_types=[".pdf"])
            text_input = gr.Textbox(label="Or Paste Text Here", lines=5, placeholder="Paste your study material...")
            
            features = gr.CheckboxGroup(
                ["Summary", "Flashcards", "Quiz"],
                label="Select What You Need",
                value=["Summary", "Flashcards"]
            )
            
            generate_btn = gr.Button("🚀 Generate Study Materials", variant="primary")
        
        with gr.Column():
            text_preview = gr.Textbox(label="Text Preview", lines=3)
            summary_output = gr.Markdown(label="Summary")
            flashcards_output = gr.Markdown(label="Flashcards")
            quiz_output = gr.Markdown(label="Practice Quiz")
    
    # The order of `outputs` must match the 4-tuple returned by
    # process_document: (preview, summary, flashcards, quiz).
    generate_btn.click(
        fn=process_document,
        inputs=[pdf_input, text_input, features],
        outputs=[text_preview, summary_output, flashcards_output, quiz_output]
    )
    
    gr.Markdown("""
    ### Tips:
    - For best results, use clear, well-formatted text (10 pages max for CPU performance)
    - Flashcards work best with content that has clear concepts
    - Processing may take 30-60 seconds on CPU
    """)

# Launch the Gradio app only when this file is executed as a script, so that
# importing the module does not start it.
if __name__ == "__main__":
    demo.launch()