shaheerawan3 committed on
Commit
7588311
·
verified ·
1 Parent(s): 49f2b49

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -0
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import PyPDF2
3
+ from transformers import pipeline
4
+ import random
5
+ import re
6
+ from io import BytesIO
7
+
8
# Build both Hugging Face pipelines once, at import time (CPU-optimized).
# Every request handler below reuses these two module-level objects.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
    device=-1,
)
qa_generator = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
    device=-1,
)
11
+
12
def extract_text_from_pdf(pdf_file):
    """Extract text from an uploaded PDF.

    Args:
        pdf_file: The uploaded document. Accepted forms are raw ``bytes``,
            a filesystem path (``str`` or path-like), an open binary stream,
            or a temp-file wrapper object that exposes its on-disk path as a
            string ``.name`` attribute.

    Returns:
        Up to 15000 characters of text taken from the first 10 pages (page
        texts are joined with a newline), or a string starting with
        ``"Error reading PDF: "`` if anything goes wrong while reading.
    """
    max_pages = 10  # Limit pages for CPU performance
    max_chars = 15000  # Limit the amount of text handed to the models
    try:
        if isinstance(pdf_file, (bytes, bytearray)):
            source = BytesIO(pdf_file)
        elif isinstance(pdf_file, str) or hasattr(pdf_file, "__fspath__"):
            # Already a path; PdfReader can open it directly.
            source = pdf_file
        else:
            # Temp-file wrappers carry the path in ``.name``; plain streams
            # (no string name) are passed through unchanged.
            name = getattr(pdf_file, "name", None)
            source = name if isinstance(name, str) else pdf_file

        pdf_reader = PyPDF2.PdfReader(source)
        pages = pdf_reader.pages
        page_count = min(max_pages, len(pages))
        # extract_text() may yield None for image-only pages; treat as empty.
        page_texts = [pages[i].extract_text() or "" for i in range(page_count)]
        # Join with a newline so the last word of one page does not fuse
        # with the first word of the next.
        return "\n".join(page_texts)[:max_chars]
    except Exception as e:
        return f"Error reading PDF: {str(e)}"
24
+
25
def chunk_text(text, max_length=1000):
    """Split text into whitespace-delimited chunks.

    Words are never broken. Each chunk is the longest run of consecutive
    words whose space-joined length does not exceed ``max_length``; a single
    word longer than ``max_length`` becomes a chunk of its own.

    Args:
        text: The text to split. Runs of whitespace are collapsed.
        max_length: Maximum number of characters per chunk.

    Returns:
        A list of non-empty chunk strings (empty list for blank input).
    """
    chunks = []
    current_words = []
    current_length = 0  # always equals len(" ".join(current_words))

    for word in text.split():
        # A separating space is only needed when the chunk already has words.
        separator = 1 if current_words else 0
        projected = current_length + separator + len(word)
        # Only flush when there is something to flush; this prevents an
        # empty chunk when the very first word is itself over the limit.
        if current_words and projected > max_length:
            chunks.append(" ".join(current_words))
            current_words = [word]
            current_length = len(word)
        else:
            current_words.append(word)
            current_length = projected

    if current_words:
        chunks.append(" ".join(current_words))
    return chunks
44
+
45
def generate_summary(text):
    """Generate a concise summary of ``text``.

    The text is split into chunks of roughly 1000 characters and at most the
    first three non-blank chunks are summarized (to bound CPU time). A chunk
    whose summarization fails is logged and skipped.

    Args:
        text: The source text.

    Returns:
        The chunk summaries joined by blank lines,
        ``"Text too short to summarize."`` when ``text`` has fewer than 100
        characters, or ``"Unable to generate summary."`` when no chunk could
        be summarized.
    """
    if len(text) < 100:
        return "Text too short to summarize."

    # Function-scope import: the module does not import logging at top level.
    import logging

    # Drop blank chunks before applying the limit so a blank one neither
    # reaches the model nor uses up one of the three slots.
    chunks = [chunk for chunk in chunk_text(text, 1000) if chunk.strip()]
    summaries = []

    for chunk in chunks[:3]:  # Limit chunks for CPU
        try:
            summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
            summaries.append(summary[0]['summary_text'])
        except Exception:
            # Best-effort: record the failure and keep going. Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
            logging.getLogger(__name__).exception(
                "Summarization failed for a chunk; skipping it"
            )
            continue

    return "\n\n".join(summaries) if summaries else "Unable to generate summary."
61
+
62
def generate_flashcards(text, num_cards=5):
    """Generate question/answer flashcards from ``text``.

    The text is split on ``'.'``. A sentence is eligible when it is longer
    than 20 characters and has more than five words. Up to ``num_cards``
    eligible sentences are picked at random; each becomes a card whose
    question quotes the first five words and whose answer is the sentence.

    Args:
        text: The source text.
        num_cards: Maximum number of cards to produce.

    Returns:
        The cards as Markdown separated by horizontal rules, numbered
        consecutively from Q1, or ``"Unable to generate flashcards."`` when
        no card can be produced.
    """
    # Apply the word-count rule BEFORE sampling, so short sentences cannot
    # use up sample slots and leave gaps in the Q-numbering.
    candidates = []
    for fragment in text.split('.'):
        sentence = fragment.strip()
        if len(sentence) > 20 and len(sentence.split()) > 5:
            candidates.append(sentence)

    # Guard against zero/negative requests: random.sample rejects k < 0.
    count = min(num_cards, len(candidates))
    if count <= 0:
        return "Unable to generate flashcards."

    flashcards = []
    for i, sentence in enumerate(random.sample(candidates, count), 1):
        # Extract key concept (simple heuristic): the first five words.
        preview = ' '.join(sentence.split()[:5])
        question = f"Q{i}: What is explained by: '{preview}...'?"
        flashcards.append(f"**{question}**\n\nA: {sentence}\n")

    return "\n---\n\n".join(flashcards)
77
+
78
def generate_quiz(text, num_questions=3):
    """Generate practice quiz questions from ``text``.

    The text is split on ``'.'`` and sentences longer than 30 characters are
    eligible. Up to ``num_questions`` of them are picked at random and the
    text2text model is asked to write a multiple choice question about each.
    If the model call fails or returns nothing, a generic prompt built from
    the sentence itself is used instead.

    Args:
        text: The source text.
        num_questions: Maximum number of questions to produce.

    Returns:
        The questions as Markdown separated by horizontal rules, or
        ``"Unable to generate quiz."`` when no question can be produced.
    """
    sentences = [s.strip() for s in text.split('.') if len(s.strip()) > 30]

    # Guard against zero/negative requests: random.sample rejects k < 0.
    count = min(num_questions, len(sentences))
    if count <= 0:
        return "Unable to generate quiz."

    # Function-scope import: the module does not import logging at top level.
    import logging

    quiz = []
    for i, sentence in enumerate(random.sample(sentences, count), 1):
        prompt = f"Create a multiple choice question about: {sentence[:200]}"
        try:
            result = qa_generator(prompt, max_length=100)
            generated = result[0]['generated_text'].strip()
        except Exception:
            # Best-effort: record the failure and fall back below. Unlike a
            # bare ``except:``, KeyboardInterrupt/SystemExit still propagate.
            logging.getLogger(__name__).exception(
                "Question generation failed; using the fallback prompt"
            )
            generated = ""

        if generated:
            quiz.append(f"**Question {i}:**\n{generated}\n")
        else:
            quiz.append(f"**Question {i}:**\nBased on the text: {sentence[:150]}... (provide your answer)\n")

    return "\n---\n\n".join(quiz)
93
+
94
def process_document(pdf_file, text_input, features):
    """Main processing function wired to the "Generate" button.

    Text is taken from the PDF when one is supplied, otherwise from the
    pasted text (truncated to 15000 characters).

    Args:
        pdf_file: The uploaded PDF, or ``None`` when nothing was uploaded.
        text_input: Pasted text; ``None`` is treated like an empty string.
        features: Collection of selected feature names (``"Summary"``,
            ``"Flashcards"``, ``"Quiz"``); ``None`` means nothing selected.

    Returns:
        A 4-tuple ``(preview, summary, flashcards, quiz)``. ``preview`` is
        the first 500 characters of the text (with ``"..."`` appended when
        truncated) or a user-facing message when there is nothing to
        process; outputs for unselected features are empty strings.
    """
    features = features or []

    # Get text from PDF or text input
    if pdf_file is not None:
        text = extract_text_from_pdf(pdf_file)
        # Only PDF extraction reports failures in-band, so check for the
        # error marker here only, and use the full marker. Pasted text that
        # happens to begin with "Error" must still be processed.
        if text.startswith("Error reading PDF:"):
            return text, "", "", ""
        if not text.strip():
            # e.g. a scanned/image-only PDF with no text layer
            return "No extractable text found in the PDF.", "", "", ""
    elif text_input and text_input.strip():
        text = text_input[:15000]
    else:
        return "Please provide a PDF file or paste text.", "", "", ""

    # Generate outputs based on selected features
    summary = generate_summary(text) if "Summary" in features else ""
    flashcards = generate_flashcards(text) if "Flashcards" in features else ""
    quiz = generate_quiz(text) if "Quiz" in features else ""

    preview = text[:500] + "..." if len(text) > 500 else text
    return preview, summary, flashcards, quiz
113
+
114
# Gradio Interface
# Static Markdown shown above and below the controls.
_INTRO_MARKDOWN = """
# 📚 StudyForge AI - Your Intelligent Study Companion
Transform any textbook chapter or notes into summaries, flashcards, and practice quizzes instantly!
"""

_TIPS_MARKDOWN = """
### Tips:
- For best results, use clear, well-formatted text (10 pages max for CPU performance)
- Flashcards work best with content that has clear concepts
- Processing may take 30-60 seconds on CPU
"""

with gr.Blocks(theme=gr.themes.Soft(), title="StudyForge AI") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Left column: document inputs, feature selection, trigger button.
        with gr.Column():
            pdf_input = gr.File(
                label="Upload PDF (Max 10 pages)",
                file_types=[".pdf"],
            )
            text_input = gr.Textbox(
                label="Or Paste Text Here",
                lines=5,
                placeholder="Paste your study material...",
            )

            features = gr.CheckboxGroup(
                choices=["Summary", "Flashcards", "Quiz"],
                label="Select What You Need",
                value=["Summary", "Flashcards"],
            )

            generate_btn = gr.Button("🚀 Generate Study Materials", variant="primary")

        # Right column: one output widget per value returned by
        # process_document, in the same order.
        with gr.Column():
            text_preview = gr.Textbox(label="Text Preview", lines=3)
            summary_output = gr.Markdown(label="Summary")
            flashcards_output = gr.Markdown(label="Flashcards")
            quiz_output = gr.Markdown(label="Practice Quiz")

    handler_inputs = [pdf_input, text_input, features]
    handler_outputs = [text_preview, summary_output, flashcards_output, quiz_output]
    generate_btn.click(
        fn=process_document,
        inputs=handler_inputs,
        outputs=handler_outputs,
    )

    gr.Markdown(_TIPS_MARKDOWN)

if __name__ == "__main__":
    demo.launch()