Azidan committed on
Commit
641953a
·
verified ·
1 Parent(s): af3ae44

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -45
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import re
3
  from transformers import pipeline, AutoTokenizer
4
  from PyPDF2 import PdfReader
 
5
 
6
  # =========================
7
  # Model setup (CPU-safe)
@@ -15,20 +16,20 @@ summarizer = pipeline(
15
  device=-1 # CPU only
16
  )
17
 
18
- # Better AI advice generator - use flan-t5-base for improved quality (still CPU-friendly)
19
  advice_generator = pipeline(
20
  "text2text-generation",
21
  model="google/flan-t5-base",
22
  device=-1 # CPU only
23
  )
24
 
25
- CHUNK_SIZE = 900 # safe margin for summarizer
26
 
27
  # =========================
28
  # Utilities
29
  # =========================
30
  def clean_text(text: str) -> str:
31
- """Fix quotes, spacing, repetition, and broken punctuation."""
32
  text = text.replace("β€˜", "'").replace("’", "'")
33
  text = text.replace("β€œ", '"').replace("”", '"')
34
  text = re.sub(r"[.]{2,}", ".", text)
@@ -55,11 +56,9 @@ def chunk_text(text: str):
55
  return chunks
56
 
57
  def generate_ai_advice(summary: str) -> str:
58
- """Use AI to generate personalized study advice based on the summary."""
59
- # Truncate summary if too long
60
- truncated_summary = summary[:1000] # Slightly longer for base model
61
 
62
- # Improved prompt for better, more relevant tips
63
  prompt = (
64
  f"Read this summary of a technical paper: '{truncated_summary}'\n\n"
65
  "Generate exactly 5 practical study tips for a student to better understand and retain this content. "
@@ -72,26 +71,25 @@ def generate_ai_advice(summary: str) -> str:
72
  prompt,
73
  max_length=250,
74
  num_return_sequences=1,
75
- do_sample=False, # Greedy for consistency
76
- temperature=0.7 # Slight creativity if needed
77
  )[0]["generated_text"]
78
 
79
- # Post-process to ensure bullet format
80
  tips = [tip.strip() for tip in generated.split('\n') if tip.strip().startswith('-') or tip.strip()]
81
- if not tips:
82
- tips = generated.split('. ') # Fallback split
83
 
84
  advice_md = "\n\n---\n\n### πŸ“š AI-Generated Study Tips\n\n"
85
  for i, tip in enumerate(tips[:5], 1):
86
  clean_tip = tip.lstrip('- ').strip()
87
  advice_md += f"- {clean_tip}\n"
88
 
89
- advice_md += "\n**Pro tip**: Combine these with spaced repetition for long-term mastery!"
90
-
91
  return advice_md
92
 
93
  def summarize_long_text(text: str) -> str:
94
- """Summarize arbitrarily long text safely + add AI study advice."""
95
  if not text or len(text.strip()) == 0:
96
  return "No text provided."
97
 
@@ -99,20 +97,21 @@ def summarize_long_text(text: str) -> str:
99
  summaries = []
100
 
101
  for chunk in chunks:
102
- summary = summarizer(
103
- chunk,
104
- max_length=150,
105
- min_length=40,
106
- do_sample=False
107
- )[0]["summary_text"]
108
- summaries.append(summary)
 
 
 
109
 
110
  merged = " ".join(summaries)
111
  cleaned_summary = clean_text(merged)
112
 
113
- # Generate AI advice based on the summary
114
  ai_advice = generate_ai_advice(cleaned_summary)
115
-
116
  return cleaned_summary + ai_advice
117
 
118
  def read_pdf(file) -> str:
@@ -122,15 +121,34 @@ def read_pdf(file) -> str:
122
  pages = [page.extract_text() or "" for page in reader.pages]
123
  return " ".join(pages)
124
  except Exception as e:
125
- return f"PDF read error: {e}"
 
 
 
 
 
 
 
 
 
126
 
127
  # =========================
128
  # Main handler
129
  # =========================
130
- def process_input(text, file):
 
 
131
  if file is not None:
132
- text = read_pdf(file)
133
- return summarize_long_text(text)
 
 
 
 
 
 
 
 
134
 
135
  # =========================
136
  # Gradio UI
@@ -138,35 +156,41 @@ def process_input(text, file):
138
  with gr.Blocks() as demo:
139
  gr.Markdown("# πŸ“„ Long Text Summarizer + AI Study Assistant")
140
  gr.Markdown(
141
- "β€’ Handles **thousands of words**\n"
142
- "β€’ Supports **PDF upload**\n"
143
- "β€’ Optimized for **CPU / free tier**\n"
144
- "β€’ Includes **AI-generated study tips** based on the summary content"
 
145
  )
146
 
147
- text_input = gr.Textbox(
148
- lines=15,
149
- label="Paste text (optional)",
150
- placeholder="Paste lecture notes, textbook chapter, article..."
151
- )
 
 
 
 
 
152
 
153
- file_input = gr.File(
154
- label="Upload PDF (optional)",
155
- file_types=[".pdf"]
156
- )
157
 
158
  output = gr.Textbox(
159
  lines=16,
160
- label="Summary + AI Study Advice",
161
- placeholder="Summary appears first, followed by AI-generated learning tips..."
162
  )
163
 
164
- summarize_btn = gr.Button("Summarize & Get AI Study Tips", variant="primary")
 
 
 
165
 
166
  summarize_btn.click(
167
  fn=process_input,
168
  inputs=[text_input, file_input],
169
- outputs=output
170
  )
171
 
172
  demo.launch()
 
2
  import re
3
  from transformers import pipeline, AutoTokenizer
4
  from PyPDF2 import PdfReader
5
+ import tempfile
6
 
7
  # =========================
8
  # Model setup (CPU-safe)
 
16
  device=-1 # CPU only
17
  )
18
 
19
+ # Better AI advice generator - flan-t5-base is still quite CPU friendly
20
  advice_generator = pipeline(
21
  "text2text-generation",
22
  model="google/flan-t5-base",
23
  device=-1 # CPU only
24
  )
25
 
26
+ CHUNK_SIZE = 900 # safe margin under typical max input
27
 
28
  # =========================
29
  # Utilities
30
  # =========================
31
  def clean_text(text: str) -> str:
32
+ """Fix quotes, spacing, repetition, broken punctuation."""
33
  text = text.replace("β€˜", "'").replace("’", "'")
34
  text = text.replace("β€œ", '"').replace("”", '"')
35
  text = re.sub(r"[.]{2,}", ".", text)
 
56
  return chunks
57
 
58
  def generate_ai_advice(summary: str) -> str:
59
+ """Generate personalized study advice based on the paper summary."""
60
+ truncated_summary = summary[:1000]
 
61
 
 
62
  prompt = (
63
  f"Read this summary of a technical paper: '{truncated_summary}'\n\n"
64
  "Generate exactly 5 practical study tips for a student to better understand and retain this content. "
 
71
  prompt,
72
  max_length=250,
73
  num_return_sequences=1,
74
+ do_sample=False,
75
+ temperature=0.7
76
  )[0]["generated_text"]
77
 
78
+ # Try to clean into bullet points
79
  tips = [tip.strip() for tip in generated.split('\n') if tip.strip().startswith('-') or tip.strip()]
80
+ if not tips or len(tips) < 3:
81
+ tips = [t.strip() for t in generated.split('.') if t.strip()]
82
 
83
  advice_md = "\n\n---\n\n### πŸ“š AI-Generated Study Tips\n\n"
84
  for i, tip in enumerate(tips[:5], 1):
85
  clean_tip = tip.lstrip('- ').strip()
86
  advice_md += f"- {clean_tip}\n"
87
 
88
+ advice_md += "\n**Pro tip**: Combine these with spaced repetition (Anki / Quizlet) for long-term retention!"
 
89
  return advice_md
90
 
91
  def summarize_long_text(text: str) -> str:
92
+ """Summarize long text in chunks + add AI study advice."""
93
  if not text or len(text.strip()) == 0:
94
  return "No text provided."
95
 
 
97
  summaries = []
98
 
99
  for chunk in chunks:
100
+ try:
101
+ summary = summarizer(
102
+ chunk,
103
+ max_length=150,
104
+ min_length=40,
105
+ do_sample=False
106
+ )[0]["summary_text"]
107
+ summaries.append(summary)
108
+ except Exception:
109
+ pass # skip problematic chunks
110
 
111
  merged = " ".join(summaries)
112
  cleaned_summary = clean_text(merged)
113
 
 
114
  ai_advice = generate_ai_advice(cleaned_summary)
 
115
  return cleaned_summary + ai_advice
116
 
117
  def read_pdf(file) -> str:
 
121
  pages = [page.extract_text() or "" for page in reader.pages]
122
  return " ".join(pages)
123
  except Exception as e:
124
+ return f"PDF read error: {str(e)}"
125
+
126
+ # =========================
127
+ # Download helper
128
+ # =========================
def create_download_file(content: str) -> str:
    """Write *content* to a persistent temporary ``.txt`` file and return its path.

    The file is created with ``delete=False`` so it still exists after this
    function returns; the returned path is what ``process_input`` passes on
    to the Gradio file component for download. Nothing in this function
    removes the file afterwards.

    Args:
        content: Text to store; written as UTF-8.

    Returns:
        Path of the newly created file.
    """
    with tempfile.NamedTemporaryFile(
        mode="w",
        encoding="utf-8",
        prefix="summary_",  # recognisable name instead of a bare random stem
        suffix=".txt",
        delete=False,
    ) as tmp:
        tmp.write(content)
    # The context manager has flushed and closed the file; only the path is needed.
    return tmp.name
134
 
135
  # =========================
136
  # Main handler
137
  # =========================
def process_input(text: str, file):
    """Summarize an uploaded PDF or pasted text and prepare a downloadable copy.

    An uploaded file takes precedence over pasted text.

    Args:
        text: Text pasted into the textbox; may be empty or ``None``.
        file: Uploaded PDF as delivered by the Gradio file input, or ``None``.

    Returns:
        A ``(message, download_path)`` tuple. ``message`` is the summary with
        study tips, or a user-facing notice when there is nothing to
        summarize. ``download_path`` is the path of a ``.txt`` copy of the
        result, or ``None`` when no summary was produced.
    """
    if file is not None:
        input_text = read_pdf(file)
        # read_pdf reports failures as a "PDF read error: ..." string rather
        # than raising; show that to the user instead of summarizing it.
        if input_text.startswith("PDF read error:"):
            return input_text, None
        # Scanned or image-only PDFs can yield no text at all.
        if not input_text.strip():
            return "No extractable text was found in the PDF.", None
    elif text and text.strip():
        # `text and ...` guards against a None textbox value.
        input_text = text
    else:
        return "Please paste some text or upload a PDF.", None

    result = summarize_long_text(input_text)
    return result, create_download_file(result)
152
 
153
  # =========================
154
  # Gradio UI
 
# Build the Gradio interface: two inputs, one action button, two outputs.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 Long Text Summarizer + AI Study Assistant")
    # Use Markdown list items ("- ") so each feature renders on its own line;
    # plain lines separated by a single "\n" are joined into one paragraph.
    gr.Markdown(
        "- Handles very long documents (thousands of words)\n"
        "- Supports **PDF** upload or direct paste\n"
        "- Runs on CPU – works on free hardware\n"
        "- Gives you **5 AI-generated study tips** tailored to the content\n"
        "- Download result as .txt file"
    )

    # Both input widgets share one row.
    with gr.Row():
        text_input = gr.Textbox(
            lines=10,
            label="Paste your text here (optional)",
            placeholder="Paste lecture notes, article, book chapter...",
        )
        file_input = gr.File(
            label="Or upload a PDF",
            file_types=[".pdf"],
        )

    summarize_btn = gr.Button("Summarize & Get Study Tips", variant="primary")

    # Read-only result area.
    output = gr.Textbox(
        lines=16,
        label="Summary + AI-generated study advice",
        interactive=False,
    )

    # Read-only file slot that receives the generated .txt path.
    download_output = gr.File(
        label="Download full result (.txt)",
        interactive=False,
    )

    # process_input returns (result_text, download_path), which maps onto the
    # two output components in the same order.
    summarize_btn.click(
        fn=process_input,
        inputs=[text_input, file_input],
        outputs=[output, download_output],
    )

demo.launch()