Azidan committed on
Commit
2f00a52
·
verified ·
1 Parent(s): 8091a97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -99
app.py CHANGED
@@ -15,12 +15,13 @@ summarizer = pipeline(
15
  device=-1 # CPU only
16
  )
17
 
18
- CHUNK_SIZE = 900 # safe margin
19
 
20
  # =========================
21
- # Utilities (unchanged)
22
  # =========================
23
  def clean_text(text: str) -> str:
 
24
  text = text.replace("‘", "'").replace("’", "'")
25
  text = text.replace("“", '"').replace("”", '"')
26
  text = re.sub(r"[.]{2,}", ".", text)
@@ -36,7 +37,9 @@ def clean_text(text: str) -> str:
36
  result.append(s.strip())
37
  return " ".join(result)
38
 
 
39
  def chunk_text(text: str):
 
40
  tokens = tokenizer.encode(text, add_special_tokens=False)
41
  chunks = []
42
  for i in range(0, len(tokens), CHUNK_SIZE):
@@ -45,11 +48,15 @@ def chunk_text(text: str):
45
  chunks.append(chunk_text)
46
  return chunks
47
 
 
48
  def summarize_long_text(text: str) -> str:
 
49
  if not text or len(text.strip()) == 0:
50
  return "No text provided."
 
51
  chunks = chunk_text(text)
52
  summaries = []
 
53
  for chunk in chunks:
54
  summary = summarizer(
55
  chunk,
@@ -58,10 +65,36 @@ def summarize_long_text(text: str) -> str:
58
  do_sample=False
59
  )[0]["summary_text"]
60
  summaries.append(summary)
 
61
  merged = " ".join(summaries)
62
- return clean_text(merged)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  def read_pdf(file) -> str:
 
65
  try:
66
  reader = PdfReader(file)
67
  pages = [page.extract_text() or "" for page in reader.pages]
@@ -69,6 +102,7 @@ def read_pdf(file) -> str:
69
  except Exception as e:
70
  return f"PDF read error: {e}"
71
 
 
72
  # =========================
73
  # Main handler
74
  # =========================
@@ -77,111 +111,38 @@ def process_input(text, file):
77
  text = read_pdf(file)
78
  return summarize_long_text(text)
79
 
80
- # =========================
81
- # Custom theme + stronger CSS for black text
82
- # =========================
83
- custom_theme = gr.themes.Default(
84
- primary_hue="blue",
85
- secondary_hue="gray",
86
- neutral_hue="gray",
87
- font=[gr.themes.GoogleFont('Inter'), 'ui-sans-serif', 'sans-serif'],
88
- ).set(
89
- body_background_fill="#ffffff",
90
- body_background_fill_dark="#ffffff",
91
- block_background_fill="#ffffff",
92
- block_background_fill_dark="#ffffff",
93
- button_primary_background_fill="#2563eb",
94
- button_primary_background_fill_hover="#1d4ed8",
95
- button_primary_text_color="#ffffff",
96
- button_primary_border_color="#2563eb",
97
- body_text_color="#111111", # ← dark almost-black
98
- body_text_color_dark="#111111",
99
- )
100
-
101
- custom_css = """
102
- .gradio-container, body, .main, .app, .wrap {
103
- background-color: #ffffff !important;
104
- color: #000000 !important;
105
- }
106
-
107
- * {
108
- color: #000000 !important; /* Force almost everything black */
109
- }
110
-
111
- label, .label, span, p, div, h1, h2, h3, h4, h5, h6 {
112
- color: #000000 !important;
113
- }
114
-
115
- .gr-markdown, .prose, .prose p, .prose li, .prose strong {
116
- color: #111111 !important;
117
- }
118
-
119
- input, textarea, .textbox, .gr-text-input, .gr-textarea {
120
- color: #000000 !important;
121
- background-color: #ffffff !important;
122
- border: 1px solid #d1d5db !important;
123
- }
124
-
125
- ::placeholder {
126
- color: #6b7280 !important; /* Gray placeholder – easier to read */
127
- }
128
-
129
- .gr-button-primary {
130
- color: #ffffff !important; /* White text on blue button */
131
- border-radius: 8px !important;
132
- font-weight: 600 !important;
133
- }
134
-
135
- header, .gr-top, .gr-header {
136
- background-color: #f8f9fa !important;
137
- border-bottom: 1px solid #e5e7eb !important;
138
- padding: 12px 24px !important;
139
- color: #000000 !important;
140
- }
141
-
142
- a {
143
- color: #2563eb !important; /* Blue links for contrast */
144
- }
145
-
146
- a:hover {
147
- color: #1d4ed8 !important;
148
- }
149
- """
150
 
151
  # =========================
152
  # Gradio UI
153
  # =========================
154
- with gr.Blocks(theme=custom_theme, css=custom_css) as demo:
 
155
  gr.Markdown(
156
- "# 📄 Long Text Summarizer (Free-Tier Safe)",
 
 
 
157
  )
158
- gr.Markdown(
159
- "• Handles **thousands of words** \n"
160
- "• Supports **PDF upload** \n"
161
- " Optimized for **CPU / free tier**",
 
162
  )
163
-
164
- with gr.Row():
165
- with gr.Column(scale=5):
166
- text_input = gr.Textbox(
167
- lines=15,
168
- label="Paste text (optional)",
169
- placeholder="Paste your long article / text here...",
170
- )
171
- with gr.Column(scale=1, min_width=240):
172
- file_input = gr.File(
173
- label="Upload PDF (optional)",
174
- file_types=[".pdf"],
175
- )
176
-
177
- summarize_btn = gr.Button("Summarize", variant="primary")
178
-
179
  output = gr.Textbox(
180
- lines=10,
181
- label="Summary",
182
- placeholder="Summary will appear here...",
183
  )
184
-
 
 
185
  summarize_btn.click(
186
  fn=process_input,
187
  inputs=[text_input, file_input],
 
15
  device=-1 # CPU only
16
  )
17
 
18
+ CHUNK_SIZE = 900 # safe margin
19
 
20
  # =========================
21
+ # Utilities
22
  # =========================
23
  def clean_text(text: str) -> str:
24
+ """Fix quotes, spacing, repetition, and broken punctuation."""
25
  text = text.replace("‘", "'").replace("’", "'")
26
  text = text.replace("“", '"').replace("”", '"')
27
  text = re.sub(r"[.]{2,}", ".", text)
 
37
  result.append(s.strip())
38
  return " ".join(result)
39
 
40
+
41
  def chunk_text(text: str):
42
+ """Token-aware chunking to avoid model overflow."""
43
  tokens = tokenizer.encode(text, add_special_tokens=False)
44
  chunks = []
45
  for i in range(0, len(tokens), CHUNK_SIZE):
 
48
  chunks.append(chunk_text)
49
  return chunks
50
 
51
+
52
  def summarize_long_text(text: str) -> str:
53
+ """Summarize arbitrarily long text safely."""
54
  if not text or len(text.strip()) == 0:
55
  return "No text provided."
56
+
57
  chunks = chunk_text(text)
58
  summaries = []
59
+
60
  for chunk in chunks:
61
  summary = summarizer(
62
  chunk,
 
65
  do_sample=False
66
  )[0]["summary_text"]
67
  summaries.append(summary)
68
+
69
  merged = " ".join(summaries)
70
+ cleaned_summary = clean_text(merged)
71
+
72
+ # Add study advice section
73
+ study_advice = """
74
+
75
+ ---
76
+
77
+ ### 📚 How to Study This Summary Effectively
78
+
79
+ Here are some proven techniques to help you learn and remember the material better:
80
+
81
+ - **Active Recall** — Cover the summary (or close your eyes) and try to explain each main point in your own words. This is one of the most powerful ways to strengthen memory.
82
+ - **Spaced Repetition** — Review this summary today, again in 2–3 days, then in one week. Use free apps like Anki or Quizlet to turn key points into flashcards.
83
+ - **Feynman Technique** — Pretend you're teaching this topic to a friend (or a 12-year-old). Explaining it simply reveals what you truly understand.
84
+ - **Self-Testing** — Create 3–5 questions from the summary (e.g. “What is…?”, “Why does…?”, “Give an example of…”). Answer them without looking.
85
+ - **Make Connections** — Draw a quick mind map or diagram linking the main ideas together. This helps see the big picture.
86
+ - **Apply It** — If possible, solve related problems, write a short paragraph, or discuss the topic with someone.
87
+
88
+ Re-reading alone is weak — **active engagement** is what makes information stick!
89
+
90
+ Good luck with your studies! 🚀
91
+ """
92
+
93
+ return cleaned_summary + study_advice
94
+
95
 
96
  def read_pdf(file) -> str:
97
+ """Safely extract text from PDF."""
98
  try:
99
  reader = PdfReader(file)
100
  pages = [page.extract_text() or "" for page in reader.pages]
 
102
  except Exception as e:
103
  return f"PDF read error: {e}"
104
 
105
+
106
  # =========================
107
  # Main handler
108
  # =========================
 
111
  text = read_pdf(file)
112
  return summarize_long_text(text)
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  # =========================
116
  # Gradio UI
117
  # =========================
118
+ with gr.Blocks() as demo:
119
+ gr.Markdown("# 📄 Long Text Summarizer (Free-Tier Safe)")
120
  gr.Markdown(
121
+ " Handles **thousands of words**\n"
122
+ "• Supports **PDF upload**\n"
123
+ "• Optimized for **CPU / free tier**\n"
124
+ "• Includes **study tips** to help you learn better"
125
  )
126
+
127
+ text_input = gr.Textbox(
128
+ lines=15,
129
+ label="Paste text (optional)",
130
+ placeholder="Paste your lecture notes, article, or book chapter here..."
131
  )
132
+
133
+ file_input = gr.File(
134
+ label="Upload PDF (optional)",
135
+ file_types=[".pdf"]
136
+ )
137
+
 
 
 
 
 
 
 
 
 
 
138
  output = gr.Textbox(
139
+ lines=14,
140
+ label="Summary + Study Advice",
141
+ placeholder="Your summary and learning tips will appear here..."
142
  )
143
+
144
+ summarize_btn = gr.Button("Summarize & Get Study Tips", variant="primary")
145
+
146
  summarize_btn.click(
147
  fn=process_input,
148
  inputs=[text_input, file_input],