Azidan committed on
Commit
bb331f0
·
verified ·
1 Parent(s): aba9518

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -12
app.py CHANGED
@@ -7,7 +7,8 @@ import tempfile
7
  # =========================
8
  # Model setup (CPU-safe)
9
  # =========================
10
- MODEL_NAME = "sshleifer/distilbart-cnn-12-6"
 
11
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
12
  summarizer = pipeline(
13
  "summarization",
@@ -16,10 +17,10 @@ summarizer = pipeline(
16
  device=-1 # CPU only
17
  )
18
 
19
- # Better AI advice generator - flan-t5-base is still quite CPU friendly
20
  advice_generator = pipeline(
21
  "text2text-generation",
22
- model="google/flan-t5-base",
23
  device=-1 # CPU only
24
  )
25
 
@@ -101,28 +102,32 @@ def extract_possible_headings(text: str) -> str:
101
  return "### Extracted Possible Headings/Subtitles\n\n" + "\n- ".join([''] + headings) + "\n\n---\n\n"
102
  return ""
103
 
104
- def summarize_long_text(text: str) -> str:
105
  """Summarize long text in chunks + add AI study advice.
106
  Now with longer summaries per chunk and formatted as bullet points."""
107
  if not text or len(text.strip()) == 0:
108
  return "No text provided."
109
 
 
110
  # Extract possible headings first
111
  headings_section = extract_possible_headings(text)
112
 
 
113
  chunks = chunk_text(text)
114
- summaries = []
115
 
116
- for i, chunk in enumerate(chunks, 1):
 
 
 
117
  try:
118
  summary = summarizer(
119
  chunk,
120
- max_length=250, # Increased for longer summaries
121
- min_length=80, # Increased for more detail
122
  do_sample=False
123
  )[0]["summary_text"]
124
  cleaned = clean_text(summary)
125
- summaries.append(f"**Chunk {i} Summary:** {cleaned}")
126
  except Exception:
127
  pass # skip problematic chunks
128
 
@@ -131,8 +136,10 @@ def summarize_long_text(text: str) -> str:
131
  for s in summaries:
132
  summary_md += f"- {s}\n"
133
 
 
134
  ai_advice = generate_ai_advice(summary_md) # Use the bulleted summary for advice generation
135
 
 
136
  return headings_section + summary_md + ai_advice
137
 
138
  def read_pdf(file) -> str:
@@ -156,9 +163,10 @@ def create_download_file(content: str) -> str:
156
  # =========================
157
  # Main handler
158
  # =========================
159
- def process_input(text: str, file):
160
  input_text = ""
161
 
 
162
  if file is not None:
163
  input_text = read_pdf(file)
164
  elif text.strip():
@@ -166,7 +174,7 @@ def process_input(text: str, file):
166
  else:
167
  return "Please paste some text or upload a PDF.", None
168
 
169
- result = summarize_long_text(input_text)
170
  download_path = create_download_file(result)
171
 
172
  return result, download_path
@@ -182,7 +190,8 @@ with gr.Blocks() as demo:
182
  "• Runs on CPU – works on free hardware\n"
183
  "• Gives you **longer, bullet-point summaries** with possible headings/subtitles\n"
184
  "• Includes **5 AI-generated study tips** tailored to the content\n"
185
- "• Download result as .txt file"
 
186
  )
187
 
188
  with gr.Row():
 
7
  # =========================
8
  # Model setup (CPU-safe)
9
  # =========================
10
+ # Use smaller, faster models to speed up processing
11
+ MODEL_NAME = "sshleifer/distilbart-cnn-6-6" # Smaller than 12-6, faster on CPU
12
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
13
  summarizer = pipeline(
14
  "summarization",
 
17
  device=-1 # CPU only
18
  )
19
 
20
+ # Use smaller flan-t5-small for faster advice generation
21
  advice_generator = pipeline(
22
  "text2text-generation",
23
+ model="google/flan-t5-small",
24
  device=-1 # CPU only
25
  )
26
 
 
102
  return "### Extracted Possible Headings/Subtitles\n\n" + "\n- ".join([''] + headings) + "\n\n---\n\n"
103
  return ""
104
 
105
+ def summarize_long_text(text: str, progress=gr.Progress()) -> str:
106
  """Summarize long text in chunks + add AI study advice.
107
  Now with longer summaries per chunk and formatted as bullet points."""
108
  if not text or len(text.strip()) == 0:
109
  return "No text provided."
110
 
111
+ progress(0, desc="Extracting headings...")
112
  # Extract possible headings first
113
  headings_section = extract_possible_headings(text)
114
 
115
+ progress(0.1, desc="Chunking text...")
116
  chunks = chunk_text(text)
 
117
 
118
+ summaries = []
119
+ progress(0.2, desc="Summarizing chunks...")
120
+ for i in progress.tqdm(range(len(chunks))):
121
+ chunk = chunks[i]
122
  try:
123
  summary = summarizer(
124
  chunk,
125
+ max_length=200, # Reduced slightly for speed (compromise between length and time)
126
+ min_length=60, # Reduced for speed
127
  do_sample=False
128
  )[0]["summary_text"]
129
  cleaned = clean_text(summary)
130
+ summaries.append(f"**Chunk {i+1} Summary:** {cleaned}")
131
  except Exception:
132
  pass # skip problematic chunks
133
 
 
136
  for s in summaries:
137
  summary_md += f"- {s}\n"
138
 
139
+ progress(0.8, desc="Generating AI advice...")
140
  ai_advice = generate_ai_advice(summary_md) # Use the bulleted summary for advice generation
141
 
142
+ progress(1, desc="Done!")
143
  return headings_section + summary_md + ai_advice
144
 
145
  def read_pdf(file) -> str:
 
163
  # =========================
164
  # Main handler
165
  # =========================
166
+ def process_input(text: str, file, progress=gr.Progress()):
167
  input_text = ""
168
 
169
+ progress(0, desc="Reading input...")
170
  if file is not None:
171
  input_text = read_pdf(file)
172
  elif text.strip():
 
174
  else:
175
  return "Please paste some text or upload a PDF.", None
176
 
177
+ result = summarize_long_text(input_text, progress)
178
  download_path = create_download_file(result)
179
 
180
  return result, download_path
 
190
  "• Runs on CPU – works on free hardware\n"
191
  "• Gives you **longer, bullet-point summaries** with possible headings/subtitles\n"
192
  "• Includes **5 AI-generated study tips** tailored to the content\n"
193
+ "• Download result as .txt file\n"
194
+ "**Note**: Processing may take time for long documents on CPU (initial model load + inference). Please be patient!"
195
  )
196
 
197
  with gr.Row():