kritsadaK committed on
Commit
4719733
·
verified ·
1 Parent(s): 447a5b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -13
app.py CHANGED
@@ -253,7 +253,6 @@ else:
253
  if st.button("Summarize"):
254
  text_to_summarize = st.session_state["pdf_text"].strip() if st.session_state["pdf_text"] else input_text.strip()
255
 
256
- # Debugging: Print text length before processing
257
  st.write(f"Original text length: {len(text_to_summarize.split())} words")
258
 
259
  if not text_to_summarize:
@@ -261,26 +260,36 @@ else:
261
  else:
262
  try:
263
  with st.spinner("Generating summary..."):
264
- # Tokenize input and truncate properly
265
- input_tokens = tokenizer.encode(text_to_summarize, truncation=True, max_length=1024)
266
- truncated_text = tokenizer.decode(input_tokens)
 
 
 
 
 
 
267
 
268
- # Debugging: Check tokenized text length
269
- st.write(f"Tokenized length: {len(input_tokens)} tokens")
 
270
 
271
- summary = summarizer(
272
- truncated_text,
273
- max_length=256, # Keep max summary length reasonable
 
274
  min_length=50,
275
  do_sample=False
276
  )
277
 
 
 
 
278
  st.write("Summary:")
279
- st.success(summary[0]["summary_text"])
280
 
281
  except IndexError:
282
  st.error("Summarization failed: Index out of range.")
283
- st.write(f"Debugging Info:\n- Original text length: {len(text_to_summarize.split())} words\n- Tokenized length: {len(input_tokens)} tokens\n- Model: {summarizer.model}")
284
  except Exception as e:
285
- st.error(f"Summarization failed: {e}")
286
-
 
253
  if st.button("Summarize"):
254
  text_to_summarize = st.session_state["pdf_text"].strip() if st.session_state["pdf_text"] else input_text.strip()
255
 
 
256
  st.write(f"Original text length: {len(text_to_summarize.split())} words")
257
 
258
  if not text_to_summarize:
 
260
  else:
261
  try:
262
  with st.spinner("Generating summary..."):
263
+ # Tokenize and truncate input properly
264
+ input_tokens = tokenizer.encode_plus(
265
+ text_to_summarize,
266
+ truncation=True,
267
+ max_length=1024,
268
+ return_tensors="pt" # Ensure proper tensor formatting for PyTorch
269
+ )
270
+
271
+ st.write(f"Tokenized length: {input_tokens['input_ids'].shape[1]} tokens") # Check final token count
272
 
273
+ # Move tensor to CPU (or change to CUDA if available)
274
+ device = torch.device("cpu")
275
+ summarizer.model.to(device)
276
 
277
+ # Generate summary with strict max_length settings
278
+ summary_ids = summarizer.model.generate(
279
+ input_tokens["input_ids"].to(device),
280
+ max_length=256,
281
  min_length=50,
282
  do_sample=False
283
  )
284
 
285
+ # Decode output summary
286
+ summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
287
+
288
  st.write("Summary:")
289
+ st.success(summary)
290
 
291
  except IndexError:
292
  st.error("Summarization failed: Index out of range.")
293
+ st.write(f"Debugging Info:\n- Original text length: {len(text_to_summarize.split())} words\n- Tokenized length: {input_tokens['input_ids'].shape[1]} tokens")
294
  except Exception as e:
295
+ st.error(f"Summarization failed: {e}")