kritsadaK committed on
Commit
5d6cd67
·
verified ·
1 Parent(s): 4719733

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -244,6 +244,9 @@ else:
244
 
245
 
246
  # Step 4: Summarization (Using full text)
 
 
 
247
  input_text = st.text_area(
248
  "Enter text to summarize",
249
  height=400,
@@ -268,28 +271,35 @@ else:
268
  return_tensors="pt" # Ensure proper tensor formatting for PyTorch
269
  )
270
 
271
- st.write(f"Tokenized length: {input_tokens['input_ids'].shape[1]} tokens") # Check final token count
 
272
 
273
  # Move tensor to CPU (or change to CUDA if available)
274
  device = torch.device("cpu")
275
  summarizer.model.to(device)
276
 
277
- # Generate summary with strict max_length settings
278
  summary_ids = summarizer.model.generate(
279
  input_tokens["input_ids"].to(device),
280
- max_length=256,
281
  min_length=50,
282
  do_sample=False
283
  )
284
 
285
  # Decode output summary
286
- summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
 
 
287
 
288
- st.write("Summary:")
289
- st.success(summary)
 
 
 
290
 
291
  except IndexError:
292
  st.error("Summarization failed: Index out of range.")
293
- st.write(f"Debugging Info:\n- Original text length: {len(text_to_summarize.split())} words\n- Tokenized length: {input_tokens['input_ids'].shape[1]} tokens")
294
  except Exception as e:
295
  st.error(f"Summarization failed: {e}")
 
244
 
245
 
246
  # Step 4: Summarization (Using full text)
247
+ st.subheader("Summarization")
248
+
249
+ # Display full extracted text
250
  input_text = st.text_area(
251
  "Enter text to summarize",
252
  height=400,
 
271
  return_tensors="pt" # Ensure proper tensor formatting for PyTorch
272
  )
273
 
274
+ tokenized_length = input_tokens["input_ids"].shape[1]
275
+ st.write(f"Tokenized length: {tokenized_length} tokens") # Check final token count
276
 
277
  # Move tensor to CPU (or change to CUDA if available)
278
  device = torch.device("cpu")
279
  summarizer.model.to(device)
280
 
281
+ # Generate summary
282
  summary_ids = summarizer.model.generate(
283
  input_tokens["input_ids"].to(device),
284
+ max_length=256, # Ensure output is within reasonable size
285
  min_length=50,
286
  do_sample=False
287
  )
288
 
289
  # Decode output summary
290
+ summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True).strip()
291
+
292
+ # **🔹 Print debugging logs**
293
+ st.write(f"Generated summary token length: {len(summary.split())} words")
294
 
295
+ if not summary:
296
+ st.error("Summarization failed: The model did not return any output.")
297
+ else:
298
+ st.write("Summary:")
299
+ st.success(summary)
300
 
301
  except IndexError:
302
  st.error("Summarization failed: Index out of range.")
303
+ st.write(f"Debugging Info:\n- Original text length: {len(text_to_summarize.split())} words\n- Tokenized length: {tokenized_length} tokens")
304
  except Exception as e:
305
  st.error(f"Summarization failed: {e}")