kritsadaK committed on
Commit
447a5b6
·
verified ·
1 Parent(s): 2207b67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -19
app.py CHANGED
@@ -244,9 +244,6 @@ else:
244
 
245
 
246
  # Step 4: Summarization (Using full text)
247
- st.subheader("Summarization")
248
-
249
- # Display full extracted text (not just first 1000 characters)
250
  input_text = st.text_area(
251
  "Enter text to summarize",
252
  height=400,
@@ -256,31 +253,34 @@ else:
256
  if st.button("Summarize"):
257
  text_to_summarize = st.session_state["pdf_text"].strip() if st.session_state["pdf_text"] else input_text.strip()
258
 
259
- # Debugging: Print input length
260
- st.write(f"**Text Length:** {len(text_to_summarize.split())} words")
261
-
262
- # Handle empty text case
263
  if not text_to_summarize:
264
- st.error("No text provided for summarization!")
265
- elif len(text_to_summarize.split()) < 20:
266
- st.error("The text is too short! Provide at least 20 words.")
267
  else:
268
  try:
269
  with st.spinner("Generating summary..."):
270
- max_input_length = 1024 # Adjust max input length for BART
271
- text_to_summarize = " ".join(text_to_summarize.split()[:max_input_length]) #Trim long text
 
 
 
 
272
 
273
  summary = summarizer(
274
- text_to_summarize,
275
  max_length=256, # Keep max summary length reasonable
276
  min_length=50,
277
  do_sample=False
278
  )
279
- st.write("**Summary:**")
 
280
  st.success(summary[0]["summary_text"])
281
-
282
- except IndexError as e:
283
- st.error(f"**Summarization failed: Index out of range.**")
284
- st.write(f"Debugging Info:\n- Text Length: {len(text_to_summarize.split())} words\n- Model: {summarizer.model}")
285
  except Exception as e:
286
- st.error(f"**Summarization failed:** {e}")
 
 
244
 
245
 
246
  # Step 4: Summarization (Using full text)
 
 
 
247
  input_text = st.text_area(
248
  "Enter text to summarize",
249
  height=400,
 
253
  if st.button("Summarize"):
254
  text_to_summarize = st.session_state["pdf_text"].strip() if st.session_state["pdf_text"] else input_text.strip()
255
 
256
+ # Debugging: Print text length before processing
257
+ st.write(f"Original text length: {len(text_to_summarize.split())} words")
258
+
 
259
  if not text_to_summarize:
260
+ st.error("No text provided for summarization.")
 
 
261
  else:
262
  try:
263
  with st.spinner("Generating summary..."):
264
+ # Tokenize input and truncate properly
265
+ input_tokens = tokenizer.encode(text_to_summarize, truncation=True, max_length=1024)
266
+ truncated_text = tokenizer.decode(input_tokens)
267
+
268
+ # Debugging: Check tokenized text length
269
+ st.write(f"Tokenized length: {len(input_tokens)} tokens")
270
 
271
  summary = summarizer(
272
+ truncated_text,
273
  max_length=256, # Keep max summary length reasonable
274
  min_length=50,
275
  do_sample=False
276
  )
277
+
278
+ st.write("Summary:")
279
  st.success(summary[0]["summary_text"])
280
+
281
+ except IndexError:
282
+ st.error("Summarization failed: Index out of range.")
283
+ st.write(f"Debugging Info:\n- Original text length: {len(text_to_summarize.split())} words\n- Tokenized length: {len(input_tokens)} tokens\n- Model: {summarizer.model}")
284
  except Exception as e:
285
+ st.error(f"Summarization failed: {e}")
286
+