Update app.py
Browse files
app.py
CHANGED
|
@@ -244,9 +244,6 @@ else:
|
|
| 244 |
|
| 245 |
|
| 246 |
# Step 4: Summarization (Using full text)
|
| 247 |
-
st.subheader("Summarization")
|
| 248 |
-
|
| 249 |
-
# Display full extracted text (not just first 1000 characters)
|
| 250 |
input_text = st.text_area(
|
| 251 |
"Enter text to summarize",
|
| 252 |
height=400,
|
|
@@ -256,31 +253,34 @@ else:
|
|
| 256 |
if st.button("Summarize"):
|
| 257 |
text_to_summarize = st.session_state["pdf_text"].strip() if st.session_state["pdf_text"] else input_text.strip()
|
| 258 |
|
| 259 |
-
# Debugging: Print
|
| 260 |
-
st.write(f"
|
| 261 |
-
|
| 262 |
-
# Handle empty text case
|
| 263 |
if not text_to_summarize:
|
| 264 |
-
st.error("No text provided for summarization
|
| 265 |
-
elif len(text_to_summarize.split()) < 20:
|
| 266 |
-
st.error("The text is too short! Provide at least 20 words.")
|
| 267 |
else:
|
| 268 |
try:
|
| 269 |
with st.spinner("Generating summary..."):
|
| 270 |
-
|
| 271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
summary = summarizer(
|
| 274 |
-
|
| 275 |
max_length=256, # Keep max summary length reasonable
|
| 276 |
min_length=50,
|
| 277 |
do_sample=False
|
| 278 |
)
|
| 279 |
-
|
|
|
|
| 280 |
st.success(summary[0]["summary_text"])
|
| 281 |
-
|
| 282 |
-
except IndexError
|
| 283 |
-
st.error(
|
| 284 |
-
st.write(f"Debugging Info:\n-
|
| 285 |
except Exception as e:
|
| 286 |
-
st.error(f"
|
|
|
|
|
|
| 244 |
|
| 245 |
|
| 246 |
# Step 4: Summarization (Using full text)
|
|
|
|
|
|
|
|
|
|
| 247 |
input_text = st.text_area(
|
| 248 |
"Enter text to summarize",
|
| 249 |
height=400,
|
|
|
|
| 253 |
if st.button("Summarize"):
    # Step 4 handler: summarize the extracted PDF text, or the text typed into
    # the text area when no usable PDF text is stored in the session.
    #
    # `.get` avoids a KeyError when "pdf_text" was never stored, and the
    # `or` fallback lets whitespace-only PDF text fall through to the text
    # area instead of being reported as "no text".
    pdf_text = (st.session_state.get("pdf_text") or "").strip()
    text_to_summarize = pdf_text or (input_text or "").strip()

    # Debugging: print text length before processing.
    word_count = len(text_to_summarize.split())
    st.write(f"Original text length: {word_count} words")

    if not text_to_summarize:
        st.error("No text provided for summarization.")
    else:
        # Bound before the `try` so the IndexError handler below can always
        # report a token count, even when the tokenizer call itself failed.
        input_tokens = []
        try:
            with st.spinner("Generating summary..."):
                # Tokenize and truncate the input to at most 1024 tokens
                # (presumably the summarization model's input limit - confirm).
                input_tokens = tokenizer.encode(
                    text_to_summarize,
                    truncation=True,
                    max_length=1024,
                )
                # Drop special tokens when decoding: otherwise their literal
                # text is re-tokenized by the pipeline and the input can grow
                # past the limit again.
                truncated_text = tokenizer.decode(
                    input_tokens,
                    skip_special_tokens=True,
                )

                # Debugging: check tokenized text length.
                st.write(f"Tokenized length: {len(input_tokens)} tokens")

                summary = summarizer(
                    truncated_text,
                    max_length=256,  # Keep max summary length reasonable
                    min_length=50,
                    do_sample=False,
                    # Safety net: let the pipeline truncate as well, since it
                    # re-tokenizes the decoded text and adds special tokens.
                    truncation=True,
                )

                st.write("Summary:")
                st.success(summary[0]["summary_text"])

        except IndexError:
            st.error("Summarization failed: Index out of range.")
            st.write(f"Debugging Info:\n- Original text length: {word_count} words\n- Tokenized length: {len(input_tokens)} tokens\n- Model: {summarizer.model}")
        except Exception as e:
            # Top-level UI boundary: surface any other failure to the user
            # instead of crashing the app.
            st.error(f"Summarization failed: {e}")
|
| 286 |
+
|