Update app.py
Browse files
app.py
CHANGED
|
@@ -253,7 +253,6 @@ else:
|
|
| 253 |
if st.button("Summarize"):
|
| 254 |
text_to_summarize = st.session_state["pdf_text"].strip() if st.session_state["pdf_text"] else input_text.strip()
|
| 255 |
|
| 256 |
-
# Debugging: Print text length before processing
|
| 257 |
st.write(f"Original text length: {len(text_to_summarize.split())} words")
|
| 258 |
|
| 259 |
if not text_to_summarize:
|
|
@@ -261,26 +260,36 @@ else:
|
|
| 261 |
else:
|
| 262 |
try:
|
| 263 |
with st.spinner("Generating summary..."):
|
| 264 |
-
# Tokenize
|
| 265 |
-
input_tokens = tokenizer.
|
| 266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
|
| 268 |
-
#
|
| 269 |
-
|
|
|
|
| 270 |
|
| 271 |
-
summary
|
| 272 |
-
|
| 273 |
-
|
|
|
|
| 274 |
min_length=50,
|
| 275 |
do_sample=False
|
| 276 |
)
|
| 277 |
|
|
|
|
|
|
|
|
|
|
| 278 |
st.write("Summary:")
|
| 279 |
-
st.success(summary
|
| 280 |
|
| 281 |
except IndexError:
|
| 282 |
st.error("Summarization failed: Index out of range.")
|
| 283 |
-
st.write(f"Debugging Info:\n- Original text length: {len(text_to_summarize.split())} words\n- Tokenized length: {
|
| 284 |
except Exception as e:
|
| 285 |
-
st.error(f"Summarization failed: {e}")
|
| 286 |
-
|
|
|
|
| 253 |
if st.button("Summarize"):
|
| 254 |
text_to_summarize = st.session_state["pdf_text"].strip() if st.session_state["pdf_text"] else input_text.strip()
|
| 255 |
|
|
|
|
| 256 |
st.write(f"Original text length: {len(text_to_summarize.split())} words")
|
| 257 |
|
| 258 |
if not text_to_summarize:
|
|
|
|
| 260 |
else:
|
| 261 |
try:
    with st.spinner("Generating summary..."):
        # Tokenize and truncate the input so it fits the model's
        # context window. NOTE(review): 1024 matches BART-family
        # encoders — confirm against the checkpoint behind `summarizer`.
        input_tokens = tokenizer.encode_plus(
            text_to_summarize,
            truncation=True,
            max_length=1024,
            return_tensors="pt"  # PyTorch tensors for model.generate
        )

        st.write(f"Tokenized length: {input_tokens['input_ids'].shape[1]} tokens")  # Check final token count

        # Run inference on CPU explicitly (switch to CUDA here if available).
        device = torch.device("cpu")
        summarizer.model.to(device)

        # Generate the summary with strict output-length bounds;
        # do_sample=False gives deterministic (greedy/beam) decoding.
        summary_ids = summarizer.model.generate(
            input_tokens["input_ids"].to(device),
            max_length=256,
            min_length=50,
            do_sample=False
        )

        # Decode token ids back into readable text.
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    st.write("Summary:")
    st.success(summary)

except IndexError:
    st.error("Summarization failed: Index out of range.")
    # BUGFIX: if the IndexError was raised inside encode_plus itself,
    # `input_tokens` is unbound and referencing it here would raise a
    # NameError that masks the real failure — guard before formatting.
    tokenized_len = (
        input_tokens["input_ids"].shape[1]
        if "input_tokens" in locals()
        else "unavailable (tokenization failed)"
    )
    st.write(f"Debugging Info:\n- Original text length: {len(text_to_summarize.split())} words\n- Tokenized length: {tokenized_len} tokens")
except Exception as e:
    st.error(f"Summarization failed: {e}")
|
|
|