Update app.py
Browse files
app.py
CHANGED
|
@@ -244,6 +244,9 @@ else:
|
|
| 244 |
|
| 245 |
|
| 246 |
# Step 4: Summarization (Using full text)
|
|
|
|
|
|
|
|
|
|
| 247 |
input_text = st.text_area(
|
| 248 |
"Enter text to summarize",
|
| 249 |
height=400,
|
|
@@ -268,28 +271,35 @@ else:
|
|
| 268 |
return_tensors="pt" # Ensure proper tensor formatting for PyTorch
|
| 269 |
)
|
| 270 |
|
| 271 |
-
|
|
|
|
| 272 |
|
| 273 |
# Move tensor to CPU (or change to CUDA if available)
|
| 274 |
device = torch.device("cpu")
|
| 275 |
summarizer.model.to(device)
|
| 276 |
|
| 277 |
-
# Generate summary
|
| 278 |
summary_ids = summarizer.model.generate(
|
| 279 |
input_tokens["input_ids"].to(device),
|
| 280 |
-
max_length=256,
|
| 281 |
min_length=50,
|
| 282 |
do_sample=False
|
| 283 |
)
|
| 284 |
|
| 285 |
# Decode output summary
|
| 286 |
-
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
| 287 |
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
except IndexError:
|
| 292 |
st.error("Summarization failed: Index out of range.")
|
| 293 |
-
st.write(f"Debugging Info:\n- Original text length: {len(text_to_summarize.split())} words\n- Tokenized length: {
|
| 294 |
except Exception as e:
|
| 295 |
st.error(f"Summarization failed: {e}")
|
|
|
|
| 244 |
|
| 245 |
|
| 246 |
# Step 4: Summarization (Using full text)
|
| 247 |
+
st.subheader("Summarization")
|
| 248 |
+
|
| 249 |
+
# Display full extracted text
|
| 250 |
input_text = st.text_area(
|
| 251 |
"Enter text to summarize",
|
| 252 |
height=400,
|
|
|
|
| 271 |
return_tensors="pt" # Ensure proper tensor formatting for PyTorch
|
| 272 |
)
|
| 273 |
|
| 274 |
+
tokenized_length = input_tokens["input_ids"].shape[1]
|
| 275 |
+
st.write(f"Tokenized length: {tokenized_length} tokens") # Check final token count
|
| 276 |
|
| 277 |
# Move tensor to CPU (or change to CUDA if available)
|
| 278 |
device = torch.device("cpu")
|
| 279 |
summarizer.model.to(device)
|
| 280 |
|
| 281 |
+
# Generate summary
|
| 282 |
summary_ids = summarizer.model.generate(
|
| 283 |
input_tokens["input_ids"].to(device),
|
| 284 |
+
max_length=256, # Ensure output is within reasonable size
|
| 285 |
min_length=50,
|
| 286 |
do_sample=False
|
| 287 |
)
|
| 288 |
|
| 289 |
# Decode output summary
|
| 290 |
+
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True).strip()
|
| 291 |
+
|
| 292 |
+
# Debug output: report the generated summary's length in words
|
| 293 |
+
st.write(f"Generated summary token length: {len(summary.split())} words")
|
| 294 |
|
| 295 |
+
if not summary:
|
| 296 |
+
st.error("Summarization failed: The model did not return any output.")
|
| 297 |
+
else:
|
| 298 |
+
st.write("Summary:")
|
| 299 |
+
st.success(summary)
|
| 300 |
|
| 301 |
except IndexError:
|
| 302 |
st.error("Summarization failed: Index out of range.")
|
| 303 |
+
st.write(f"Debugging Info:\n- Original text length: {len(text_to_summarize.split())} words\n- Tokenized length: {tokenized_length} tokens")
|
| 304 |
except Exception as e:
|
| 305 |
st.error(f"Summarization failed: {e}")
|