Spaces:
Runtime error
Runtime error
Upload 14 files
Browse files- .gitignore +9 -0
- .streamlit/config.toml +2 -0
- Dockerfile +28 -20
- README.md +98 -19
- app.py +197 -0
- evaluation.py +466 -0
- evaluation_dashboard.py +286 -0
- evaluation_results_en.csv +95 -0
- evaluation_results_fr.csv +27 -0
- evaluation_results_summary.csv +3 -0
- packages.txt +3 -0
- requirements.txt +0 -0
- summariser.py +599 -0
- test.py +57 -0
.gitignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
venv/
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
|
| 6 |
+
.vscode/
|
| 7 |
+
.github/
|
| 8 |
+
|
| 9 |
+
lid.176.bin
|
.streamlit/config.toml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[server]
|
| 2 |
+
fileWatcherType = "poll"
|
Dockerfile
CHANGED
|
@@ -1,20 +1,28 @@
|
|
| 1 |
-
FROM python:3.
|
| 2 |
-
|
| 3 |
-
WORKDIR /
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
RUN
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /code
|
| 4 |
+
|
| 5 |
+
# Install system dependencies
|
| 6 |
+
RUN apt-get update && apt-get install -y \
|
| 7 |
+
build-essential \
|
| 8 |
+
cmake \
|
| 9 |
+
pkg-config \
|
| 10 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
+
|
| 12 |
+
# Install python dependencies
|
| 13 |
+
COPY requirements.txt .
|
| 14 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 15 |
+
|
| 16 |
+
# Create a non-root user
|
| 17 |
+
RUN useradd -m -u 1000 user
|
| 18 |
+
USER user
|
| 19 |
+
ENV HOME=/home/user \
|
| 20 |
+
PATH=/home/user/.local/bin:$PATH
|
| 21 |
+
|
| 22 |
+
WORKDIR $HOME/app
|
| 23 |
+
|
| 24 |
+
COPY --chown=user . $HOME/app
|
| 25 |
+
|
| 26 |
+
EXPOSE 7860
|
| 27 |
+
|
| 28 |
+
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
README.md
CHANGED
|
@@ -1,19 +1,98 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Multilingual News Article Summarizer
|
| 2 |
+
|
| 3 |
+
A complete multilingual news summarization solution using Google Pegasus and Facebook NLLB-200 models with Streamlit web interface and comprehensive evaluation pipeline.
|
| 4 |
+
|
| 5 |
+
## Demo
|
| 6 |
+
|
| 7 |
+
[View Demo](http://bluelantern.tplinkdns.com:8501/)
|
| 8 |
+
|
| 9 |
+
### Features
|
| 10 |
+
|
| 11 |
+
- ✅ **Multilingual Support**: Auto-detects 70+ languages, translates to English for summarization, then back-translates
|
| 12 |
+
- ✅ **Chunking for Long Articles**: Handles unlimited text length with overlapping token-based chunking
|
| 13 |
+
- ✅ **Customizable Parameters**: Adjustable min/max summary lengths and chunk processing
|
| 14 |
+
- ✅ **Evaluation Pipeline**: ROUGE metrics on CNN/DailyMail (English) and MLSUM (French) datasets
|
| 15 |
+
- ✅ **Interactive Dashboard**: Compare results with official Pegasus benchmarks
|
| 16 |
+
- ✅ **Navigation System**: Seamless switching between summarizer and evaluation views
|
| 17 |
+
|
| 18 |
+
### Models Used
|
| 19 |
+
|
| 20 |
+
- **Summarization**: [google/pegasus-cnn_dailymail](https://huggingface.co/google/pegasus-cnn_dailymail)
|
| 21 |
+
- **Translation**: [facebook/nllb-200-distilled-600M](https://huggingface.co/facebook/nllb-200-distilled-600M)
|
| 22 |
+
|
| 23 |
+
### Installation
|
| 24 |
+
|
| 25 |
+
1. **Virtual Environment** (recommended):
|
| 26 |
+
|
| 27 |
+
```powershell
|
| 28 |
+
python -m venv venv
|
| 29 |
+
venv\Scripts\activate
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
2. **Install Dependencies**:
|
| 33 |
+
|
| 34 |
+
```powershell
|
| 35 |
+
pip install -r requirements.txt
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
3. **Optional**: Install CUDA for GPU acceleration (faster processing)
|
| 39 |
+
|
| 40 |
+
4. **Download fastText Model**: [fastText Model](https://fasttext.cc/docs/en/language-identification.html)
|
| 41 |
+
|
| 42 |
+
5. **Run Application**:
|
| 43 |
+
|
| 44 |
+
```powershell
|
| 45 |
+
streamlit run app.py
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
6. **Access**: Open browser to `http://localhost:8501`
|
| 49 |
+
|
| 50 |
+
### File Structure
|
| 51 |
+
|
| 52 |
+
```
|
| 53 |
+
📁 nlp/
|
| 54 |
+
├── 📄 app.py # Main Streamlit application with navigation
|
| 55 |
+
├── 📄 summariser.py # Core multilingual summarizer class
|
| 56 |
+
├── 📄 evaluation.py # Evaluation pipeline for ROUGE metrics
|
| 57 |
+
├── 📄 evaluation_dashboard.py # Results dashboard with benchmark comparisons
|
| 58 |
+
├── 📄 requirements.txt # Python dependencies
|
| 59 |
+
├── 📄 README.md # This documentation
|
| 60 |
+
├── 📊 evaluation_results_en.csv # English evaluation results
|
| 61 |
+
├── 📊 evaluation_results_fr.csv # French evaluation results
|
| 62 |
+
└── 📊 evaluation_results_summary.csv # Aggregated metrics summary
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
### Quick Start
|
| 66 |
+
|
| 67 |
+
```powershell
|
| 68 |
+
# Clone and setup
|
| 69 |
+
git clone <repository-url>
|
| 70 |
+
cd nlp
|
| 71 |
+
|
| 72 |
+
# Create virtual environment
|
| 73 |
+
python -m venv venv
|
| 74 |
+
venv\Scripts\activate
|
| 75 |
+
|
| 76 |
+
# Install dependencies
|
| 77 |
+
pip install -r requirements.txt
|
| 78 |
+
|
| 79 |
+
# Run application
|
| 80 |
+
streamlit run app.py
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
### Usage
|
| 84 |
+
|
| 85 |
+
1. **News Summarizer**: Paste any article (70+ languages supported) and get intelligent summaries
|
| 86 |
+
2. **Evaluation Dashboard**: View ROUGE metrics and compare with official Pegasus benchmarks
|
| 87 |
+
3. **Navigation**: Use sidebar to switch between summarizer and evaluation views
|
| 88 |
+
|
| 89 |
+
### Technical Details
|
| 90 |
+
|
| 91 |
+
- **Chunking**: 462-token chunks with 50-token overlap for long articles
|
| 92 |
+
- **Languages**: Auto-detection with langdetect, NLLB-200 translation support
|
| 93 |
+
- **Evaluation**: 25 samples per language, ROUGE-1/2/L metrics
|
| 94 |
+
- **GPU Support**: Automatic CUDA detection for faster processing
|
| 95 |
+
|
| 96 |
+
### Team
|
| 97 |
+
|
| 98 |
+
Made by **Group 31**: Loh Lit Hoong, John Ong Ming Hom, Liew Jin Sze, Kueh Pang Lang
|
app.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from summariser import Summarizer
|
| 3 |
+
from evaluation_dashboard import show_evaluation_page
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@st.cache_resource
def load_summarizer_model():
    """Build the Summarizer once and let Streamlit cache it across reruns."""
    return Summarizer()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def show_summarizer_page():
    """Main summarizer page function.

    Renders the input form, runs the cached Summarizer on the pasted article,
    and displays the detected language, optional intermediate artifacts
    (English translation, English summary), and the final summary.
    """
    st.title("📰 Multilingual News Article Summarizer")
    st.markdown(
        "Enter a news article below. If non-English, it will be translated to English, summarized, and the summary translated back."
    )

    # Model loading is cached via @st.cache_resource, so the spinner is only
    # slow on the first run of a session.
    with st.spinner("Loading models... This may take longer on first run."):
        summarizer = load_summarizer_model()

    article_text = st.text_area(
        "Paste your article here (supports multiple languages):",
        height=250,
        key="article_input",
    )

    # Length controls: 0 is a sentinel meaning "let the model decide"
    # (converted to None before calling the summarizer below).
    col1, col2 = st.columns(2)
    with col1:
        min_summary_length = st.slider(
            "Minimum Final Summary Length (optional):",
            min_value=0,
            max_value=150,
            value=0,
            step=5,
            help="Set to 0 to let the model decide."
        )
    with col2:
        max_summary_length = st.slider(
            "Maximum Final Summary Length (optional):",
            min_value=0,
            max_value=500,
            value=0,
            step=10,
            help="Set to 0 to let the model decide."
        )

    show_intermediate = st.checkbox(
        "Show intermediate translation and English summary", value=True
    )

    if st.button("✨ Summarize Article", type="primary"):
        if not article_text.strip():
            st.warning("Please enter some text to summarize.")
        else:
            with st.spinner(
                "Processing and generating summary... This can take a while for long non-English texts."
            ):
                try:
                    # Convert 0 to None before passing to the summarizer
                    effective_min_length = None if min_summary_length == 0 else min_summary_length
                    effective_max_length = None if max_summary_length == 0 else max_summary_length

                    summary_results = summarizer.summarize(
                        article_text,
                        overall_min_length=effective_min_length,
                        overall_max_length=effective_max_length,
                    )

                    if summary_results.get('error'):
                        st.error(summary_results['error'])
                    else:
                        # Language-detection feedback for the user.
                        detected_lang = summary_results.get('detected_language_raw')
                        confidence = summary_results.get('detected_language_confidence')

                        if detected_lang:
                            lang_display_text = f"Detected Input Language: **{detected_lang}**"
                            if confidence is not None:
                                lang_display_text += f" (Confidence: {confidence:.2f})"
                            st.info(lang_display_text)
                            # Explain whether a translation pass happened; 'en'/'eng'
                            # with translation_performed means a low-confidence
                            # English detection triggered a normalization pass.
                            if summary_results.get('translation_performed') and \
                                    (detected_lang != 'en' and detected_lang != 'eng'):
                                st.caption("Translation to English was performed before summarization.")
                            elif summary_results.get('translation_performed') and \
                                    (detected_lang == 'en' or detected_lang == 'eng'):
                                st.caption("Input detected as English with low confidence; an English-to-English 'translation' pass was performed for normalization before summarization.")

                        # Optional intermediate artifacts, shown read-only.
                        if show_intermediate and summary_results.get(
                            "english_translation"
                        ):
                            st.subheader(
                                "Intermediate: Translated to English (for Summarization)"
                            )
                            st.text_area(
                                "English Translation",
                                value=summary_results["english_translation"],
                                height=200,
                                disabled=True,
                                key="eng_trans",
                            )

                        if show_intermediate and summary_results.get("english_summary"):
                            st.subheader("Intermediate: English Summary (from Pegasus)")
                            st.text_area(
                                "English Summary",
                                value=summary_results["english_summary"],
                                height=150,
                                disabled=True,
                                key="eng_sum",
                            )

                        st.subheader("Final Summary")
                        if summary_results.get("final_summary"):
                            st.success(summary_results["final_summary"])
                        else:
                            st.warning("No final summary was generated.")

                except Exception as e:
                    # Last-resort handler: surface the error and full traceback in the UI.
                    st.error(f"A critical error occurred in the application: {e}")
                    import traceback

                    st.exception(traceback.format_exc())

    # Page footer with attribution.
    st.markdown("---")
    st.markdown(
        "<div style='text-align: center; color: #666; font-size: 16px;'>"
        "Powered by google/pegasus-cnn_dailymail<br>"
        "<strong>Made by Group 31:</strong> Loh Lit Hoong, John Ong Ming Hom, Liew Jin Sze, Kueh Pang Lang"
        "</div>",
        unsafe_allow_html=True,
    )
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def main():
    """Main application with navigation.

    Configures the Streamlit page, injects custom font CSS, renders the
    sidebar navigation, and dispatches to the selected page.
    """
    st.set_page_config(
        page_title="Multilingual News Summarizer", page_icon="📰", layout="wide"
    )

    # Custom CSS: apply the Oswald Google font across the app. Headings need
    # !important because Streamlit sets its own font-family on them.
    custom_font_css = """
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Oswald:wght@200..700&display=swap');

    /* Apply to general elements first */
    html, body, [class*="st-"],
    .stTextArea textarea, .stTextInput input,
    .stButton button, .stSelectbox select, .stSlider label,
    p, span, div, li /* Common text holding elements */
    {
        font-family: 'Oswald', sans-serif;
    }

    /* Specifically target headings, with !important */
    h1, h2, h3, h4, h5, h6,
    [data-testid="stAppViewContainer"] h1, /* Main page title */
    [data-testid="stAppViewContainer"] h2,
    [data-testid="stAppViewContainer"] h3,
    [data-testid="stSidebarContent"] h1, /* Sidebar title */
    [data-testid="stSidebarContent"] h2,
    [data-testid="stSidebarContent"] h3,
    div[data-testid="stHeading"] > h1, /* For st.title() */
    div[data-testid="stHeading"] > h2, /* For st.header() */
    div[data-testid="stHeading"] > h3 /* For st.subheader() */
    {
        font-family: 'Oswald', sans-serif !important;
    }
    </style>
    """
    st.markdown(custom_font_css, unsafe_allow_html=True)

    # Navigation sidebar
    with st.sidebar:
        st.title("🧭 Navigation")
        page = st.selectbox(
            "Select Page:",
            ["📰 News Summarizer", "📊 Evaluation Dashboard"],
            index=0,
            key="navigation",
        )

        st.markdown("---")
        st.markdown("**About this App:**")
        st.markdown("• Multilingual news summarization")
        st.markdown(
            "• Powered by [Pegasus](https://huggingface.co/google/pegasus-cnn_dailymail) & [NLLB-200](https://huggingface.co/facebook/nllb-200-distilled-600M)"
        )
        st.markdown("• Evaluation with ROUGE metrics")

    # Display selected page
    if page == "📰 News Summarizer":
        show_summarizer_page()
    elif page == "📊 Evaluation Dashboard":
        show_evaluation_page()
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# Script entry point: launch the Streamlit app.
if __name__ == "__main__":
    main()
|
evaluation.py
ADDED
|
@@ -0,0 +1,466 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Evaluation pipeline for Multilingual News Article Summarizer.
|
| 3 |
+
|
| 4 |
+
Evaluates model performance on:
|
| 5 |
+
- CNN/DailyMail (English)
|
| 6 |
+
- MLSUM French
|
| 7 |
+
- MLSUM German
|
| 8 |
+
|
| 9 |
+
Uses 100 test samples per language and calculates ROUGE-1, ROUGE-2, and ROUGE-L scores.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import csv
|
| 13 |
+
import os
|
| 14 |
+
from typing import List, Dict, Any
|
| 15 |
+
from datasets import load_dataset
|
| 16 |
+
from rouge_score import rouge_scorer
|
| 17 |
+
from summariser import Summarizer
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class SummarizerEvaluator:
|
| 21 |
+
"""Evaluator for multilingual news article summarization."""
|
| 22 |
+
|
| 23 |
+
def __init__(self):
|
| 24 |
+
"""Initialize the evaluator with required models."""
|
| 25 |
+
print("Initializing Summarizer...")
|
| 26 |
+
self.summarizer = Summarizer()
|
| 27 |
+
self.rouge_scorer = rouge_scorer.RougeScorer(
|
| 28 |
+
["rouge1", "rouge2", "rougeL"], use_stemmer=True
|
| 29 |
+
)
|
| 30 |
+
print("Evaluator initialized successfully.\n")
|
| 31 |
+
|
| 32 |
+
def load_dataset_samples(
|
| 33 |
+
self, dataset_name: str, language: str, num_samples: int = 100
|
| 34 |
+
) -> List[Dict[str, Any]]:
|
| 35 |
+
"""
|
| 36 |
+
Load samples from a dataset using streaming to avoid memory issues.
|
| 37 |
+
|
| 38 |
+
Args:
|
| 39 |
+
dataset_name: Name of the dataset to load
|
| 40 |
+
language: Language identifier
|
| 41 |
+
num_samples: Number of samples to load (default: 100)
|
| 42 |
+
|
| 43 |
+
Returns:
|
| 44 |
+
List of sample dictionaries with 'article' and 'summary' keys
|
| 45 |
+
"""
|
| 46 |
+
print(f"Loading {num_samples} samples from {dataset_name} ({language})...")
|
| 47 |
+
|
| 48 |
+
samples = []
|
| 49 |
+
successful_loads = 0
|
| 50 |
+
|
| 51 |
+
try:
|
| 52 |
+
# Configure dataset loading
|
| 53 |
+
if dataset_name == "cnn_dailymail":
|
| 54 |
+
dataset = load_dataset(
|
| 55 |
+
dataset_name, "3.0.0", streaming=True, split="test"
|
| 56 |
+
)
|
| 57 |
+
article_key = "article"
|
| 58 |
+
summary_key = "highlights"
|
| 59 |
+
elif dataset_name == "mlsum":
|
| 60 |
+
# MLSUM needs trust_remote_code=True
|
| 61 |
+
dataset = load_dataset(
|
| 62 |
+
dataset_name,
|
| 63 |
+
language,
|
| 64 |
+
streaming=True,
|
| 65 |
+
split="test",
|
| 66 |
+
trust_remote_code=True,
|
| 67 |
+
)
|
| 68 |
+
article_key = "text"
|
| 69 |
+
summary_key = "summary"
|
| 70 |
+
else:
|
| 71 |
+
raise ValueError(f"Unsupported dataset: {dataset_name}")
|
| 72 |
+
|
| 73 |
+
# Stream and collect samples until we get exactly num_samples successful ones
|
| 74 |
+
for sample in dataset:
|
| 75 |
+
try:
|
| 76 |
+
# Extract article and summary
|
| 77 |
+
article = sample.get(article_key, "").strip()
|
| 78 |
+
summary = sample.get(summary_key, "").strip()
|
| 79 |
+
|
| 80 |
+
# Skip if either article or summary is empty
|
| 81 |
+
if not article or not summary:
|
| 82 |
+
continue
|
| 83 |
+
|
| 84 |
+
samples.append({"article": article, "summary": summary})
|
| 85 |
+
|
| 86 |
+
successful_loads += 1 # Progress logging every 10 samples
|
| 87 |
+
if successful_loads % 10 == 0:
|
| 88 |
+
print(f" Loaded {successful_loads}/{num_samples} samples...")
|
| 89 |
+
|
| 90 |
+
# Stop when we have enough samples
|
| 91 |
+
if successful_loads >= num_samples:
|
| 92 |
+
break
|
| 93 |
+
|
| 94 |
+
except Exception as e:
|
| 95 |
+
print(f" Warning: Skipping sample due to error: {e}")
|
| 96 |
+
continue
|
| 97 |
+
|
| 98 |
+
print(
|
| 99 |
+
f"Successfully loaded {len(samples)} samples from {dataset_name} ({language})\n"
|
| 100 |
+
)
|
| 101 |
+
return samples
|
| 102 |
+
|
| 103 |
+
except Exception as e:
|
| 104 |
+
print(f"Error loading dataset {dataset_name} ({language}): {e}")
|
| 105 |
+
return []
|
| 106 |
+
|
| 107 |
+
def evaluate_sample(self, article: str, reference_summary: str) -> Dict[str, Any]:
|
| 108 |
+
"""
|
| 109 |
+
Evaluate a single article-summary pair.
|
| 110 |
+
|
| 111 |
+
Args:
|
| 112 |
+
article: Input article text
|
| 113 |
+
reference_summary: Ground truth summary
|
| 114 |
+
|
| 115 |
+
Returns:
|
| 116 |
+
Dictionary with evaluation results
|
| 117 |
+
"""
|
| 118 |
+
try:
|
| 119 |
+
# Generate summary using the summarizer
|
| 120 |
+
result = self.summarizer.summarize(article)
|
| 121 |
+
|
| 122 |
+
if result["error"]:
|
| 123 |
+
return {
|
| 124 |
+
"success": False,
|
| 125 |
+
"error": result["error"],
|
| 126 |
+
"rouge1_f": 0.0,
|
| 127 |
+
"rouge2_f": 0.0,
|
| 128 |
+
"rougeL_f": 0.0,
|
| 129 |
+
"generated_summary": None,
|
| 130 |
+
"detected_language": result.get("detected_language_ld"),
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
generated_summary = result["final_summary"]
|
| 134 |
+
if not generated_summary:
|
| 135 |
+
return {
|
| 136 |
+
"success": False,
|
| 137 |
+
"error": "No summary generated",
|
| 138 |
+
"rouge1_f": 0.0,
|
| 139 |
+
"rouge2_f": 0.0,
|
| 140 |
+
"rougeL_f": 0.0,
|
| 141 |
+
"generated_summary": None,
|
| 142 |
+
"detected_language": result.get("detected_language_ld"),
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
# Calculate ROUGE scores
|
| 146 |
+
scores = self.rouge_scorer.score(reference_summary, generated_summary)
|
| 147 |
+
|
| 148 |
+
return {
|
| 149 |
+
"success": True,
|
| 150 |
+
"error": None,
|
| 151 |
+
"rouge1_f": scores["rouge1"].fmeasure,
|
| 152 |
+
"rouge2_f": scores["rouge2"].fmeasure,
|
| 153 |
+
"rougeL_f": scores["rougeL"].fmeasure,
|
| 154 |
+
"generated_summary": generated_summary,
|
| 155 |
+
"detected_language": result.get("detected_language_ld"),
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
except Exception as e:
|
| 159 |
+
return {
|
| 160 |
+
"success": False,
|
| 161 |
+
"error": str(e),
|
| 162 |
+
"rouge1_f": 0.0,
|
| 163 |
+
"rouge2_f": 0.0,
|
| 164 |
+
"rougeL_f": 0.0,
|
| 165 |
+
"generated_summary": None,
|
| 166 |
+
"detected_language": None,
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
def evaluate_dataset(
|
| 170 |
+
self, dataset_name: str, language: str, num_samples: int = 25
|
| 171 |
+
) -> Dict[str, Any]:
|
| 172 |
+
"""
|
| 173 |
+
Evaluate summarizer on a complete dataset.
|
| 174 |
+
|
| 175 |
+
Args:
|
| 176 |
+
dataset_name: Name of the dataset
|
| 177 |
+
language: Language identifier
|
| 178 |
+
num_samples: Number of samples to evaluate
|
| 179 |
+
|
| 180 |
+
Returns:
|
| 181 |
+
Dictionary with aggregated results
|
| 182 |
+
"""
|
| 183 |
+
print(f"=== Evaluating {dataset_name} ({language}) ===")
|
| 184 |
+
|
| 185 |
+
# Load samples
|
| 186 |
+
samples = self.load_dataset_samples(dataset_name, language, num_samples)
|
| 187 |
+
if not samples:
|
| 188 |
+
return {
|
| 189 |
+
"dataset": dataset_name,
|
| 190 |
+
"language": language,
|
| 191 |
+
"total_samples": 0,
|
| 192 |
+
"successful_evaluations": 0,
|
| 193 |
+
"avg_rouge1_f": 0.0,
|
| 194 |
+
"avg_rouge2_f": 0.0,
|
| 195 |
+
"avg_rougeL_f": 0.0,
|
| 196 |
+
"individual_results": [],
|
| 197 |
+
"error": "Failed to load samples",
|
| 198 |
+
}
|
| 199 |
+
|
| 200 |
+
# Evaluate each sample
|
| 201 |
+
individual_results = []
|
| 202 |
+
successful_evaluations = 0
|
| 203 |
+
total_rouge1 = 0.0
|
| 204 |
+
total_rouge2 = 0.0
|
| 205 |
+
total_rougeL = 0.0
|
| 206 |
+
|
| 207 |
+
for i, sample in enumerate(samples):
|
| 208 |
+
try: # Progress logging every 10 evaluations
|
| 209 |
+
if (i + 1) % 10 == 0:
|
| 210 |
+
print(f" Evaluating sample {i + 1}/{len(samples)}...")
|
| 211 |
+
|
| 212 |
+
# Evaluate single sample
|
| 213 |
+
eval_result = self.evaluate_sample(sample["article"], sample["summary"])
|
| 214 |
+
|
| 215 |
+
# Store individual result
|
| 216 |
+
individual_result = {
|
| 217 |
+
"sample_id": i + 1,
|
| 218 |
+
"dataset": dataset_name,
|
| 219 |
+
"language": language,
|
| 220 |
+
"success": eval_result["success"],
|
| 221 |
+
"error": eval_result["error"],
|
| 222 |
+
"rouge1_f": eval_result["rouge1_f"],
|
| 223 |
+
"rouge2_f": eval_result["rouge2_f"],
|
| 224 |
+
"rougeL_f": eval_result["rougeL_f"],
|
| 225 |
+
"detected_language": eval_result["detected_language"],
|
| 226 |
+
"reference_summary": (
|
| 227 |
+
sample["summary"][:200] + "..."
|
| 228 |
+
if len(sample["summary"]) > 200
|
| 229 |
+
else sample["summary"]
|
| 230 |
+
),
|
| 231 |
+
"generated_summary": (
|
| 232 |
+
eval_result["generated_summary"][:200] + "..."
|
| 233 |
+
if eval_result["generated_summary"]
|
| 234 |
+
and len(eval_result["generated_summary"]) > 200
|
| 235 |
+
else eval_result["generated_summary"]
|
| 236 |
+
),
|
| 237 |
+
}
|
| 238 |
+
individual_results.append(individual_result)
|
| 239 |
+
|
| 240 |
+
# Accumulate scores for successful evaluations
|
| 241 |
+
if eval_result["success"]:
|
| 242 |
+
successful_evaluations += 1
|
| 243 |
+
total_rouge1 += eval_result["rouge1_f"]
|
| 244 |
+
total_rouge2 += eval_result["rouge2_f"]
|
| 245 |
+
total_rougeL += eval_result["rougeL_f"]
|
| 246 |
+
|
| 247 |
+
except Exception as e:
|
| 248 |
+
print(f" Warning: Error evaluating sample {i + 1}: {e}")
|
| 249 |
+
individual_results.append(
|
| 250 |
+
{
|
| 251 |
+
"sample_id": i + 1,
|
| 252 |
+
"dataset": dataset_name,
|
| 253 |
+
"language": language,
|
| 254 |
+
"success": False,
|
| 255 |
+
"error": str(e),
|
| 256 |
+
"rouge1_f": 0.0,
|
| 257 |
+
"rouge2_f": 0.0,
|
| 258 |
+
"rougeL_f": 0.0,
|
| 259 |
+
"detected_language": None,
|
| 260 |
+
"reference_summary": (
|
| 261 |
+
sample["summary"][:200] + "..."
|
| 262 |
+
if len(sample["summary"]) > 200
|
| 263 |
+
else sample["summary"]
|
| 264 |
+
),
|
| 265 |
+
"generated_summary": None,
|
| 266 |
+
}
|
| 267 |
+
)
|
| 268 |
+
continue
|
| 269 |
+
|
| 270 |
+
# Calculate averages
|
| 271 |
+
avg_rouge1 = (
|
| 272 |
+
total_rouge1 / successful_evaluations if successful_evaluations > 0 else 0.0
|
| 273 |
+
)
|
| 274 |
+
avg_rouge2 = (
|
| 275 |
+
total_rouge2 / successful_evaluations if successful_evaluations > 0 else 0.0
|
| 276 |
+
)
|
| 277 |
+
avg_rougeL = (
|
| 278 |
+
total_rougeL / successful_evaluations if successful_evaluations > 0 else 0.0
|
| 279 |
+
)
|
| 280 |
+
|
| 281 |
+
print(
|
| 282 |
+
f"Completed evaluation: {successful_evaluations}/{len(samples)} successful"
|
| 283 |
+
)
|
| 284 |
+
print(f"Average ROUGE-1: {avg_rouge1:.4f}")
|
| 285 |
+
print(f"Average ROUGE-2: {avg_rouge2:.4f}")
|
| 286 |
+
print(f"Average ROUGE-L: {avg_rougeL:.4f}\n")
|
| 287 |
+
|
| 288 |
+
return {
|
| 289 |
+
"dataset": dataset_name,
|
| 290 |
+
"language": language,
|
| 291 |
+
"total_samples": len(samples),
|
| 292 |
+
"successful_evaluations": successful_evaluations,
|
| 293 |
+
"avg_rouge1_f": avg_rouge1,
|
| 294 |
+
"avg_rouge2_f": avg_rouge2,
|
| 295 |
+
"avg_rougeL_f": avg_rougeL,
|
| 296 |
+
"individual_results": individual_results,
|
| 297 |
+
"error": None,
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
    def run_full_evaluation(self, num_samples: int = 25) -> str:
        """
        Run complete evaluation pipeline on all datasets.

        Evaluates each configured dataset, writes a per-language CSV after
        each one (so partial progress survives a crash), then writes and
        returns the combined summary CSV path.

        Args:
            num_samples: Number of samples per dataset

        Returns:
            Path to the saved summary results CSV file
        """
        print("🚀 Starting Full Multilingual Evaluation Pipeline 🚀\n")

        # Datasets to evaluate. NOTE(review): only English and French are
        # configured here, despite the module docstring mentioning German.
        datasets_config = [
            {"dataset_name": "cnn_dailymail", "language": "en"},
            {"dataset_name": "mlsum", "language": "fr"},
        ]

        all_summary_results = []

        # Evaluate each dataset
        for config in datasets_config:
            dataset_result = self.evaluate_dataset(
                config["dataset_name"], config["language"], num_samples
            )

            # Save individual CSV immediately after each language evaluation
            individual_csv_path = self.save_individual_results_to_csv(
                dataset_result["individual_results"],
                dataset_result["dataset"],
                dataset_result["language"],
            )
            print(
                f"✅ Saved {dataset_result['language'].upper()} results to: {individual_csv_path}"
            )

            # Store summary results (success_rate guarded against zero samples).
            all_summary_results.append(
                {
                    "dataset": dataset_result["dataset"],
                    "language": dataset_result["language"],
                    "total_samples": dataset_result["total_samples"],
                    "successful_evaluations": dataset_result["successful_evaluations"],
                    "success_rate": (
                        dataset_result["successful_evaluations"]
                        / dataset_result["total_samples"]
                        if dataset_result["total_samples"] > 0
                        else 0.0
                    ),
                    "avg_rouge1_f": dataset_result["avg_rouge1_f"],
                    "avg_rouge2_f": dataset_result["avg_rouge2_f"],
                    "avg_rougeL_f": dataset_result["avg_rougeL_f"],
                    "error": dataset_result["error"],
                }
            )

        # Save combined summary CSV
        summary_csv_path = self.save_summary_results_to_csv(all_summary_results)

        # Print final summary table to the console.
        print("📊 FINAL EVALUATION SUMMARY 📊")
        print("=" * 50)
        for result in all_summary_results:
            print(f"{result['dataset']} ({result['language'].upper()}):")
            print(
                f"  Success Rate: {result['success_rate']:.1%} ({result['successful_evaluations']}/{result['total_samples']})"
            )
            print(f"  ROUGE-1: {result['avg_rouge1_f']:.4f}")
            print(f"  ROUGE-2: {result['avg_rouge2_f']:.4f}")
            print(f"  ROUGE-L: {result['avg_rougeL_f']:.4f}")
            print()

        print(f"✅ Combined summary saved to: {summary_csv_path}")
        return summary_csv_path
|
| 374 |
+
|
| 375 |
+
def save_individual_results_to_csv(
|
| 376 |
+
self, individual_results: List[Dict], dataset_name: str, language: str
|
| 377 |
+
) -> str:
|
| 378 |
+
"""
|
| 379 |
+
Save individual evaluation results to a language-specific CSV file with summary row.
|
| 380 |
+
|
| 381 |
+
Args:
|
| 382 |
+
individual_results: List of individual sample results
|
| 383 |
+
dataset_name: Name of the dataset
|
| 384 |
+
language: Language identifier
|
| 385 |
+
|
| 386 |
+
Returns:
|
| 387 |
+
Path to the saved CSV file
|
| 388 |
+
"""
|
| 389 |
+
# Create filename
|
| 390 |
+
csv_path = f"evaluation_results_{language}.csv"
|
| 391 |
+
|
| 392 |
+
with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
|
| 393 |
+
if individual_results:
|
| 394 |
+
fieldnames = individual_results[0].keys()
|
| 395 |
+
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
| 396 |
+
writer.writeheader()
|
| 397 |
+
writer.writerows(individual_results)
|
| 398 |
+
|
| 399 |
+
# Add summary row
|
| 400 |
+
successful_results = [r for r in individual_results if r["success"]]
|
| 401 |
+
if successful_results:
|
| 402 |
+
avg_rouge1 = sum(r["rouge1_f"] for r in successful_results) / len(
|
| 403 |
+
successful_results
|
| 404 |
+
)
|
| 405 |
+
avg_rouge2 = sum(r["rouge2_f"] for r in successful_results) / len(
|
| 406 |
+
successful_results
|
| 407 |
+
)
|
| 408 |
+
avg_rougeL = sum(r["rougeL_f"] for r in successful_results) / len(
|
| 409 |
+
successful_results
|
| 410 |
+
)
|
| 411 |
+
|
| 412 |
+
summary_row = {
|
| 413 |
+
"sample_id": "SUMMARY",
|
| 414 |
+
"dataset": dataset_name,
|
| 415 |
+
"language": language,
|
| 416 |
+
"success": f"{len(successful_results)}/{len(individual_results)}",
|
| 417 |
+
"error": None,
|
| 418 |
+
"rouge1_f": avg_rouge1,
|
| 419 |
+
"rouge2_f": avg_rouge2,
|
| 420 |
+
"rougeL_f": avg_rougeL,
|
| 421 |
+
"detected_language": None,
|
| 422 |
+
"reference_summary": "AVERAGE SCORES",
|
| 423 |
+
"generated_summary": f"Success Rate: {len(successful_results)/len(individual_results):.1%}",
|
| 424 |
+
}
|
| 425 |
+
writer.writerow(summary_row)
|
| 426 |
+
|
| 427 |
+
return csv_path
|
| 428 |
+
|
| 429 |
+
def save_summary_results_to_csv(self, summary_results: List[Dict]) -> str:
|
| 430 |
+
"""
|
| 431 |
+
Save aggregated summary results to CSV file.
|
| 432 |
+
|
| 433 |
+
Args:
|
| 434 |
+
summary_results: List of aggregated dataset results
|
| 435 |
+
|
| 436 |
+
Returns:
|
| 437 |
+
Path to the saved CSV file
|
| 438 |
+
"""
|
| 439 |
+
summary_csv_path = "evaluation_results_summary.csv"
|
| 440 |
+
with open(summary_csv_path, "w", newline="", encoding="utf-8") as csvfile:
|
| 441 |
+
if summary_results:
|
| 442 |
+
fieldnames = summary_results[0].keys()
|
| 443 |
+
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
| 444 |
+
writer.writeheader()
|
| 445 |
+
writer.writerows(summary_results)
|
| 446 |
+
|
| 447 |
+
return summary_csv_path
|
| 448 |
+
|
| 449 |
+
|
| 450 |
+
def main():
|
| 451 |
+
"""Main evaluation function."""
|
| 452 |
+
try:
|
| 453 |
+
evaluator = SummarizerEvaluator()
|
| 454 |
+
csv_path = evaluator.run_full_evaluation(num_samples=25)
|
| 455 |
+
print(f"\n🎉 Evaluation completed successfully!")
|
| 456 |
+
print(f"Results available in: {csv_path}")
|
| 457 |
+
|
| 458 |
+
except Exception as e:
|
| 459 |
+
print(f"❌ Evaluation failed: {e}")
|
| 460 |
+
import traceback
|
| 461 |
+
|
| 462 |
+
traceback.print_exc()
|
| 463 |
+
|
| 464 |
+
|
| 465 |
+
if __name__ == "__main__":
|
| 466 |
+
main()
|
evaluation_dashboard.py
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Evaluation Results Dashboard for Multilingual News Article Summarizer
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import streamlit as st
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import math
|
| 8 |
+
from typing import Dict, Any
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def load_evaluation_data():
|
| 12 |
+
"""Load evaluation results from CSV files."""
|
| 13 |
+
try:
|
| 14 |
+
# Load English results
|
| 15 |
+
df_en = pd.read_csv("evaluation_results_en.csv")
|
| 16 |
+
|
| 17 |
+
# Load French results
|
| 18 |
+
df_fr = pd.read_csv("evaluation_results_fr.csv")
|
| 19 |
+
|
| 20 |
+
return df_en, df_fr
|
| 21 |
+
except FileNotFoundError as e:
|
| 22 |
+
st.error(f"Could not load evaluation files: {e}")
|
| 23 |
+
return None, None
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def display_summary_metrics(df: pd.DataFrame, title: str):
|
| 27 |
+
"""Display summary metrics in a highlighted card format."""
|
| 28 |
+
# Get summary row (last row with sample_id = 'SUMMARY')
|
| 29 |
+
summary_row = (
|
| 30 |
+
df[df["sample_id"] == "SUMMARY"].iloc[0]
|
| 31 |
+
if len(df[df["sample_id"] == "SUMMARY"]) > 0
|
| 32 |
+
else None
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
if summary_row is not None:
|
| 36 |
+
st.markdown(f"### 📊 {title} - Summary Results")
|
| 37 |
+
|
| 38 |
+
# Create metrics columns with ample spacing
|
| 39 |
+
col1, col2, col3 = st.columns([1, 1, 1])
|
| 40 |
+
|
| 41 |
+
with col1:
|
| 42 |
+
st.metric(
|
| 43 |
+
label="ROUGE-1", value=f"{summary_row['rouge1_f']:.4f}", delta=None
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
with col2:
|
| 47 |
+
st.metric(
|
| 48 |
+
label="ROUGE-2", value=f"{summary_row['rouge2_f']:.4f}", delta=None
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
with col3:
|
| 52 |
+
st.metric(
|
| 53 |
+
label="ROUGE-L", value=f"{summary_row['rougeL_f']:.4f}", delta=None
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
st.markdown("---")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def display_paginated_table(
|
| 60 |
+
df: pd.DataFrame, columns_to_show: list, page_size: int = 15
|
| 61 |
+
):
|
| 62 |
+
"""Display a paginated table with the specified columns."""
|
| 63 |
+
# Filter out summary row for the detailed table
|
| 64 |
+
df_filtered = df[df["sample_id"] != "SUMMARY"].copy()
|
| 65 |
+
df_display = df_filtered[columns_to_show].copy()
|
| 66 |
+
|
| 67 |
+
# Rename columns for better display
|
| 68 |
+
column_rename = {
|
| 69 |
+
"sample_id": "Sample ID",
|
| 70 |
+
"rouge1_f": "ROUGE-1",
|
| 71 |
+
"rouge2_f": "ROUGE-2",
|
| 72 |
+
"rougeL_f": "ROUGE-L",
|
| 73 |
+
"reference_summary": "Reference Summary",
|
| 74 |
+
"generated_summary": "Generated Summary",
|
| 75 |
+
}
|
| 76 |
+
df_display = df_display.rename(columns=column_rename)
|
| 77 |
+
|
| 78 |
+
# Format ROUGE scores to 4 decimal places
|
| 79 |
+
for col in ["ROUGE-1", "ROUGE-2", "ROUGE-L"]:
|
| 80 |
+
if col in df_display.columns:
|
| 81 |
+
df_display[col] = df_display[col].apply(lambda x: f"{x:.4f}")
|
| 82 |
+
|
| 83 |
+
# Calculate pagination
|
| 84 |
+
total_rows = len(df_display)
|
| 85 |
+
total_pages = math.ceil(total_rows / page_size)
|
| 86 |
+
|
| 87 |
+
if total_pages > 1:
|
| 88 |
+
# Page selector
|
| 89 |
+
col1, col2, col3 = st.columns([1, 2, 1])
|
| 90 |
+
with col2:
|
| 91 |
+
page = st.selectbox(
|
| 92 |
+
"Select Page",
|
| 93 |
+
range(1, total_pages + 1),
|
| 94 |
+
format_func=lambda x: f"Page {x} of {total_pages}",
|
| 95 |
+
key="page_selector",
|
| 96 |
+
)
|
| 97 |
+
else:
|
| 98 |
+
page = 1
|
| 99 |
+
|
| 100 |
+
# Calculate start and end indices
|
| 101 |
+
start_idx = (page - 1) * page_size
|
| 102 |
+
end_idx = min(start_idx + page_size, total_rows)
|
| 103 |
+
|
| 104 |
+
# Display page info
|
| 105 |
+
st.caption(f"Showing rows {start_idx + 1}-{end_idx} of {total_rows}")
|
| 106 |
+
|
| 107 |
+
# Display the table
|
| 108 |
+
df_page = df_display.iloc[start_idx:end_idx]
|
| 109 |
+
st.dataframe(
|
| 110 |
+
df_page,
|
| 111 |
+
use_container_width=True,
|
| 112 |
+
hide_index=True,
|
| 113 |
+
column_config={
|
| 114 |
+
"Reference Summary": st.column_config.TextColumn(
|
| 115 |
+
width="medium", help="Ground truth summary from the dataset"
|
| 116 |
+
),
|
| 117 |
+
"Generated Summary": st.column_config.TextColumn(
|
| 118 |
+
width="medium", help="Summary generated by our model"
|
| 119 |
+
),
|
| 120 |
+
"ROUGE-1": st.column_config.NumberColumn(
|
| 121 |
+
help="ROUGE-1 F1 score", format="%.4f"
|
| 122 |
+
),
|
| 123 |
+
"ROUGE-2": st.column_config.NumberColumn(
|
| 124 |
+
help="ROUGE-2 F1 score", format="%.4f"
|
| 125 |
+
),
|
| 126 |
+
"ROUGE-L": st.column_config.NumberColumn(
|
| 127 |
+
help="ROUGE-L F1 score", format="%.4f"
|
| 128 |
+
),
|
| 129 |
+
},
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def display_benchmark_table():
|
| 134 |
+
"""Display the official Pegasus benchmark results."""
|
| 135 |
+
st.markdown("### 🏆 Official Google Pegasus Benchmark Results")
|
| 136 |
+
st.markdown("*ROUGE scores in format: ROUGE-1/ROUGE-2/ROUGE-L*")
|
| 137 |
+
|
| 138 |
+
# Create the benchmark data
|
| 139 |
+
benchmark_data = {
|
| 140 |
+
"Dataset": ["xsum", "cnn_dailymail", "newsroom", "multi_news", "gigaword"],
|
| 141 |
+
"C4": [
|
| 142 |
+
"45.20/22.06/36.99",
|
| 143 |
+
"43.90/21.20/40.76",
|
| 144 |
+
"45.07/33.39/41.28",
|
| 145 |
+
"46.74/17.95/24.26",
|
| 146 |
+
"38.75/19.96/36.14",
|
| 147 |
+
],
|
| 148 |
+
"HugeNews": [
|
| 149 |
+
"47.21/24.56/39.25",
|
| 150 |
+
"44.17/21.47/41.11",
|
| 151 |
+
"45.15/33.51/41.33",
|
| 152 |
+
"47.52/18.72/24.91",
|
| 153 |
+
"39.12/19.86/36.24",
|
| 154 |
+
],
|
| 155 |
+
"Mixed & Stochastic": [
|
| 156 |
+
"47.60/24.83/39.64",
|
| 157 |
+
"44.16/21.56/41.30",
|
| 158 |
+
"45.98/34.20/42.18",
|
| 159 |
+
"47.65/18.75/24.95",
|
| 160 |
+
"39.65/20.47/36.76",
|
| 161 |
+
],
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
df_benchmark = pd.DataFrame(benchmark_data)
|
| 165 |
+
|
| 166 |
+
st.dataframe(
|
| 167 |
+
df_benchmark,
|
| 168 |
+
use_container_width=True,
|
| 169 |
+
hide_index=True,
|
| 170 |
+
column_config={
|
| 171 |
+
"Dataset": st.column_config.TextColumn(
|
| 172 |
+
width="medium", help="Evaluation dataset"
|
| 173 |
+
),
|
| 174 |
+
"C4": st.column_config.TextColumn(
|
| 175 |
+
width="medium", help="C4 pre-training configuration"
|
| 176 |
+
),
|
| 177 |
+
"HugeNews": st.column_config.TextColumn(
|
| 178 |
+
width="medium", help="HugeNews pre-training configuration"
|
| 179 |
+
),
|
| 180 |
+
"Mixed & Stochastic": st.column_config.TextColumn(
|
| 181 |
+
width="medium", help="Mixed & Stochastic pre-training configuration"
|
| 182 |
+
),
|
| 183 |
+
},
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def show_evaluation_page():
|
| 188 |
+
"""Evaluation dashboard page function for navigation."""
|
| 189 |
+
st.title("📊 Evaluation Results Dashboard")
|
| 190 |
+
st.markdown(
|
| 191 |
+
"*Comprehensive evaluation results for the Multilingual News Article Summarizer*"
|
| 192 |
+
)
|
| 193 |
+
st.markdown("---")
|
| 194 |
+
|
| 195 |
+
# Load data
|
| 196 |
+
df_en, df_fr = load_evaluation_data()
|
| 197 |
+
|
| 198 |
+
if df_en is None or df_fr is None:
|
| 199 |
+
st.error(
|
| 200 |
+
"⚠️ Could not load evaluation data. Please ensure evaluation CSV files are present."
|
| 201 |
+
)
|
| 202 |
+
return # Create selection dropdown
|
| 203 |
+
st.markdown("### 🔍 Select Evaluation Results to View")
|
| 204 |
+
|
| 205 |
+
# Use dropdown for clean, modern selection
|
| 206 |
+
option = st.selectbox(
|
| 207 |
+
"Choose an option:",
|
| 208 |
+
[
|
| 209 |
+
"🏆 Official Google Pegasus Benchmark Results",
|
| 210 |
+
"🇺🇸 Our English Evaluation (CNN/DailyMail)",
|
| 211 |
+
"🇫🇷 Our French Evaluation (MLSUM)",
|
| 212 |
+
],
|
| 213 |
+
index=0,
|
| 214 |
+
key="evaluation_option",
|
| 215 |
+
)
|
| 216 |
+
|
| 217 |
+
st.markdown("---") # Display content based on selection
|
| 218 |
+
if option == "🏆 Official Google Pegasus Benchmark Results":
|
| 219 |
+
display_benchmark_table()
|
| 220 |
+
|
| 221 |
+
# Page-specific disclaimer
|
| 222 |
+
st.markdown("---")
|
| 223 |
+
st.info(
|
| 224 |
+
"📖 **Additional Information**: For more details about the Pegasus model, visit the [official HuggingFace model page](https://huggingface.co/google/pegasus-cnn_dailymail)."
|
| 225 |
+
)
|
| 226 |
+
|
| 227 |
+
elif option == "🇺🇸 Our English Evaluation (CNN/DailyMail)":
|
| 228 |
+
# Display summary metrics first
|
| 229 |
+
display_summary_metrics(df_en, "English (CNN/DailyMail)")
|
| 230 |
+
|
| 231 |
+
# Display detailed results
|
| 232 |
+
st.markdown("### 📝 Detailed Sample Results")
|
| 233 |
+
columns_to_show = [
|
| 234 |
+
"sample_id",
|
| 235 |
+
"rouge1_f",
|
| 236 |
+
"rouge2_f",
|
| 237 |
+
"rougeL_f",
|
| 238 |
+
"reference_summary",
|
| 239 |
+
"generated_summary",
|
| 240 |
+
]
|
| 241 |
+
display_paginated_table(df_en, columns_to_show)
|
| 242 |
+
|
| 243 |
+
# Page-specific disclaimer
|
| 244 |
+
st.markdown("---")
|
| 245 |
+
st.warning(
|
| 246 |
+
"⚠️ **Disclaimer**: ROUGE scores shown are based on a small test set of 25 articles per dataset, due to time and computational constraints. These results are indicative but not fully representative. Performance is expected to improve with larger, more comprehensive test sets."
|
| 247 |
+
)
|
| 248 |
+
st.info(
|
| 249 |
+
"📖 **Dataset Information**: For more details about the CNN/DailyMail dataset used in this evaluation, visit the [official dataset page](https://huggingface.co/datasets/abisee/cnn_dailymail)."
|
| 250 |
+
)
|
| 251 |
+
|
| 252 |
+
elif option == "🇫🇷 Our French Evaluation (MLSUM)":
|
| 253 |
+
# Display summary metrics first
|
| 254 |
+
display_summary_metrics(df_fr, "French (MLSUM)")
|
| 255 |
+
|
| 256 |
+
# Display detailed results
|
| 257 |
+
st.markdown("### 📝 Detailed Sample Results")
|
| 258 |
+
columns_to_show = [
|
| 259 |
+
"sample_id",
|
| 260 |
+
"rouge1_f",
|
| 261 |
+
"rouge2_f",
|
| 262 |
+
"rougeL_f",
|
| 263 |
+
"reference_summary",
|
| 264 |
+
"generated_summary",
|
| 265 |
+
]
|
| 266 |
+
display_paginated_table(df_fr, columns_to_show) # Page-specific disclaimer
|
| 267 |
+
st.markdown("---")
|
| 268 |
+
st.warning(
|
| 269 |
+
"⚠️ **Disclaimer**: Evaluations for non-English summaries (e.g., French) tend to be lower than for English ones primarily due to cascading errors introduced during the machine translation step. Our translation model, while generally good, is a distilled, research-focused version and not intended for production deployment. This means it can struggle with the nuances of news articles, introducing inaccuracies or losing subtle context. These translation imperfections are then amplified when fed into the English-optimized summarization model, often resulting in less precise content in the final summary. For more details about the translation model limitations and specifications, visit the [NLLB-200 distilled model page](https://huggingface.co/facebook/nllb-200-distilled-600M)."
|
| 270 |
+
)
|
| 271 |
+
st.info(
|
| 272 |
+
"📖 **Dataset Information**: For more details about the MLSUM dataset used in this evaluation, visit the [official dataset page](https://huggingface.co/datasets/reciTAL/mlsum)."
|
| 273 |
+
)
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
# For backwards compatibility when run directly
|
| 277 |
+
def main():
|
| 278 |
+
"""Main function for backwards compatibility."""
|
| 279 |
+
st.set_page_config(
|
| 280 |
+
page_title="Evaluation Results Dashboard", page_icon="📊", layout="wide"
|
| 281 |
+
)
|
| 282 |
+
show_evaluation_page()
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
if __name__ == "__main__":
|
| 286 |
+
main()
|
evaluation_results_en.csv
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sample_id,dataset,language,success,error,rouge1_f,rouge2_f,rougeL_f,detected_language,reference_summary,generated_summary
|
| 2 |
+
1,cnn_dailymail,en,True,,0.39999999999999997,0.22727272727272727,0.33333333333333337,en,"Membership gives the ICC jurisdiction over alleged crimes committed in Palestinian territories since last June .
|
| 3 |
+
Israel and the United States opposed the move, which could open the door to war crimes ...","The Palestinian Authority officially becomes the 123rd member of the International Criminal Court.
|
| 4 |
+
The move gives the court jurisdiction over alleged crimes in Palestinian territories. The United Stat..."
|
| 5 |
+
2,cnn_dailymail,en,True,,0.4888888888888889,0.31818181818181823,0.4444444444444445,en,"Theia, a bully breed mix, was apparently hit by a car, whacked with a hammer and buried in a field .
|
| 6 |
+
""She's a true miracle dog and she deserves a good life,"" says Sara Mellado, who is looking for a ho...","Theia, a white-and-black bully breed mix, was apparently hit by a car and buried in a field.
|
| 7 |
+
Four days later, she staggered to a nearby farm and was taken in by a worker.
|
| 8 |
+
She's been treated at Washing..."
|
| 9 |
+
3,cnn_dailymail,en,True,,0.4672897196261682,0.24761904761904763,0.26168224299065423,en,"Mohammad Javad Zarif has spent more time with John Kerry than any other foreign minister .
|
| 10 |
+
He once participated in a takeover of the Iranian Consulate in San Francisco .
|
| 11 |
+
The Iranian foreign minister t...","Mohammad Javad Zarif is the Iranian foreign minister.
|
| 12 |
+
He has been the U.S. secretary of state's opposite number in nuclear talks. Iranian Foreign Minister Javad Zarif and U.S. Secretary of State John ..."
|
| 13 |
+
4,cnn_dailymail,en,True,,0.3561643835616438,0.16901408450704225,0.24657534246575344,en,"17 Americans were exposed to the Ebola virus while in Sierra Leone in March .
|
| 14 |
+
Another person was diagnosed with the disease and taken to hospital in Maryland .
|
| 15 |
+
National Institutes of Health says the p...","One of the five had a heart-related issue on Saturday and has been discharged but hasn't left the area.
|
| 16 |
+
They were exposed to Ebola in Sierra Leone in March."
|
| 17 |
+
5,cnn_dailymail,en,True,,0.3611111111111111,0.1142857142857143,0.2222222222222222,en,"Student is no longer on Duke University campus and will face disciplinary review .
|
| 18 |
+
School officials identified student during investigation and the person admitted to hanging the noose, Duke says .
|
| 19 |
+
Th...","A student has admitted to hanging a noose from a tree near a student union, Duke says.
|
| 20 |
+
The prestigious private school didn't identify the student, citing federal privacy laws."
|
| 21 |
+
6,cnn_dailymail,en,True,,0.22988505747126436,0.07058823529411765,0.1839080459770115,en,"College-bound basketball star asks girl with Down syndrome to high school prom .
|
| 22 |
+
Pictures of the two during the ""prom-posal"" have gone viral .","Trey Moses is a star high school basketball player in Louisville, Kentucky.
|
| 23 |
+
He made the prom-posal in Ellie Meredith's P.E. class.
|
| 24 |
+
""That's the kind of person Trey is,"" a teacher says. Trey's future co..."
|
| 25 |
+
7,cnn_dailymail,en,True,,0.36363636363636365,0.13793103448275862,0.21590909090909088,en,"Amnesty's annual death penalty report catalogs encouraging signs, but setbacks in numbers of those sentenced to death .
|
| 26 |
+
Organization claims that governments around the world are using the threat of te...","Amnesty International's annual death penalty report says number of executions fell by 22% in 2014.
|
| 27 |
+
But mass-sentencing cases increased by 28% globally last year.
|
| 28 |
+
Pakistan, China and Egypt are cited as..."
|
| 29 |
+
8,cnn_dailymail,en,True,,0.358974358974359,0.15789473684210528,0.33333333333333337,en,"Andrew Getty's death appears to be from natural causes, police say, citing coroner's early assessment .
|
| 30 |
+
In a petition for a restraining order, Getty had written he had a serious medical condition.
|
| 31 |
+
Pol...","Andrew Getty appears to have died of natural causes, police say.
|
| 32 |
+
Coroner's preliminary assessment is there was no foul play involved, detective says.
|
| 33 |
+
The 47-year-old had ""several health issues,"" detec..."
|
| 34 |
+
9,cnn_dailymail,en,True,,0.23076923076923078,0.08,0.15384615384615385,en,"Once a super typhoon, Maysak is now a tropical storm with 70 mph winds .
|
| 35 |
+
It could still cause flooding, landslides and other problems in the Philippines .","Tropical storm Maysak is centered 200 miles southwest of Aurora province.
|
| 36 |
+
It's expected to make landfall Sunday morning on the southeastern coast of Isabela province."
|
| 37 |
+
10,cnn_dailymail,en,True,,0.5365853658536586,0.30769230769230765,0.3902439024390244,en,"Bob Barker returned to host ""The Price Is Right"" on Wednesday .
|
| 38 |
+
Barker, 91, had retired as host in 2007 .","Barker hosted ""The Price Is Right"" for 35 years before stepping down in 2007.
|
| 39 |
+
He returned to the show on April 1."
|
| 40 |
+
11,cnn_dailymail,en,True,,0.24242424242424243,0.0625,0.18181818181818182,en,"London's Metropolitan Police say the man was arrested at Luton airport after landing on a flight from Istanbul .
|
| 41 |
+
He's been charged with terror offenses allegedly committed since the start of November ...","Yahya Rashid is charged with engaging in conduct in preparation of acts of terrorism.
|
| 42 |
+
He's also charged with engaging in conduct with the intention of assisting others to commit acts of terrorism."
|
| 43 |
+
12,cnn_dailymail,en,True,,0.2765957446808511,0.10869565217391305,0.19148936170212763,en,"""Furious 7"" pays tribute to star Paul Walker, who died during filming .
|
| 44 |
+
Vin Diesel: ""This movie is more than a movie""
|
| 45 |
+
""Furious 7"" opens Friday .","Paul Walker died in a car crash while filming ""Furious 7""
|
| 46 |
+
Fans have been paying tribute to the late actor. A week after Walker's death, about 5,000 people attended an outdoor memorial to him in Los An..."
|
| 47 |
+
13,cnn_dailymail,en,True,,0.31775700934579443,0.15238095238095237,0.205607476635514,en,"Museum: Anne Frank died earlier than previously believed .
|
| 48 |
+
Researchers re-examined archives and testimonies of survivors .
|
| 49 |
+
Anne and older sister Margot Frank are believed to have died in February 1945...","Anne Frank died of typhus in a Nazi concentration camp at the age of 15.
|
| 50 |
+
Two weeks after her supposed death, the Bergen-Belsen camp was liberated.
|
| 51 |
+
New research shows Anne and her older sister, Margot ..."
|
| 52 |
+
14,cnn_dailymail,en,True,,0.2346368715083799,0.03389830508474576,0.1340782122905028,en,"LZ: Indiana law pushing back LGBT rights, and other states' anti-LGBT moves, bow to far right wing that GOP candidates need for 2016 .
|
| 53 |
+
Cruz, Huckabee, Jindal, Carson, Walker are reviving culture wars,...","Indiana Gov. Mike Pence signed a religious freedom law last week that opens the door to discrimination against gays and lesbians.
|
| 54 |
+
John Avlon: The law was Pence's way of shoring up his street cred with..."
|
| 55 |
+
15,cnn_dailymail,en,True,,0.37209302325581395,0.1951219512195122,0.37209302325581395,en,"Singing the national anthem is a risky proposition .
|
| 56 |
+
Whitney Houston nailed it; Roseanne Barr destroyed it .","Mtley Cre's Vince Neil sang the national anthem at a Las Vegas football game.
|
| 57 |
+
Whitney Houston, Roseanne Barr and Jimi Hendrix have all butchered the song."
|
| 58 |
+
16,cnn_dailymail,en,True,,0.15384615384615383,0.01111111111111111,0.07692307692307691,en,"While Republican Gov. Asa Hutchinson was weighing an Arkansas religious freedom bill, Walmart voiced its opposition .
|
| 59 |
+
Walmart and other high-profile businesses are showing their support for gay and le...","Walmart's opposition to a religious freedom law in its home state of Arkansas resonated most deeply.
|
| 60 |
+
The company is emerging as a bellwether for shifting public opinion on hot-button political issues...."
|
| 61 |
+
17,cnn_dailymail,en,True,,0.30158730158730157,0.17741935483870966,0.253968253968254,en,"Amnesty International releases its annual review of the death penalty worldwide; much of it makes for grim reading .
|
| 62 |
+
Salil Shetty: Countries that use executions to deal with problems are on the wrong ...","Amnesty International releases its annual review of the death penalty worldwide.
|
| 63 |
+
In Pakistan, the government lifted a six-year moratorium on the execution of civilians.
|
| 64 |
+
A sharp spike in death sentence..."
|
| 65 |
+
18,cnn_dailymail,en,True,,0.26666666666666666,0.10762331838565023,0.1688888888888889,en,"Marseille prosecutor says ""so far no videos were used in the crash investigation"" despite media reports .
|
| 66 |
+
Journalists at Bild and Paris Match are ""very confident"" the video clip is real, an editor say...","""So far no videos were used in the crash investigation,"" Marseille prosecutor Brice Robin says.
|
| 67 |
+
Paris Match and Bild report that a cell phone video was found at the crash site.
|
| 68 |
+
The publications descri..."
|
| 69 |
+
19,cnn_dailymail,en,True,,0.33644859813084116,0.11428571428571428,0.24299065420560745,en,"The Rev. Robert Schuller, 88, had been diagnosed with esophageal cancer in 2013 .
|
| 70 |
+
His TV show, ""Hour of Power,"" was enormously popular in the 1970s and 1980s .","Schuller was diagnosed with esophageal cancer in August 2013.
|
| 71 |
+
He was born in an Iowa farmhouse without running water. Schuller began broadcasting ""Hour of Power"" in 1970.
|
| 72 |
+
The show, which ran for decad..."
|
| 73 |
+
20,cnn_dailymail,en,True,,0.30303030303030304,0.03125,0.18181818181818182,en,"Former GOP representative compares President Obama to Andreas Lubitz .
|
| 74 |
+
Bachmann said with possible Iran deal, Obama will fly ""entire nation into the rocks""
|
| 75 |
+
Reaction on social media? She was blasted by...","""With his Iran deal, Barack Obama is for the 300 million souls of the United States,"" she wrote.
|
| 76 |
+
Bachmann is no stranger to voicing her opinion on the President's dealing with Iran."
|
| 77 |
+
21,cnn_dailymail,en,True,,0.3503649635036496,0.16296296296296298,0.24817518248175185,en,"Father: ""I know he went through what he went through""
|
| 78 |
+
Louis Jordan was found on his sailboat, which was listing and in bad shape, rescuer says .
|
| 79 |
+
He appears to be in good shape, physically and mentally...","Louis Jordan, 37, left Conway, South Carolina, to fish in the ocean.
|
| 80 |
+
Just a few days into his trip, a storm capsized his boat and broke his mast. The Coast Guard says it has found no reason to doubt J..."
|
| 81 |
+
22,cnn_dailymail,en,True,,0.24858757062146897,0.057142857142857134,0.1581920903954802,en,"Richard Klass: Iran framework agreement on nukes is strong, but opponents will cast doubts on this and try to obscure its facts .
|
| 82 |
+
He says the deal would cut uranium stockpile, centrifuges, implement r...","U.S., Iran reach framework deal to limit Iran's nuclear program.
|
| 83 |
+
Aaron Miller: Opponents will likely raise questions about the agreement. David Rothkopf: Some in Congress are skeptical of Iran nuclear..."
|
| 84 |
+
23,cnn_dailymail,en,True,,0.3178807947019867,0.053691275167785234,0.15894039735099336,en,"Americans paid more for some fruits and vegetables last year because of the drought .
|
| 85 |
+
Tourists will now have to ask for a glass of water at a California restaurant .
|
| 86 |
+
Perhaps the only good thing is ano...","California is a breadbasket to the nation, growing more than a third of its vegetables and nearly two-thirds of its fruits and nuts.
|
| 87 |
+
The historic California drought hurts the rest of the union, too. T..."
|
| 88 |
+
24,cnn_dailymail,en,True,,0.711864406779661,0.5614035087719298,0.4067796610169491,en,"The FBI cites social media messages sent by Keonna Thomas, 30 .
|
| 89 |
+
She's accused of trying to travel overseas to join ISIS .
|
| 90 |
+
Thomas is one of three women facing federal terror charges this week .","Keonna Thomas is one of three women arrested this week on terror charges.
|
| 91 |
+
She's accused of trying to travel overseas to fight for ISIS."
|
| 92 |
+
25,cnn_dailymail,en,True,,0.4074074074074074,0.2692307692307692,0.37037037037037035,en,"Iranian sports official: The ban will be lifted for some events in the coming year .
|
| 93 |
+
But he says ""families are not interested in attending"" some sports matches .","Deputy Sports Minister Abdolhamid Ahmadi says the ban will be lifted for some events.
|
| 94 |
+
It isn't clear exactly which games women will be able to attend."
|
| 95 |
+
SUMMARY,cnn_dailymail,en,25/25,,0.34537982149532837,0.15716789755737007,0.24550524500330861,,AVERAGE SCORES,Success Rate: 100.0%
|
evaluation_results_fr.csv
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sample_id,dataset,language,success,error,rouge1_f,rouge2_f,rougeL_f,detected_language,reference_summary,generated_summary
|
| 2 |
+
1,mlsum,fr,True,,0.1754385964912281,0.0,0.10526315789473685,fr,Mohamed Salah et Divock Origi ont permis à Liverpool de remporter son sixième trophée en C1 au terme d’une finale décevante.,"Liverpool a battu Tottenham 2-0 en finale de la Ligue des champions, mais Manchester United a perdu à Arsenal aux quarts de finale. Chelsea est éliminé par Llorinous Corious FC au dernier 16."
|
| 3 |
+
2,mlsum,fr,True,,0.23376623376623376,0.026666666666666665,0.15584415584415584,fr,"Le philosophe, mort samedi à 88 ans, a su faire vivre une longue tradition française alliant les charmes de la plume, le travail heureux de la pensée et les générosités du cœur.","Les Français ont trouvé un moyen de comprendre le désaccord avec les idées des autres. Au lieu d'être ouvertement, il a rendu possible la création d'œuvres comme celle du Sirénage nationaliste françai..."
|
| 4 |
+
3,mlsum,fr,True,,0.3333333333333333,0.17391304347826086,0.2916666666666667,fr,"L’auteur des best-sellers « Les Cinq Sens », « Petite Poucette », « Le Gaucher boiteux », s’est éteint à l’âge de 88 ans, « entouré de sa famille ».","Michel Serres est décédé samedi à l'âge de 88 ans, en paix à 19 heures amoureux des saveurs et de la science."
|
| 5 |
+
4,mlsum,fr,True,,0.358974358974359,0.07894736842105264,0.1794871794871795,fr,"Deuxième demi-finale de suite, pour un club seulement remonté dans l’élite en 2016 : la performance est remarquable et ancre un peu plus le LOU dans le paysage du rugby français.",Le Lyonnais se qualifie pour la deuxième année consécutive en demi-finale.L'équipe française est capable de retrouver sa force après deux points.Barceland parvient à faire 10 points supplémentaires da...
|
| 6 |
+
5,mlsum,fr,True,,0.2716049382716049,0.025316455696202528,0.14814814814814814,fr,"Plusieurs de ces manifestants arboraient une kippa, alors que le port de cette calotte dans l’espace public est l’objet d’un âpre débat depuis une semaine en Allemagne.","Plusieurs politiciens antisémites, membres du Bundestag et un membre de l'ambassadeur des États-Unis en Allemagne ont participé à une manifestation anti-semitiste organisée en solidarité avec les Juif..."
|
| 7 |
+
6,mlsum,fr,True,,0.1627906976744186,0.07142857142857142,0.1627906976744186,fr,"Le chanteur et guitariste texan fut un des fondateurs du genre en 1966 avec son groupe, The 13th Floor Elevators.","Roky Erickson est décédé le vendredi 31 mai 2018 dans sa ville natale d'Austin, au Texas où il est né le 15 juillet 1947. Après une carrière inconsciente empêchée par la schizophrénie et la mort préma..."
|
| 8 |
+
7,mlsum,fr,True,,0.3055555555555555,0.1142857142857143,0.19444444444444445,fr,"Le militant des droits humains avait été placé en détention le 31 mars pour « atteintes aux institutions », et observait depuis une grève de la faim.",Plusieurs milliers de personnes ont assisté aux funérailles de Kamil Eddin Fekher. Le médecin âgé de 54 ans était également un défenseur de la cause du Mozambique. Il a été arrêté le 31 mars pour avoi...
|
| 9 |
+
8,mlsum,fr,True,,0.21052631578947367,0.02702702702702703,0.10526315789473684,fr,Une équipe de l’Inrap a mis au jour un dépôt d’une trentaine de monnaies d’or et d’argent qui nous transportent dans la Bourgogne de la toute fin du XVe siècle.,Environ 33 pièces de monnaie ont été imprimées sur du papier sans valeur significative. Le gouvernement français a commencé à utiliser le système nouvellement créé des taux de change entre les États-U...
|
| 10 |
+
9,mlsum,fr,True,,0.47457627118644075,0.21052631578947367,0.2711864406779661,fr,Le Figaro 3 qui équipe tous les concurrents de la course partie dimanche de La Baule est le premier monotype de série équipé de foils.,La course Solitaire Urgo Le Figaro sera lancée le 2 juin à partir de la baie de La Baule (Loire-Atlantique) Les quarante-sept concurrents deviendront tous admirateurs du premier Figaro 3 riggé.
|
| 11 |
+
10,mlsum,fr,True,,0.14705882352941174,0.030303030303030307,0.11764705882352941,fr,"Dimanche, 47 concurrents s’élancent pour la première des quatre étapes de la Solitaire Urgo « Le Figaro ». Parmi eux, Loïck Peyron, Michel Desjoyeaux, ou Alain Gautier.","Le Solitaires du Figaro juin 2017 fête son 50e anniversaire. Pour cet anniversair, la course a été menée sur tous les bateaux identiques. Pour ce jubilé, il sera en compétition avec un nouveau bateau,..."
|
| 12 |
+
11,mlsum,fr,True,,0.14545454545454545,0.0,0.07272727272727272,fr,"La confession catholique ne doit pas être conçue en opposition à d’autres et utilisée à des fins politiques, a expliqué le pontife argentin.",Le sanctuaire commémore à la fois une victoire contre les Ottomans et le refus par les habitants de la région d'être forcés dans le Saint-Siège à la fin du 16ème siècle. C'est ici que certains frères ...
|
| 13 |
+
12,mlsum,fr,True,,0.6363636363636365,0.46875,0.5757575757575758,fr,"L’explosion a eu lieu dans l’usine Kristall à Dzerzhinsk, une ville située à environ 400 kilomètres à l’est de Moscou.","Une explosion dans une grande usine d'explosifs du centre de la Russie tue au moins 79 personnes. L'explosion a eu lieu à l'usine Kristall de Dzerzhinsk, ville située à environ 400 kilomètres à l-est ..."
|
| 14 |
+
13,mlsum,fr,True,,0.1694915254237288,0.035087719298245605,0.1016949152542373,fr,"Tsitsipas, Halep, Fognini et Zverev ont gagné leurs matchs. « Le Monde » livre les résultats en détail au fil de la journée.","Serena Williams a été éliminée au troisième tour de Roland Garres par un autre Américain Antoine Kenin (35e) Ils essaient maintenant de battre leur meilleur rival, Jean de la Potrocki (115ème)."
|
| 15 |
+
14,mlsum,fr,True,,0.273972602739726,0.056338028169014086,0.1643835616438356,fr,"Des appels au rassemblement ont été lancés notamment à Paris, où des manifestants se sont élancés en direction de la place de la Nation.","Le dimanche vers 13h, à la place Denfert-Rochereau dans le 14e arrondissement, plusieurs partis politiques ont organisé de nouvelles manifestations. Ils ont également appelé à des mouvements de justic..."
|
| 16 |
+
15,mlsum,fr,True,,0.2,0.08333333333333333,0.2,fr,"L’ancien joueur du FC Séville, d’Arsenal et du Real Madrid, avait également disputé la Coupe du monde 2006 avec l’Espagne.",Jose Antonio Reyas est mort à l'âge de 35 ans dans un accident de voiture. Il a joué pour Arsenal et le Real Madrid entre autres.
|
| 17 |
+
16,mlsum,fr,True,,0.15384615384615385,0.05405405405405406,0.15384615384615385,fr,Le neurobiologiste fait une lecture de la pièce de Molière (1672) située quelque part entre la psychanalyse freudienne et l’existentialisme pascalien. Subtil et drôle.,"Les femmes de la science, par Molire (1672), Act III."
|
| 18 |
+
17,mlsum,fr,True,,0.20689655172413793,0.0,0.13793103448275862,fr,"Sa musique était un mélange de vieux blues, de jazz du temps du ragtime, de country minimaliste et de folk.","Leon Redbone est guitariste, banjoteur et chanteur du groupe American Blues Band. En 1977, il écrit trois livres pour le genre de musique country, dont ""How Can I Live Without You?"" et ""Where Will I G..."
|
| 19 |
+
18,mlsum,fr,True,,0.5066666666666667,0.21917808219178084,0.3733333333333333,fr,"Alors que le projet de loi est examiné lundi à l’Assemblée, le Réseau Action Climat demande notamment la fin de la vente des voitures thermiques dès 2030 et une taxe sur les vols.","Le projet de loi sur l'orientation à la mobilité (MoB) est examiné lundi 3 juin 2050 à l'Assemblée nationale.Il est urgent que les parlementaires et le gouvernement révisent leur copie de la loi, exho..."
|
| 20 |
+
19,mlsum,fr,True,,0.1951219512195122,0.025,0.0975609756097561,fr,Les élections européennes du 26 mai ont confirmé le fossé entre un vote La République en marche urbain et un vote Rassemblement national dans les zones périphériques.,"Au niveau national, le NRM et le LRM représentent 45,7 des 20 circonscriptions électorales. Certaines régions avec 24,3% de la population, y compris les zones rurales, restent inférieures à 25,7%; le ..."
|
| 21 |
+
20,mlsum,fr,True,,0.13333333333333333,0.0,0.08888888888888889,fr,"Le chercheur en gestion Sihem Dekhili livre dans une tribune au « Monde » les résultats d’une enquête sur les obstacles, essentiellement psychologiques, en particulier avec la réticence de l’utilisati...",Les restaurateurs français ont été recommandés de permettre aux clients d'emporter avec eux tout aliment restant dont ils pourraient avoir besoin. Cet incitatif est devenu une obligation le 27 mai 201...
|
| 22 |
+
21,mlsum,fr,True,,0.2337662337662338,0.026666666666666665,0.15584415584415584,fr,Le premier film du réalisateur Jordan Peele détourne les codes du gothique et de la science-fiction pour mieux appuyer sa dénonciation.,L'idée d'un jeune homme qu'il allait devenir un vampire est encore trop troublée et beaucoup plus efficace que la suggestion de sa femme. Le film montre également comment les cinéastes se sentent en c...
|
| 23 |
+
22,mlsum,fr,True,,0.23728813559322032,0.0,0.11864406779661016,fr,"Le journalisme est un métier qui nécessite parfois de faire face aux situations les plus extrêmes. Aujourd’hui, c’est à une expérience en lisière de l’anthropophagie que je dois me livrer, puisque m...",Le sandwich hommage des Burgers de Papa devait rester sur les charts pendant deux mois. Il a été lancé fin mars par la chaîne de restaurants Les Burgers De Papa. Jacques Heckerton et moi avons essayé ...
|
| 24 |
+
23,mlsum,fr,True,,0.48192771084337355,0.19753086419753085,0.2891566265060241,fr,"Le 1er juin 2009, le vol Air France AF 447 disparaissait dans l’Atlantique, entre Rio et Paris. A bord de l’appareil, 228 passagers et membres d’équipage.","Le 1er juin 2009, l'AF 447 Rio-Paris s'est écrasée dans l'Atlantique, tuant 228 personnes. D'une part, Airbus insiste sur les fautes de l'équipage à Rio-Paris. d'autre part, les syndicats des pilotes,..."
|
| 25 |
+
24,mlsum,fr,True,,0.2857142857142857,0.07407407407407408,0.17857142857142855,fr,"Le projet de loi, qui vise à donner un nouveau cadre aux déplacements des Français, arrive lundi à l’Assemblée.","La Commission a proposé que la nouvelle loi sur les orientations en matière de mobilité (LOG) soit examinée lors d'une session publique à l'Assemblée nationale, qui débutera le 3 juin."
|
| 26 |
+
25,mlsum,fr,True,,0.21782178217821782,0.0,0.11881188118811882,fr,"L’abondance des récoltes a fait baisser le cours des fruits du caféier mais les industriels misent sur cette boisson et ses versions glacées pour séduire les plus jeunes, explique dans sa chronique, L...",La Colombie a été particulièrement touchée par la pression de ses presses au brocolique et l'ajout de mousse à son prix du marché. Son obsession pour le grand secteur du chocolat blanc a également con...
|
| 27 |
+
SUMMARY,mlsum,fr,25/25,,0.2700516095775533,0.07993708060322793,0.1823557191602453,,AVERAGE SCORES,Success Rate: 100.0%
|
evaluation_results_summary.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset,language,total_samples,successful_evaluations,success_rate,avg_rouge1_f,avg_rouge2_f,avg_rougeL_f,error
|
| 2 |
+
cnn_dailymail,en,25,25,1.0,0.34537982149532837,0.15716789755737007,0.24550524500330861,
|
| 3 |
+
mlsum,fr,25,25,1.0,0.2700516095775533,0.07993708060322793,0.1823557191602453,
|
packages.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
build-essential
|
| 2 |
+
cmake
|
| 3 |
+
pkg-config
|
requirements.txt
CHANGED
|
Binary files a/requirements.txt and b/requirements.txt differ
|
|
|
summariser.py
ADDED
|
@@ -0,0 +1,599 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import PegasusForConditionalGeneration, PegasusTokenizer, pipeline
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer as NLLBTokenizer
from transformers import M2M100ForConditionalGeneration, M2M100Config
# from langdetect import detect, LangDetectException
import torch
import os

# Path to the pre-trained fastText language-identification model (lid.176.bin).
_FASTTEXT_MODEL_PATH = "lid.176.bin"
# English detections below this confidence are flagged for a normalizing
# round-trip "translation" to English (see get_language_code / summarize).
_ENGLISH_CONFIDENCE_THRESHOLD = 0.95


def _load_fasttext_model(path: str):
    """Load the fastText language-ID model from *path*.

    Returns the loaded model, or None when the fastText library is not
    installed, the model file is missing, or loading fails for any other
    reason.  A None result disables language detection downstream.
    """
    try:
        import fasttext
    except ImportError:
        print("Warning: fastText library not installed.")
        return None
    if not os.path.exists(path):
        print(f"Warning: fastText model file not found at {path}.")
        return None
    try:
        print(f"Loading fastText language identification model from: {path}")
        model = fasttext.load_model(path)
        print("fastText model loaded successfully.")
        return model
    except Exception as e:  # loading is best-effort; detection is optional
        print(f"Error loading fastText model: {e}")
        return None


# Loaded once at import time; None means language detection is unavailable.
_ft_model = _load_fasttext_model(_FASTTEXT_MODEL_PATH)
|
| 27 |
+
|
| 28 |
+
def get_language_code(text: str) -> tuple[str | None, str | None, float | None, bool]:
|
| 29 |
+
"""
|
| 30 |
+
Detects language using fastText and returns a tuple:
|
| 31 |
+
(detected_code_raw, nllb_compatible_code, confidence, force_translation_to_english_flag)
|
| 32 |
+
The last flag is True if detected as English but confidence is below threshold.
|
| 33 |
+
"""
|
| 34 |
+
global _ft_model
|
| 35 |
+
detected_code_raw = None
|
| 36 |
+
confidence = 0.0
|
| 37 |
+
force_translation = False
|
| 38 |
+
|
| 39 |
+
if not _ft_model:
|
| 40 |
+
print("Error: fastText model not available for language detection.")
|
| 41 |
+
return None, None, None, False
|
| 42 |
+
|
| 43 |
+
try:
|
| 44 |
+
cleaned_text = text.replace("\n", " ").replace("\r", " ")
|
| 45 |
+
if not cleaned_text.strip():
|
| 46 |
+
print("Warning: Input text is empty or whitespace only for language detection.")
|
| 47 |
+
return None, None, None, False
|
| 48 |
+
|
| 49 |
+
predictions = _ft_model.predict(cleaned_text, k=1)
|
| 50 |
+
if predictions and predictions[0]:
|
| 51 |
+
detected_code_raw = predictions[0][0].replace('__label__', '')
|
| 52 |
+
confidence = predictions[1][0]
|
| 53 |
+
print(f"fastText detected: {detected_code_raw} with confidence {confidence:.4f}")
|
| 54 |
+
|
| 55 |
+
# Check for low confidence English
|
| 56 |
+
if (detected_code_raw == 'en' or detected_code_raw == 'eng') and confidence < _ENGLISH_CONFIDENCE_THRESHOLD:
|
| 57 |
+
print(f"English detected with low confidence ({confidence:.4f} < {_ENGLISH_CONFIDENCE_THRESHOLD}). Flagging for potential translation.")
|
| 58 |
+
force_translation = True
|
| 59 |
+
else:
|
| 60 |
+
print("fastText could not predict language.")
|
| 61 |
+
return None, None, None, False
|
| 62 |
+
except Exception as e:
|
| 63 |
+
print(f"Error during fastText language detection: {e}")
|
| 64 |
+
return None, None, None, False
|
| 65 |
+
|
| 66 |
+
if not detected_code_raw:
|
| 67 |
+
return None, None, None, False
|
| 68 |
+
|
| 69 |
+
# --- NLLB Language Code Mapping ---
|
| 70 |
+
# NLLB uses Flores-200 codes
|
| 71 |
+
nllb_lang_map = {
|
| 72 |
+
'en': 'eng_Latn', 'eng': 'eng_Latn',
|
| 73 |
+
'es': 'spa_Latn', 'spa': 'spa_Latn',
|
| 74 |
+
'fr': 'fra_Latn',
|
| 75 |
+
'de': 'deu_Latn', 'ger': 'deu_Latn',
|
| 76 |
+
'it': 'ita_Latn',
|
| 77 |
+
'pt': 'por_Latn',
|
| 78 |
+
'zh': 'zho_Hans',
|
| 79 |
+
'zh-cn': 'zho_Hans',
|
| 80 |
+
'zh-tw': 'zho_Hant',
|
| 81 |
+
'ja': 'jpn_Jpan', 'jpn': 'jpn_Jpan',
|
| 82 |
+
'ko': 'kor_Hang', 'kor': 'kor_Hang',
|
| 83 |
+
'ar': 'ara_Arab',
|
| 84 |
+
'hi': 'hin_Deva',
|
| 85 |
+
'ru': 'rus_Cyrl',
|
| 86 |
+
'bn': 'ben_Beng',
|
| 87 |
+
'pa': 'pan_Guru',
|
| 88 |
+
'ur': 'urd_Arab',
|
| 89 |
+
'ta': 'tam_Taml',
|
| 90 |
+
'te': 'tel_Telu',
|
| 91 |
+
'ml': 'mal_Mlym',
|
| 92 |
+
'gu': 'guj_Gujr',
|
| 93 |
+
'mr': 'mar_Deva',
|
| 94 |
+
'id': 'ind_Latn', 'ind': 'ind_Latn',
|
| 95 |
+
'vi': 'vie_Latn', 'vie': 'vie_Latn',
|
| 96 |
+
'th': 'tha_Thai',
|
| 97 |
+
'tr': 'tur_Latn',
|
| 98 |
+
'fa': 'pes_Arab', 'per': 'pes_Arab',
|
| 99 |
+
'uk': 'ukr_Cyrl',
|
| 100 |
+
'pl': 'pol_Latn',
|
| 101 |
+
'nl': 'nld_Latn', 'dut': 'nld_Latn',
|
| 102 |
+
'ro': 'ron_Latn', 'rum': 'ron_Latn',
|
| 103 |
+
'cs': 'ces_Latn', 'cze': 'ces_Latn',
|
| 104 |
+
'sv': 'swe_Latn',
|
| 105 |
+
'fi': 'fin_Latn',
|
| 106 |
+
'da': 'dan_Latn',
|
| 107 |
+
'no': 'nob_Latn',
|
| 108 |
+
'el': 'ell_Grek', 'gre': 'ell_Grek',
|
| 109 |
+
'he': 'heb_Hebr',
|
| 110 |
+
'hu': 'hun_Latn',
|
| 111 |
+
'bg': 'bul_Cyrl',
|
| 112 |
+
'sr': 'srp_Cyrl',
|
| 113 |
+
'hr': 'hrv_Latn',
|
| 114 |
+
'sk': 'slk_Latn',
|
| 115 |
+
'sl': 'slv_Latn',
|
| 116 |
+
'et': 'est_Latn',
|
| 117 |
+
'lv': 'lav_Latn',
|
| 118 |
+
'lt': 'lit_Latn',
|
| 119 |
+
'sw': 'swh_Latn',
|
| 120 |
+
'am': 'amh_Ethi',
|
| 121 |
+
'yo': 'yor_Latn',
|
| 122 |
+
'ig': 'ibo_Latn',
|
| 123 |
+
'zu': 'zul_Latn',
|
| 124 |
+
'xh': 'xho_Latn',
|
| 125 |
+
'my': 'mya_Mymr',
|
| 126 |
+
'km': 'khm_Khmr',
|
| 127 |
+
'lo': 'lao_Laoo',
|
| 128 |
+
'ne': 'npi_Deva',
|
| 129 |
+
'si': 'sin_Sinh',
|
| 130 |
+
'az': 'azj_Latn',
|
| 131 |
+
'kk': 'kaz_Cyrl',
|
| 132 |
+
'uz': 'uzn_Latn',
|
| 133 |
+
'mn': 'khk_Cyrl',
|
| 134 |
+
'ps': 'pbt_Arab',
|
| 135 |
+
'tg': 'tgk_Cyrl',
|
| 136 |
+
'tk': 'tuk_Latn',
|
| 137 |
+
'so': 'som_Latn',
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
nllb_code = nllb_lang_map.get(detected_code_raw)
|
| 141 |
+
if not nllb_code and '-' in detected_code_raw:
|
| 142 |
+
nllb_code = nllb_lang_map.get(detected_code_raw.split('-')[0])
|
| 143 |
+
|
| 144 |
+
if not nllb_code:
|
| 145 |
+
print(f"Warning: No NLLB mapping for detected language code '{detected_code_raw}'.")
|
| 146 |
+
|
| 147 |
+
return detected_code_raw, nllb_code, confidence, force_translation
|
| 148 |
+
|
| 149 |
+
class Summarizer:
|
| 150 |
+
def __init__(self,
|
| 151 |
+
summarizer_model_name="google/pegasus-cnn_dailymail",
|
| 152 |
+
translator_model_name="facebook/nllb-200-distilled-1.3B"): # NLLB model
|
| 153 |
+
|
| 154 |
+
self.summarizer_model_name = summarizer_model_name
|
| 155 |
+
self.translator_model_name = translator_model_name
|
| 156 |
+
|
| 157 |
+
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 158 |
+
print(f"Using device: {self.device}")
|
| 159 |
+
|
| 160 |
+
self.pegasus_tokenizer = None
|
| 161 |
+
self.pegasus_model = None
|
| 162 |
+
self.translator_tokenizer = None
|
| 163 |
+
self.translator_model = None
|
| 164 |
+
|
| 165 |
+
if "pegasus" in self.summarizer_model_name.lower():
|
| 166 |
+
self.effective_input_token_limit = 512
|
| 167 |
+
# elif "bart" in self.summarizer_model_name.lower(): # mbart is a bart model
|
| 168 |
+
# self.effective_input_token_limit = 1024
|
| 169 |
+
else: # Default
|
| 170 |
+
self.effective_input_token_limit = 512
|
| 171 |
+
|
| 172 |
+
# Add effective input token limit for the translator model
|
| 173 |
+
self.translator_effective_input_token_limit = 500
|
| 174 |
+
|
| 175 |
+
self._load_models()
|
| 176 |
+
|
| 177 |
+
def _load_models(self):
|
| 178 |
+
try:
|
| 179 |
+
print(f"Loading Pegasus tokenizer: {self.summarizer_model_name}...")
|
| 180 |
+
self.pegasus_tokenizer = PegasusTokenizer.from_pretrained(self.summarizer_model_name)
|
| 181 |
+
print(f"{self.summarizer_model_name} tokenizer loaded.")
|
| 182 |
+
print(f"Loading Pegasus model: {self.summarizer_model_name}...")
|
| 183 |
+
self.pegasus_model = PegasusForConditionalGeneration.from_pretrained(self.summarizer_model_name).to(self.device)
|
| 184 |
+
print(f"{self.summarizer_model_name} model loaded.")
|
| 185 |
+
|
| 186 |
+
print(f"Loading NLLB tokenizer: {self.translator_model_name}...")
|
| 187 |
+
self.translator_tokenizer = NLLBTokenizer.from_pretrained(self.translator_model_name)
|
| 188 |
+
print(f"NLLB tokenizer type: {type(self.translator_tokenizer)}")
|
| 189 |
+
print("NLLB tokenizer loaded.")
|
| 190 |
+
|
| 191 |
+
print(f"Loading NLLB model config using M2M100Config: {self.translator_model_name}...")
|
| 192 |
+
translator_config = M2M100Config.from_pretrained(self.translator_model_name)
|
| 193 |
+
print(f"Explicit NLLB/M2M100 config type: {type(translator_config)}")
|
| 194 |
+
|
| 195 |
+
print(f"Loading NLLB model using M2M100ForConditionalGeneration: {self.translator_model_name}...")
|
| 196 |
+
self.translator_model = M2M100ForConditionalGeneration.from_pretrained(
|
| 197 |
+
self.translator_model_name,
|
| 198 |
+
config=translator_config
|
| 199 |
+
).to(self.device)
|
| 200 |
+
print(f"NLLB model type: {type(self.translator_model)}")
|
| 201 |
+
print("NLLB model loaded.")
|
| 202 |
+
|
| 203 |
+
# --- DEBUGGING ---
|
| 204 |
+
# if self.translator_model and hasattr(self.translator_model, 'config'):
|
| 205 |
+
# print(f"NLLB model config type: {type(self.translator_model.config)}") # Should be M2M100Config
|
| 206 |
+
# print(f"NLLB model config keys: {list(self.translator_model.config.to_dict().keys())}")
|
| 207 |
+
# if hasattr(self.translator_model.config, 'lang_code_to_id'):
|
| 208 |
+
# print("SUCCESS: 'lang_code_to_id' FOUND in translator_model.config")
|
| 209 |
+
# else:
|
| 210 |
+
# print("FAILURE: 'lang_code_to_id' NOT FOUND in translator_model.config")
|
| 211 |
+
# else:
|
| 212 |
+
# print("NLLB model or its config is None after loading attempts.")
|
| 213 |
+
# --- END DEBUGGING ---
|
| 214 |
+
|
| 215 |
+
except Exception as e:
|
| 216 |
+
print(f"Error loading models: {e}")
|
| 217 |
+
import traceback
|
| 218 |
+
traceback.print_exc()
|
| 219 |
+
raise
|
| 220 |
+
|
| 221 |
+
    def _translate_text(self, text_to_translate: str, src_nllb_lang: str, tgt_nllb_lang: str) -> str | None:
        """Translate text with the NLLB model, chunking long inputs.

        Parameters
        ----------
        text_to_translate : str
            Source text; may be arbitrarily long (token-based chunking is
            applied above ``translator_effective_input_token_limit``).
        src_nllb_lang, tgt_nllb_lang : str
            Flores-200 language codes (e.g. 'fra_Latn', 'eng_Latn').

        Returns
        -------
        str | None
            The translated text (chunk translations joined with spaces);
            "" for empty/zero-token input; the ORIGINAL text when
            *src_nllb_lang* is missing; None on any error (model not
            loaded, unknown target code, generation failure).
        """
        if not self.translator_model or not self.translator_tokenizer:
            print("Translator model/tokenizer not loaded.")
            return None
        if not src_nllb_lang:
            print(f"Missing NLLB source language code for translation. Cannot translate.")
            return text_to_translate # Keep original behavior

        if not text_to_translate.strip():
            return "" # Handle empty string input explicitly

        token_limit = self.translator_effective_input_token_limit

        try:
            # Set the source language for the NLLB tokenizer
            self.translator_tokenizer.src_lang = src_nllb_lang

            # Tokenize once without special tokens so chunk boundaries can be
            # computed over raw content tokens.
            all_input_ids = self.translator_tokenizer.encode(text_to_translate, add_special_tokens=False)
            total_tokens = len(all_input_ids)

            if total_tokens == 0:
                print(f"Warning: Text resulted in 0 tokens after encoding. Original text (first 100 chars): '{text_to_translate[:100]}...'")
                return ""

            translated_parts = []

            if total_tokens <= token_limit:
                # Short input: translate in one pass.
                print(f"DEBUG: Translating text as a single chunk ({total_tokens} tokens).")
                inputs = self.translator_tokenizer(
                    text_to_translate,
                    return_tensors="pt",
                    truncation=True,
                    padding=True,
                    max_length=token_limit
                ).to(self.device)

                try:
                    target_lang_token_id = self.translator_tokenizer.convert_tokens_to_ids(tgt_nllb_lang)
                except Exception as e_conv:
                    print(f"Error converting target language code '{tgt_nllb_lang}' to ID: {e_conv}")
                    return None

                # An UNK id means the target code is not in the tokenizer's
                # vocabulary; generating with it would produce garbage.
                if target_lang_token_id == self.translator_tokenizer.unk_token_id:
                    print(f"Warning: Target language code '{tgt_nllb_lang}' was converted to UNK token ID.")
                    return None

                # forced_bos_token_id steers NLLB's decoder to the target language.
                generated_tokens = self.translator_model.generate(
                    **inputs,
                    forced_bos_token_id=target_lang_token_id,
                    max_length=1024,
                    repetition_penalty=1.2,
                    no_repeat_ngram_size=3
                )
                translated_text_segment = self.translator_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
                translated_parts.append(translated_text_segment)
            else:
                # Long input: slide a window of chunk_slice_size tokens with
                # overlap_tokens of context carried between adjacent chunks.
                print(f"Input text for translation has {total_tokens} tokens. Applying chunking (token limit for tokenizer: {token_limit}).")

                # Pattern similar to summarizer chunking:
                # chunk_size for slicing from all_input_ids
                chunk_slice_size = token_limit - 50
                if chunk_slice_size <= 0: # Ensure positive slice size
                    chunk_slice_size = token_limit // 2 if token_limit > 1 else 1

                overlap_tokens = 50
                if overlap_tokens >= chunk_slice_size and chunk_slice_size > 0:
                    overlap_tokens = chunk_slice_size // 3
                elif chunk_slice_size == 0:
                    overlap_tokens = 0

                start_idx = 0
                chunk_num = 1
                while start_idx < total_tokens:
                    end_idx = min(start_idx + chunk_slice_size, total_tokens)
                    chunk_token_ids_for_decode = all_input_ids[start_idx:end_idx]

                    # Decode the token slice back to text and re-tokenize below:
                    # re-encoding adds the special/source-language tokens that
                    # the raw slice lacks.
                    current_chunk_text_to_translate = self.translator_tokenizer.decode(
                        chunk_token_ids_for_decode,
                        skip_special_tokens=True,
                        clean_up_tokenization_spaces=True
                    )

                    if not current_chunk_text_to_translate.strip():
                        print(f"DEBUG: Skipping empty decoded chunk {chunk_num}.")
                        # Advance start_idx
                        if end_idx == total_tokens: break
                        step = chunk_slice_size - overlap_tokens
                        start_idx += step if step > 0 else 1
                        if start_idx >= end_idx and end_idx < total_tokens : start_idx = end_idx # Ensure progress
                        chunk_num += 1
                        continue

                    print(f"DEBUG: Translating chunk {chunk_num}. Input token slice: {start_idx}-{end_idx-1}. Decoded text length: {len(current_chunk_text_to_translate)}")

                    inputs = self.translator_tokenizer(
                        current_chunk_text_to_translate,
                        return_tensors="pt",
                        truncation=True,
                        padding=True,
                        max_length=token_limit # Tokenizer will cap the input from this text chunk
                    ).to(self.device)

                    try:
                        target_lang_token_id = self.translator_tokenizer.convert_tokens_to_ids(tgt_nllb_lang)
                    except Exception as e_conv:
                        print(f"Error converting target language code '{tgt_nllb_lang}' to ID for chunk {chunk_num}: {e_conv}")
                        return None
                    if target_lang_token_id == self.translator_tokenizer.unk_token_id:
                        print(f"Warning: Target language code '{tgt_nllb_lang}' was UNK for chunk {chunk_num}.")
                        return None

                    generated_tokens = self.translator_model.generate(
                        **inputs,
                        forced_bos_token_id=target_lang_token_id,
                        max_length=1024,
                        repetition_penalty=1.2,
                        no_repeat_ngram_size=3
                    )
                    translated_segment = self.translator_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
                    translated_parts.append(translated_segment)

                    chunk_num +=1
                    if end_idx == total_tokens:
                        break

                    step = chunk_slice_size - overlap_tokens
                    start_idx += step if step > 0 else 1 # Ensure progress even if step is not positive

                    # Safety break if start_idx doesn't advance properly (should not be needed with step logic)
                    if start_idx >= total_tokens and end_idx < total_tokens: break
                    if start_idx >= end_idx and end_idx < total_tokens : start_idx = end_idx


            final_translated_text = " ".join(translated_parts).strip()
            return final_translated_text

        except Exception as e:
            print(f"Error during translation from {src_nllb_lang} to {tgt_nllb_lang}: {e}")
            import traceback
            traceback.print_exc()
            return None
|
| 362 |
+
|
| 363 |
+
def _summarize_english_text(self, text_chunk: str, min_length: int, max_length: int) -> str:
|
| 364 |
+
# Refined from your version for clarity and robustness
|
| 365 |
+
if not self.pegasus_model or not self.pegasus_tokenizer:
|
| 366 |
+
print("Error: Pegasus model/tokenizer not loaded.")
|
| 367 |
+
return "Error: Summarization model not available."
|
| 368 |
+
try:
|
| 369 |
+
# Provide default values if None
|
| 370 |
+
effective_min_length = min_length if min_length is not None else 30
|
| 371 |
+
effective_max_length = max_length if max_length is not None else 128
|
| 372 |
+
|
| 373 |
+
inputs = self.pegasus_tokenizer(
|
| 374 |
+
text_chunk, return_tensors="pt", truncation=True, max_length=self.effective_input_token_limit
|
| 375 |
+
).to(self.device)
|
| 376 |
+
|
| 377 |
+
summary_ids = self.pegasus_model.generate(
|
| 378 |
+
inputs["input_ids"],
|
| 379 |
+
num_beams=4,
|
| 380 |
+
min_length=effective_min_length,
|
| 381 |
+
max_length=effective_max_length,
|
| 382 |
+
early_stopping=True
|
| 383 |
+
)
|
| 384 |
+
|
| 385 |
+
summary_text_raw = self.pegasus_tokenizer.decode(
|
| 386 |
+
summary_ids[0],
|
| 387 |
+
skip_special_tokens=True, # Important
|
| 388 |
+
clean_up_tokenization_spaces=True
|
| 389 |
+
)
|
| 390 |
+
|
| 391 |
+
# Post-processing for <n> tokens
|
| 392 |
+
processed_summary = summary_text_raw.replace("<n>", "\n")
|
| 393 |
+
lines = [line.strip() for line in processed_summary.split('\n')]
|
| 394 |
+
processed_summary = "\n".join(filter(None, lines)) # Removes empty lines
|
| 395 |
+
|
| 396 |
+
return processed_summary.strip()
|
| 397 |
+
except Exception as e:
|
| 398 |
+
print(f"Error during Pegasus summarization of chunk: {e}")
|
| 399 |
+
import traceback
|
| 400 |
+
traceback.print_exc()
|
| 401 |
+
return f"Error summarizing chunk: {e}"
|
| 402 |
+
|
| 403 |
+
def summarize(self, text: str, min_length_per_chunk: int = None, max_length_per_chunk: int = None,
              overall_min_length: int = None, overall_max_length: int = None) -> dict:
    """Summarize ``text`` in its detected language via an English pivot.

    Pipeline: detect the language with fastText (``get_language_code``),
    translate non-English input to English with the NLLB translator,
    summarize the English text with Pegasus (chunking long inputs), then
    translate the summary back to the original language when applicable.

    Args:
        text: Input document in any language fastText can detect.
        min_length_per_chunk: Per-chunk summary minimum (long-input path).
        max_length_per_chunk: Per-chunk summary maximum (long-input path).
        overall_min_length: Summary minimum when the input fits in one pass.
        overall_max_length: Summary maximum when the input fits in one pass.

    Returns:
        A dict with keys ``final_summary``, ``detected_language_raw``,
        ``detected_language_confidence``, ``english_translation``,
        ``english_summary``, ``error`` and ``translation_performed``.
        On failure, ``error`` is set and the other fields may be None.
    """
    result = {
        'final_summary': None, 'detected_language_raw': None, 'detected_language_confidence': None,
        'english_translation': None, 'english_summary': None, 'error': None,
        'translation_performed': False  # New key to indicate if translation to English happened
    }

    # Fail fast if any required model is missing or the input is blank.
    if not all([self.pegasus_model, self.pegasus_tokenizer, self.translator_model, self.translator_tokenizer]):
        result['error'] = "Error: Core models not loaded."
        return result
    if _ft_model is None:
        result['error'] = "Error: Language detection model (fastText) not available."
        return result
    if not text.strip():
        result['error'] = "Error: Input text is empty."
        return result

    original_text_to_process = text
    # get_language_code returns (raw fastText code, NLLB code or None,
    # confidence, flag requesting normalization of low-confidence English).
    detected_lang_raw, detected_lang_nllb, confidence, force_translation_flag = get_language_code(text)

    result['detected_language_raw'] = detected_lang_raw
    result['detected_language_confidence'] = confidence

    if not detected_lang_raw:
        result['error'] = "Error: Could not detect language."
        return result

    # Determine if translation to English is needed
    needs_translation_to_english = False
    if (detected_lang_raw != 'en' and detected_lang_raw != 'eng'):
        needs_translation_to_english = True
    elif force_translation_flag:
        # Low-confidence "English": run it through the translator anyway to
        # normalize the text before summarization.
        print(f"INFO: Detected as English ('{detected_lang_raw}') but confidence {confidence} is low. Attempting 'translation' to English to normalize.")
        needs_translation_to_english = True
        if not detected_lang_nllb:
            detected_lang_nllb = 'eng_Latn'
        result['translation_performed'] = True

    if needs_translation_to_english:
        if detected_lang_nllb:
            print(f"Original language: {detected_lang_raw} (NLLB src: {detected_lang_nllb}). Translating to English (eng_Latn)...")
            english_text_translation = self._translate_text(original_text_to_process, detected_lang_nllb, "eng_Latn")
            result['translation_performed'] = True

            # _translate_text signals failure via None or an "Error..." string.
            if not english_text_translation or english_text_translation.startswith("Error"):
                err_msg = f"Error: Translation to English failed for lang {detected_lang_raw}."
                result['error'] = err_msg
                return result

            original_text_to_process = english_text_translation
            result['english_translation'] = original_text_to_process
            print("Translation to English complete.")
        else: # Non-English detected by fastText, but no NLLB mapping for it
            print(f"Critical: Original language detected as '{detected_lang_raw}', but no NLLB code mapping found. Cannot reliably translate.")
            result['error'] = f"Error: Input language '{detected_lang_raw}' is not supported for translation (no NLLB mapping)."
            return result


    # --- Summarization of English text (original_text_to_process is now English) ---
    all_input_ids = self.pegasus_tokenizer.encode(original_text_to_process, add_special_tokens=False)
    total_tokens = len(all_input_ids)
    english_summary_text = None

    if total_tokens <= self.effective_input_token_limit:
        # Fits in a single model pass.
        english_summary_text = self._summarize_english_text(original_text_to_process, overall_min_length, overall_max_length)
    else:
        # (Your full chunking logic here)
        # Split into overlapping token windows, summarize each window, then
        # concatenate the partial summaries.
        print(f"Input text for summarizer has {total_tokens} tokens. Applying chunking...")
        chunk_size = self.effective_input_token_limit - 50
        overlap_size = 50
        chunks_texts = []
        start_idx = 0
        while start_idx < total_tokens:
            end_idx = min(start_idx + chunk_size, total_tokens)
            chunk_token_ids = all_input_ids[start_idx:end_idx]
            chunk_text_for_summary = self.pegasus_tokenizer.decode(chunk_token_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
            chunks_texts.append(chunk_text_for_summary)
            if end_idx == total_tokens: break
            # Advance by chunk_size minus the overlap so adjacent chunks
            # share context; the second guard prevents a non-advancing loop.
            start_idx += (chunk_size - overlap_size)
            if start_idx >= end_idx : break

        if not chunks_texts:
            result['error'] = "Error: Failed to create any chunks from the (potentially translated) text."
            return result

        chunk_summaries_list = []
        for i_chunk, chunk_text_to_summarize in enumerate(chunks_texts):
            print(f"Summarizing (English) chunk {i_chunk+1}/{len(chunks_texts)}...")
            chunk_sum = self._summarize_english_text(chunk_text_to_summarize, min_length_per_chunk, max_length_per_chunk)
            if chunk_sum.startswith("Error"):
                result['error'] = f"Error summarizing chunk {i_chunk+1}: {chunk_sum.split(':', 1)[1].strip() if ':' in chunk_sum else chunk_sum}"
                return result
            chunk_summaries_list.append(chunk_sum)
        english_summary_text = " ".join(chunk_summaries_list)

    # Surface summarization failures ("Error..." strings or empty output).
    if not english_summary_text or english_summary_text.startswith("Error"):
        err_msg_sum = f"Error during English summarization"
        if english_summary_text and english_summary_text.startswith("Error"):
            err_msg_sum += f": {english_summary_text.split(':', 1)[1].strip() if ':' in english_summary_text else english_summary_text}"
        elif not english_summary_text:
            err_msg_sum += ": Summarization returned no text."
        result['error'] = err_msg_sum
        return result

    result['english_summary'] = english_summary_text

    # --- Translate summary back ---
    final_summary_text = english_summary_text
    if result['translation_performed'] and not force_translation_flag: # if force_translation_flag, it was already "English"
        if detected_lang_raw != 'en' and detected_lang_raw != 'eng' and detected_lang_nllb:
            print(f"Translating summary back to {detected_lang_raw} (NLLB: {detected_lang_nllb})...")
            translated_summary_back = self._translate_text(english_summary_text, "eng_Latn", detected_lang_nllb)
            if translated_summary_back and not translated_summary_back.startswith("Error"):
                final_summary_text = translated_summary_back
                print("Back-translation complete.")
            else:
                # Back-translation failed: keep the English summary and warn,
                # rather than failing the whole request.
                warning_msg = f"Warning: Back-translation to {detected_lang_raw} failed or returned error. Returning English summary."
                if translated_summary_back and translated_summary_back.startswith("Error"):
                    warning_msg += f" Details: {translated_summary_back.split(':', 1)[1].strip() if ':' in translated_summary_back else translated_summary_back}"
                elif not translated_summary_back:
                    warning_msg += " Back-translation function returned None."
                print(warning_msg)

    result['final_summary'] = final_summary_text
    return result
|
| 529 |
+
|
| 530 |
+
# Manual smoke test: exercises the Summarizer end-to-end (English, Chinese,
# mixed-language, and a long English article that triggers chunking).
# Requires the fastText model file ('lid.176.bin') to be present.
if __name__ == "__main__":
    print("Testing Summarizer Engine...")
    if _ft_model is None:
        print("CRITICAL: fastText model not loaded. Please ensure 'lid.176.bin' is present and readable.")
        print("Skipping direct engine tests.")
    else:
        try:
            engine = Summarizer()

            # Short English input: single-pass summarization, no translation.
            print("\n--- Testing English Article ---")
            short_article_en = "Apple today announced its new iPhone, which features a faster processor and an improved camera. The company expects strong sales."
            print(f"Input (EN):\n{short_article_en}")
            summary_en_result = engine.summarize(short_article_en, overall_min_length=10, overall_max_length=50)
            print(f"Result (EN): {summary_en_result}")

            # Chinese input: exercises translation to English and back.
            print("\n--- Testing Chinese Article (Short) ---")
            short_article_zh = "今天天气真好,阳光明媚。"
            print(f"Input (ZH):\n{short_article_zh}")
            summary_zh_result = engine.summarize(short_article_zh, overall_min_length=5, overall_max_length=30)
            print(f"Result (ZH): {summary_zh_result}")

            # Mixed-language input: exercises the low-confidence detection path.
            print("\n--- Testing Mixed Language Article ---")
            mixed_article = '今天天气真好 How are you? 大家都在玩'
            print(f"Input (Mixed):\n{mixed_article}")
            summary_mixed_result = engine.summarize(mixed_article, overall_min_length=5, overall_max_length=40)
            print(f"Result (Mixed): {summary_mixed_result}")


            # Long English input: should exceed the token limit and trigger
            # the chunked summarization path.
            print("\n--- Testing Long English Article ---")
            long_article_en = """
Warmer weather is finally here in the Northern Hemisphere, and with it, many pools and beaches are opening for the summer. That’s great for families who want to spend time by the water, but it’s also a good time to be reminded about the importance of water safety.
An estimated 4,000 fatal unintentional drownings happen every year in the United States — an average of 11 drowning deaths per day — according to the Centers for Disease Control and Prevention.
Drowning is the leading cause of death in children ages 1 to 4, and after motor vehicle accidents, it is the second leading cause of death attributed to unintentional injuries among kids ages 5 to 14.
I wanted to speak more about water safety with CNN wellness expert Dr. Leana Wen. Wen is an emergency physician and adjunct associate professor at George Washington University who previously served as Baltimore’s health commissioner. She is also the mother of two young children, and as someone who learned to swim as an adult, she is a passionate advocate for kids — and adults — learning to swim.
CNN: Who is most at risk of drowning, and under what circumstances?
Dr. Leana Wen: The CDC issued an important report in 2024 about unintentional drowning deaths in the United States. Death rates were significantly higher in 2020, 2021 and 2022 than in 2019, according to the agency. Moreover, the highest rates were seen in young children ages 1 to 4. Among children in this age group, 461 died by drowning in 2022, an increase of 28% from 2019.
The report also highlighted racial disparities, with higher rates of drowning deaths among individuals identified as non-Hispanic American Indian or Alaska Native and as non-Hispanic Black or African American. Only 45% of all adults reported having had swim lessons, and those numbers were higher among White Americans (52%) than among Black (37%) and Hispanic (28%) Americans.
Racial disparities were also reported in a 2023 analysis from the Consumer Product Safety Commission, which found that African American children made up 21% of all drownings for kids younger than age 15 in which race and ethnicity are known. Among kids ages 5 to 14, 45% of drowning deaths occurred among African Americans.
The CPSC analysis also contained another key data point: The vast majority (80%) of pediatric drownings in which the setting is known occurred at a residence. This means that 4 in 5 kids who drowned died in their own backyard pool or that of a friend, neighbor or family member. Of these residential drownings, 91% occurred among kids younger than 7 years old.
CNN: Why do so many drownings happen in residential settings?
Wen: One major reason is the difference in supervision. Many public beaches and community pools hire lifeguards whose job is to watch out for the safety of everyone in or near the water. Private pools in people’s yards often don’t have someone designated for this purpose. Sometimes older children are supervising younger children but aren’t always watching. Or adults may be supervising, but they are also busy with other tasks. In addition, some of those watching others may not know how to swim themselves.
There may also be a false sense of security in residential settings. People may think the pool is small or not that deep or there are a lot of people around, so what can happen? Keep in mind, though, that small kids can drown in just inches of water. Serious injury or death can happen within 30 seconds. Drownings are often silent because the victim is unable to call out for help.
CNN: How can parents and guardians prevent drownings in residential settings?
Wen: The single most important best practice is to never leave children unsupervised near a body of water. Even if they already know how to swim, even if they are wearing a flotation device, even if the pool is shallow or small, an accident could occur — and either you or another responsible adult should always be able to see your child. The supervising adult should be actively watching the child and not distracted by chores or their smartphone. That person also should not be under the influence of alcohol or drugs.
The adult who’s responsible must also know how to swim well enough so they are able to jump into the pool and save the child if necessary. An additional safety precaution is learning CPR and first aid for infants, children and adults, which you can do through the American Red Cross.
More than 1 in 3 Black adults say they can’t swim. This team is trying to teach them and their kids
If you have a pool, be very careful before allowing others to use it. If your neighbors’ children want to swim in your pool, a responsible adult must accompany them. Private swimming pools should all have childproof fencing around them. The fencing should enclose the pool, have a self-close latch out of the reach of children and be at least 4 feet high. This is required by law in most states.
CNN: What safety precautions should people take around natural bodies of water?
Wen: Always wear a properly fitted, US Coast Guard–approved life jacket when boating. Of all the people who drowned while boating in 2022, 85% were not wearing a life jacket, according to the CDC.
To be safe, swim in areas where a lifeguard is on duty. Always follow lifeguard guidance about safety conditions, and stay in the area designated for swimming.
CNN: What about teaching children how to swim — can that help with water safety?
Wen: Yes. Kids ages 1 to 4 who took part in formal swim lessons had an 88% lower risk of drowning, according to a study in JAMA Pediatrics. The goal here isn’t necessarily to teach kids all the different strokes and get them to join a swim team; it’s to impart basic lifesaving skills, such as treading water and floating on their back.
When you are in the water with your children, take every opportunity to remind them about water safety. Other tips include never swimming alone, always asking for permission before entering the water and never diving into unknown bodies of water headfirst. Young children should also be reminded not to reach for items in the pool, as they are at risk of falling in; they should always ask for help instead.
Never leave children alone by the water, and remind them to ask for help if they want to reach something in the pool. travelism/E+/Getty Images
CNN: What about parents or guardians who don’t know how to swim? Do you recommend that they also take swim lessons?
Wen: Yes. First, adults who don’t know how to swim are more likely to have children who don’t know how to swim. This was the case for me. My parents didn’t swim, and I also never learned swimming growing up.
Second, it’s hard for adults to properly supervise children swimming if they can’t swim themselves. It was actually a terrifying experience with my own children that prompted me to learn to swim. My children were just 1 year and 3 years old one summer when my older kid pushed the younger one into the pool.
We were at our local community pool, and there was a lifeguard who immediately sprang into action. But I remember how terrified I felt — and how helpless. I enrolled my kids in swim lessons right away. I also found an instructor to teach me, too, because I realized I had to overcome my own fear of the water and learn basic water safety skills to protect my kids.
Learning how to swim as an adult is a humbling experience, especially for people like me who had to first start with overcoming fear. I began literally from zero. For weeks, I worked on just getting comfortable submerging my head underwater.
Eventually, I learned how to swim and now really enjoy being in the water. And I feel a lot more comfortable supervising my children when we are in private or community swimming spaces. I’m looking forward to our local pool opening for the summer and to spending time with my kids having a fun — and safe — time in the water.
"""
            print(f"Input (Long EN), num chars: {len(long_article_en)}")
            summary_long_en_result = engine.summarize(long_article_en, overall_min_length=50, overall_max_length=200)
            print(f"Result (Long EN): {summary_long_en_result}")


        except Exception as e:
            # Catch-all so a single failing case still prints a traceback
            # instead of crashing the smoke test silently.
            print(f"Failed to initialize or use Summarizer engine: {e}")
            import traceback
            traceback.print_exc()
|
test.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import pipeline, PegasusForConditionalGeneration, PegasusTokenizer
|
| 2 |
+
|
| 3 |
+
def main():
    """Smoke-test the Pegasus CNN/DailyMail summarization pipeline.

    Loads the tokenizer and model, builds a transformers summarization
    pipeline on GPU when one is available (CPU otherwise), and summarizes a
    short sample article, printing the result. Errors at any stage are
    printed with a traceback; the function returns None in all cases.
    """
    model_name = "google/pegasus-cnn_dailymail"

    try:
        print(f"Loading tokenizer: {model_name}...")
        tokenizer = PegasusTokenizer.from_pretrained(model_name)
        print("Tokenizer loaded successfully!")

        print(f"Loading model: {model_name}...")
        model = PegasusForConditionalGeneration.from_pretrained(model_name)
        print("Model loaded successfully!")

        print("Creating summarization pipeline...")
        # use GPU (if it is available)
        # device=0 unconditionally crashes on CPU-only hosts; probe first.
        import torch
        device = 0 if torch.cuda.is_available() else -1
        summarizer_pipeline = pipeline("summarization", model=model, tokenizer=tokenizer, device=device)
        print("Pipeline created successfully!")

    except Exception as e:
        print(f"Error during model/tokenizer loading or pipeline creation: {e}")
        # Print more detailed error if possible
        import traceback
        traceback.print_exc()
        return

    article_text = """
    Scientists have discovered a new species of glowing frog in the Amazon rainforest.
    The frog, which has been named 'Luminos Hyalinobatrachium', emits a faint blue light
    from its translucent skin. Researchers believe this bioluminescence might be used
    for communication or camouflage in the dense jungle environment. The discovery
    highlights the incredible biodiversity still being uncovered in the region and
    underscores the importance of conservation efforts to protect these unique ecosystems.
    Further studies are planned to understand the exact mechanism and purpose of the glow.
    """

    print("\nOriginal Article:")
    print(article_text)

    try:
        print("\nGenerating summary...")
        summary = summarizer_pipeline(article_text, max_length=60, min_length=20, do_sample=False)

        # The pipeline returns a list of dicts like [{"summary_text": ...}].
        if summary and isinstance(summary, list) and 'summary_text' in summary[0]:
            print("\nGenerated Summary:")
            print(summary[0]['summary_text'])
        else:
            print("Could not generate summary or unexpected output format.")
            print("Output from pipeline:", summary)

    except Exception as e:
        print(f"Error during summarization: {e}")
        import traceback
        traceback.print_exc()
| 55 |
+
|
| 56 |
+
# Script entry point: run the smoke test when executed directly.
if __name__ == "__main__":
    main()
|