Deploy complete Smart Summarizer project with all features - notebooks, data, evaluation, batch processing

#2
.dockerignore ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Git
2
+ .git
3
+ .gitignore
4
+
5
+ # Python
6
+ __pycache__
7
+ *.pyc
8
+ *.pyo
9
+ *.pyd
10
+ .Python
11
+ *.so
12
+ *.egg
13
+ *.egg-info
14
+ dist
15
+ build
16
+
17
+ # Virtual Environment
18
+ .venv
19
+ venv
20
+ ENV
21
+ env
22
+
23
+ # IDE
24
+ .vscode
25
+ .idea
26
+ *.swp
27
+ *.swo
28
+
29
+ # OS
30
+ .DS_Store
31
+ Thumbs.db
32
+
33
+ # Documentation
34
+ *.md
35
+ !README.md
36
+ !QUICK_START.md
37
+
38
+ # Test files
39
+ test_*.py
40
+ *_test.py
41
+ tests/
42
+
43
+ # Notebooks
44
+ notebooks/
45
+ *.ipynb
46
+ .ipynb_checkpoints
47
+
48
+ # Data
49
+ data/raw/
50
+ data/processed/
51
+ results/
52
+
53
+ # Logs
54
+ logs/
55
+ *.log
56
+
57
+ # Uploads (will be created in container)
58
+ uploads/*
59
+
60
+ # Development files
61
+ .env
62
+ .env.local
.hfignore ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Spaces ignore file
2
+ # Exclude files not needed for deployment
3
+
4
+ # Git files
5
+ .git/
6
+ .gitignore
7
+
8
+ # Python cache
9
+ __pycache__/
10
+ *.pyc
11
+ *.pyo
12
+ *.pyd
13
+ .Python
14
+
15
+ # Development files
16
+ .vscode/
17
+ .idea/
18
+ *.swp
19
+ *.swo
20
+
21
+ # OS files
22
+ .DS_Store
23
+ Thumbs.db
24
+
25
+ # Local development scripts
26
+ install.sh
27
+ install.bat
28
+ run_webapp.sh
29
+ run_webapp.bat
30
+ prepare_hf_deployment.sh
31
+
32
+ # Deployment configs for other platforms
33
+ railway.json
34
+ render.yaml
35
+ docker-compose.yml
36
+ deploy.sh
37
+ Procfile
38
+
39
+ # Documentation (keep only essential)
40
+ HUGGINGFACE_DEPLOYMENT.md
41
+
42
+ # Large files that aren't needed
43
+ designs/
44
+ *.png
45
+ *.jpg
46
+ *.jpeg
47
+
48
+ # Logs
49
+ logs/
50
+ *.log
Dockerfile CHANGED
@@ -22,7 +22,7 @@ COPY . .
22
  RUN mkdir -p uploads logs
23
 
24
  # Download NLTK data
25
- RUN python -c "import nltk; nltk.download('punkt'); nltk.download('stopwords')"
26
 
27
  # Expose port for Hugging Face Spaces
28
  EXPOSE 7860
 
22
  RUN mkdir -p uploads logs
23
 
24
  # Download NLTK data
25
+ RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('stopwords')"
26
 
27
  # Expose port for Hugging Face Spaces
28
  EXPOSE 7860
QUICK_START.md ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Smart Summarizer - Quick Start Guide
2
+
3
+ ## 🚀 Getting Started
4
+
5
+ ### 1. Installation
6
+
7
+ ```bash
8
+ # Clone the repository
9
+ git clone https://github.com/Rajak13/Smart-Summarizer.git
10
+ cd Smart-Summarizer
11
+
12
+ # Run installation script
13
+ # For macOS/Linux:
14
+ ./install.sh
15
+
16
+ # For Windows:
17
+ install.bat
18
+ ```
19
+
20
+ ### 2. Running the Web Application
21
+
22
+ ```bash
23
+ # Quick start (recommended)
24
+ # For macOS/Linux:
25
+ ./run_webapp.sh
26
+
27
+ # For Windows:
28
+ run_webapp.bat
29
+
30
+ # Or manually:
31
+ cd webapp
32
+ python app.py
33
+ ```
34
+
35
+ ### 3. Access the Application
36
+
37
+ Open your browser and navigate to:
38
+ ```
39
+ http://localhost:5001
40
+ ```
41
+
42
+ ## 📱 Features
43
+
44
+ ### 🏠 Home Page
45
+ - Overview of three summarization models
46
+ - Model comparison cards
47
+ - Quick navigation
48
+
49
+ ### 📄 Single Summary
50
+ - Generate summaries with individual models
51
+ - Upload files (.txt, .md, .pdf, .docx)
52
+ - Real-time processing metrics
53
+
54
+ ### ⚖️ Comparison
55
+ - Compare all three models side-by-side
56
+ - Synchronized input
57
+ - Performance metrics for each model
58
+
59
+ ### 📚 Batch Processing
60
+ - Process multiple documents simultaneously
61
+ - Load sample documents
62
+ - Export results to CSV
63
+ - Track processing status
64
+
65
+ ### 📊 Evaluation
66
+ - ROUGE metrics visualization
67
+ - Benchmark data comparison
68
+ - Model performance insights
69
+
70
+ ## 🤖 Models
71
+
72
+ ### TextRank (Extractive)
73
+ - **Speed**: Very fast (~0.03s)
74
+ - **Type**: Graph-based PageRank
75
+ - **Best for**: Quick summaries, keyword extraction
76
+
77
+ ### BART (Abstractive)
78
+ - **Speed**: Moderate (~9s on CPU)
79
+ - **Type**: Transformer encoder-decoder
80
+ - **Best for**: Fluent, human-like summaries
81
+
82
+ ### PEGASUS (Abstractive)
83
+ - **Speed**: Moderate (~6s on CPU)
84
+ - **Type**: Transformer encoder-decoder pre-trained with Gap Sentence Generation
85
+ - **Best for**: High-quality abstractive summaries
86
+
87
+ ## 📝 Supported File Types
88
+
89
+ - Plain text (`.txt`, `.md`)
90
+ - PDF documents (`.pdf`)
91
+ - Word documents (`.docx`)
92
+
93
+ ## 🔧 API Endpoints
94
+
95
+ ### POST /api/summarize
96
+ Generate summary with a single model
97
+ ```json
98
+ {
99
+ "text": "Your text here...",
100
+ "model": "bart"
101
+ }
102
+ ```
103
+
104
+ ### POST /api/compare
105
+ Compare all three models
106
+ ```json
107
+ {
108
+ "text": "Your text here..."
109
+ }
110
+ ```
111
+
112
+ ### POST /api/upload
113
+ Upload and extract text from file
114
+ ```
115
+ multipart/form-data with file
116
+ ```
117
+
118
+ ## 🧪 Testing
119
+
120
+ ```bash
121
+ # Test all routes
122
+ python test_webapp.py
123
+ ```
124
+
125
+ ## 📦 Project Structure
126
+
127
+ ```
128
+ Smart-Summarizer/
129
+ ├── webapp/ # Flask web application
130
+ │ ├── app.py # Main application
131
+ │ ├── templates/ # HTML templates
132
+ │ └── static/ # CSS, JS, assets
133
+ ├── models/ # Summarization models
134
+ ├── utils/ # Utility functions
135
+ ├── data/ # Data files
136
+ ├── notebooks/ # Jupyter notebooks
137
+ └── tests/ # Test files
138
+ ```
139
+
140
+ ## 🎨 Design
141
+
142
+ **Color Palette (Ink Wash):**
143
+ - Charcoal: #4A4A4A
144
+ - Cool Gray: #CBCBCB
145
+ - Soft Ivory: #FFFFE3
146
+ - Slate Blue: #6D8196
147
+
148
+ ## 🐛 Troubleshooting
149
+
150
+ ### Models not loading?
151
+ ```bash
152
+ # Reinstall dependencies
153
+ pip install -r requirements.txt
154
+ ```
155
+
156
+ ### Port 5001 already in use?
157
+ Edit `webapp/app.py` and change:
158
+ ```python
159
+ app.run(debug=True, port=5002) # Use different port
160
+ ```
161
+
162
+ ### File upload not working?
163
+ Check file size (max 16MB) and format (.txt, .md, .pdf, .docx)
164
+
165
+ ## 👨‍💻 Author
166
+
167
+ **Abdul Razzaq Ansari**
168
+
169
+ ## 🔗 Links
170
+
171
+ - GitHub: https://github.com/Rajak13/Smart-Summarizer
172
+ - Documentation: See `webapp/README.md`
173
+
174
+ ## 📄 License
175
+
176
+ © 2025 Smart Summarizer. Abdul Razzaq Ansari
177
+
178
+ ---
179
+
180
+ **Need help?** Check the documentation or open an issue on GitHub.
README.md CHANGED
@@ -62,4 +62,4 @@ Professional text summarization using three state-of-the-art models:
62
  ## Links
63
 
64
  - [GitHub Repository](https://github.com/Rajak13/Smart-Summarizer)
65
- - [Documentation](https://github.com/Rajak13/Smart-Summarizer/blob/main/QUICK_START.md)
 
62
  ## Links
63
 
64
  - [GitHub Repository](https://github.com/Rajak13/Smart-Summarizer)
65
+ - [Documentation](https://github.com/Rajak13/Smart-Summarizer/blob/main/QUICK_START.md)
config.yaml ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Smart Summarizer Configuration
2
+ # Professional settings for text summarization application
3
+
4
+ # Application Settings
5
+ app:
6
+ name: "Smart Summarizer"
7
+ version: "1.0.0"
8
+ author: "Abdul Razzaq Ansari"
9
+ student_id: "23049149"
10
+ module: "CU6051NI Artificial Intelligence"
11
+ institution: "London Metropolitan University"
12
+
13
+ # Model Configuration
14
+ models:
15
+ textrank:
16
+ enabled: true
17
+ damping_factor: 0.85
18
+ max_iterations: 100
19
+ tolerance: 1.0e-4 # decimal point required so YAML 1.1 loaders (e.g. PyYAML) parse a float, not a string
20
+ summary_ratio: 0.3
21
+ min_sentence_length: 5
22
+
23
+ bart:
24
+ enabled: true
25
+ model_name: "facebook/bart-large-cnn"
26
+ device: "cpu" # Change to "cuda" if GPU available
27
+ max_input_length: 1024
28
+ max_summary_length: 150
29
+ min_summary_length: 50
30
+ num_beams: 4
31
+ length_penalty: 2.0
32
+
33
+ pegasus:
34
+ enabled: true
35
+ model_name: "google/pegasus-cnn_dailymail"
36
+ device: "cpu" # Change to "cuda" if GPU available
37
+ max_input_length: 1024
38
+ max_summary_length: 128
39
+ min_summary_length: 30
40
+ num_beams: 4
41
+ length_penalty: 2.0
42
+
43
+ # Dataset Configuration
44
+ dataset:
45
+ name: "cnn_dailymail"
46
+ version: "3.0.0"
47
+ test_samples: 50
48
+ cache_dir: "./data/cache"
49
+
50
+ # Evaluation Settings
51
+ evaluation:
52
+ rouge_metrics:
53
+ - "rouge-1"
54
+ - "rouge-2"
55
+ - "rouge-l"
56
+ statistical_tests:
57
+ - "wilcoxon"
58
+ - "friedman"
59
+ significance_level: 0.05
60
+
61
+ # UI Configuration
62
+ ui:
63
+ theme: "light"
64
+ page_title: "Smart Summarizer"
65
+ page_icon: "📚"
66
+ layout: "wide"
67
+ sidebar_state: "expanded"
68
+
69
+ # Logging Configuration
70
+ logging:
71
+ level: "INFO"
72
+ format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
73
+ file: "logs/smart_summarizer.log"
74
+
75
+ # Performance Settings
76
+ performance:
77
+ cache_models: true
78
+ batch_size: 8
79
+ max_workers: 4
80
+ timeout: 300 # seconds
81
+
82
+ # Export Settings
83
+ export:
84
+ formats:
85
+ - "text"
86
+ - "json"
87
+ - "csv"
88
+ - "pdf"
89
+ default_format: "json"
90
+
91
+ # Sample Texts for Demo
92
+ samples:
93
+ ai_technology: |
94
+ Artificial intelligence has revolutionized modern technology in unprecedented ways.
95
+ Machine learning algorithms enable computers to learn from vast amounts of data without
96
+ explicit programming. Deep learning neural networks, inspired by the human brain, can
97
+ now recognize patterns in images, understand natural language, and even generate creative
98
+ content. Natural language processing has advanced to the point where AI systems can
99
+ engage in human-like conversations, translate between languages in real-time, and
100
+ summarize lengthy documents automatically.
101
+
102
+ climate_change: |
103
+ Climate change represents one of the most pressing challenges facing humanity in the
104
+ 21st century. Global temperatures have risen significantly over the past century,
105
+ primarily due to increased greenhouse gas emissions from human activities. The burning
106
+ of fossil fuels for energy, deforestation, and industrial processes have released
107
+ enormous amounts of carbon dioxide and methane into the atmosphere.
108
+
109
+ space_exploration: |
110
+ Space exploration has captured human imagination for decades and continues to push the
111
+ boundaries of what's possible. Since the first satellite launch in 1957 and the moon
112
+ landing in 1969, humanity has made remarkable progress in understanding our universe.
113
+ Modern space agencies like NASA, ESA, and private companies like SpaceX have developed
114
+ advanced technologies for space travel.
data/samples/cnn_dailymail_test.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "article": "(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed \"in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014.\" Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony, said it was a move toward greater justice. \"As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice,\" he said, according to an ICC news release. \"Indeed, today brings us closer to our shared goals of justice and peace.\" Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. \"As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly,\" she said. Rights group Human Rights Watch welcomed the development. 
\"Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court's treaty should speak out to welcome its membership,\" said Balkees Jarrah, international justice counsel for the group. \"What's objectionable is the attempts to undermine international justice, not Palestine's decision to join a treaty to which over 100 countries around the world are members.\" In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it \"strongly\" disagreed with the court's decision. \"As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC,\" the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. \"We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace,\" it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as \"Palestine.\" While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would \"conduct its analysis in full independence and impartiality.\" The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes. CNN's Vasco Cotovio, Kareem Khadder and Faith Karimi contributed to this report.",
4
+ "reference_summary": "Membership gives the ICC jurisdiction over alleged crimes committed in Palestinian territories since last June .\nIsrael and the United States opposed the move, which could open the door to war crimes investigations against Israelis .",
5
+ "id": "f001ec5c4704938247d27a44948eebb37ae98d01"
6
+ },
7
+ {
8
+ "article": "(CNN)Never mind cats having nine lives. A stray pooch in Washington State has used up at least three of her own after being hit by a car, apparently whacked on the head with a hammer in a misguided mercy killing and then buried in a field -- only to survive. That's according to Washington State University, where the dog -- a friendly white-and-black bully breed mix now named Theia -- has been receiving care at the Veterinary Teaching Hospital. Four days after her apparent death, the dog managed to stagger to a nearby farm, dirt-covered and emaciated, where she was found by a worker who took her to a vet for help. She was taken in by Moses Lake, Washington, resident Sara Mellado. \"Considering everything that she's been through, she's incredibly gentle and loving,\" Mellado said, according to WSU News. \"She's a true miracle dog and she deserves a good life.\" Theia is only one year old but the dog's brush with death did not leave her unscathed. She suffered a dislocated jaw, leg injuries and a caved-in sinus cavity -- and still requires surgery to help her breathe. The veterinary hospital's Good Samaritan Fund committee awarded some money to help pay for the dog's treatment, but Mellado has set up a fundraising page to help meet the remaining cost of the dog's care. She's also created a Facebook page to keep supporters updated. Donors have already surpassed the $10,000 target, inspired by Theia's tale of survival against the odds. On the fundraising page, Mellado writes, \"She is in desperate need of extensive medical procedures to fix her nasal damage and reset her jaw. I agreed to foster her until she finally found a loving home.\" She is dedicated to making sure Theia gets the medical attention she needs, Mellado adds, and wants to \"make sure she gets placed in a family where this will never happen to her again!\" Any additional funds raised will be \"paid forward\" to help other animals. 
Theia is not the only animal to apparently rise from the grave in recent weeks. A cat in Tampa, Florida, found seemingly dead after he was hit by a car in January, showed up alive in a neighbor's yard five days after he was buried by his owner. The cat was in bad shape, with maggots covering open wounds on his body and a ruined left eye, but remarkably survived with the help of treatment from the Humane Society.",
9
+ "reference_summary": "Theia, a bully breed mix, was apparently hit by a car, whacked with a hammer and buried in a field .\n\"She's a true miracle dog and she deserves a good life,\" says Sara Mellado, who is looking for a home for Theia .",
10
+ "id": "230c522854991d053fe98a718b1defa077a8efef"
11
+ },
12
+ {
13
+ "article": "(CNN)If you've been following the news lately, there are certain things you doubtless know about Mohammad Javad Zarif. He is, of course, the Iranian foreign minister. He has been U.S. Secretary of State John Kerry's opposite number in securing a breakthrough in nuclear discussions that could lead to an end to sanctions against Iran -- if the details can be worked out in the coming weeks. And he received a hero's welcome as he arrived in Iran on a sunny Friday morning. \"Long live Zarif,\" crowds chanted as his car rolled slowly down the packed street. You may well have read that he is \"polished\" and, unusually for one burdened with such weighty issues, \"jovial.\" An Internet search for \"Mohammad Javad Zarif\" and \"jovial\" yields thousands of results. He certainly has gone a long way to bring Iran in from the cold and allow it to rejoin the international community. But there are some facts about Zarif that are less well-known. Here are six: . In September 2013, Zarif tweeted \"Happy Rosh Hashanah,\" referring to the Jewish New Year. That prompted Christine Pelosi, the daughter of House Minority Leader Nancy Pelosi, to respond with a tweet of her own: \"Thanks. The New Year would be even sweeter if you would end Iran's Holocaust denial, sir.\" And, perhaps to her surprise, Pelosi got a response. \"Iran never denied it,\" Zarif tweeted back. \"The man who was perceived to be denying it is now gone. Happy New Year.\" The reference was likely to former Iranian President Mahmoud Ahmadinejad, who had left office the previous month. Zarif was nominated to be foreign minister by Ahmadinejad's successor, Hassan Rouhami. 
His foreign ministry notes, perhaps defensively, that \"due to the political and security conditions of the time, he decided to continue his education in the United States.\" That is another way of saying that he was outside the country during the demonstrations against the Shah of Iran, which began in 1977, and during the Iranian Revolution, which drove the shah from power in 1979. Zarif left the country in 1977, received his undergraduate degree from San Francisco State University in 1981, his master's in international relations from the University of Denver in 1984 and his doctorate from the University of Denver in 1988. Both of his children were born in the United States. The website of the Iranian Foreign Ministry, which Zarif runs, cannot even agree with itself on when he was born. The first sentence of his official biography, perhaps in a nod to the powers that be in Tehran, says Zarif was \"born to a religious traditional family in Tehran in 1959.\" Later on the same page, however, his date of birth is listed as January 8, 1960. And the Iranian Diplomacy website says he was born in in 1961 . So he is 54, 55 or maybe even 56. Whichever, he is still considerably younger than his opposite number, Kerry, who is 71. The feds investigated him over his alleged role in controlling the Alavi Foundation, a charitable organization. The U.S. Justice Department said the organization was secretly run on behalf of the Iranian government to launder money and get around U.S. sanctions. But last year, a settlement in the case, under which the foundation agreed to give a 36-story building in Manhattan along with other properties to the U.S. government, did not mention Zarif's name. Early in the Iranian Revolution, Zarif was among the students who took over the Iranian Consulate in San Francisco. The aim, says the website Iranian.com -- which cites Zarif's memoirs, titled \"Mr. Ambassador\" -- was to expel from the consulate people who were not sufficiently Islamic. 
Later, the website says, Zarif went to make a similar protest at the Iranian mission to the United Nations. In response, the Iranian ambassador to the United Nations offered him a job. In fact, he has now spent more time with Kerry than any other foreign minister in the world. And that amount of quality time will only increase as the two men, with help from other foreign ministers as well, try to meet a June 30 deadline for nailing down the details of the agreement they managed to outline this week in Switzerland.",
14
+ "reference_summary": "Mohammad Javad Zarif has spent more time with John Kerry than any other foreign minister .\nHe once participated in a takeover of the Iranian Consulate in San Francisco .\nThe Iranian foreign minister tweets in English .",
15
+ "id": "4495ba8f3a340d97a9df1476f8a35502bcce1f69"
16
+ },
17
+ {
18
+ "article": "(CNN)Five Americans who were monitored for three weeks at an Omaha, Nebraska, hospital after being exposed to Ebola in West Africa have been released, a Nebraska Medicine spokesman said in an email Wednesday. One of the five had a heart-related issue on Saturday and has been discharged but hasn't left the area, Taylor Wilson wrote. The others have already gone home. They were exposed to Ebola in Sierra Leone in March, but none developed the deadly virus. They are clinicians for Partners in Health, a Boston-based aid group. They all had contact with a colleague who was diagnosed with the disease and is being treated at the National Institutes of Health in Bethesda, Maryland. As of Monday, that health care worker is in fair condition. The Centers for Disease Control and Prevention in Atlanta has said the last of 17 patients who were being monitored are expected to be released by Thursday. More than 10,000 people have died in a West African epidemic of Ebola that dates to December 2013, according to the World Health Organization. Almost all the deaths have been in Guinea, Liberia and Sierra Leone. Ebola is spread by direct contact with the bodily fluids of an infected person.",
19
+ "reference_summary": "17 Americans were exposed to the Ebola virus while in Sierra Leone in March .\nAnother person was diagnosed with the disease and taken to hospital in Maryland .\nNational Institutes of Health says the patient is in fair condition after weeks of treatment .",
20
+ "id": "a38e72fed88684ec8d60dd5856282e999dc8c0ca"
21
+ },
22
+ {
23
+ "article": "(CNN)A Duke student has admitted to hanging a noose made of rope from a tree near a student union, university officials said Thursday. The prestigious private school didn't identify the student, citing federal privacy laws. In a news release, it said the student was no longer on campus and will face student conduct review. The student was identified during an investigation by campus police and the office of student affairs and admitted to placing the noose on the tree early Wednesday, the university said. Officials are still trying to determine if other people were involved. Criminal investigations into the incident are ongoing as well. Students and faculty members marched Wednesday afternoon chanting \"We are not afraid. We stand together,\" after pictures of the noose were passed around on social media. At a forum held on the steps of Duke Chapel, close to where the noose was discovered at 2 a.m., hundreds of people gathered. \"You came here for the reason that you want to say with me, 'This is no Duke we will accept. This is no Duke we want. This is not the Duke we're here to experience. And this is not the Duke we're here to create,' \" Duke President Richard Brodhead told the crowd. The incident is one of several recent racist events to affect college students. Last month a fraternity at the University of Oklahoma had its charter removed after a video surfaced showing members using the N-word and referring to lynching in a chant. Two students were expelled. In February, a noose was hung around the neck of a statue of a famous civil rights figure at the University of Mississippi. A statement issued by Duke said there was a previous report of hate speech directed at students on campus. 
In the news release, the vice president for student affairs called the noose incident a \"cowardly act.\" \"To whomever committed this hateful and stupid act, I just want to say that if your intent was to create fear, it will have the opposite effect,\" Larry Moneta said Wednesday. Duke University is a private college with about 15,000 students in Durham, North Carolina. CNN's Dave Alsup contributed to this report.",
24
+ "reference_summary": "Student is no longer on Duke University campus and will face disciplinary review .\nSchool officials identified student during investigation and the person admitted to hanging the noose, Duke says .\nThe noose, made of rope, was discovered on campus about 2 a.m.",
25
+ "id": "c27cf1b136cc270023de959e7ab24638021bc43f"
26
+ },
27
+ {
28
+ "article": "(CNN)He's a blue chip college basketball recruit. She's a high school freshman with Down syndrome. At first glance Trey Moses and Ellie Meredith couldn't be more different. But all that changed Thursday when Trey asked Ellie to be his prom date. Trey -- a star on Eastern High School's basketball team in Louisville, Kentucky, who's headed to play college ball next year at Ball State -- was originally going to take his girlfriend to Eastern's prom. So why is he taking Ellie instead? \"She's great... she listens and she's easy to talk to\" he said. Trey made the prom-posal (yes, that's what they are calling invites to prom these days) in the gym during Ellie's P.E. class. Trina Helson, a teacher at Eastern, alerted the school's newspaper staff to the prom-posal and posted photos of Trey and Ellie on Twitter that have gone viral. She wasn't surpristed by Trey's actions. \"That's the kind of person Trey is,\" she said. To help make sure she said yes, Trey entered the gym armed with flowers and a poster that read \"Let's Party Like it's 1989,\" a reference to the latest album by Taylor Swift, Ellie's favorite singer. Trey also got the OK from Ellie's parents the night before via text. They were thrilled. \"You just feel numb to those moments raising a special needs child,\" said Darla Meredith, Ellie's mom. \"You first feel the need to protect and then to overprotect.\" Darla Meredith said Ellie has struggled with friendships since elementary school, but a special program at Eastern called Best Buddies had made things easier for her. She said Best Buddies cultivates friendships between students with and without developmental disabilities and prevents students like Ellie from feeling isolated and left out of social functions. 
\"I guess around middle school is when kids started to care about what others thought,\" she said, but \"this school, this year has been a relief.\" Trey's future coach at Ball State, James Whitford, said he felt great about the prom-posal, noting that Trey, whom he's known for a long time, often works with other kids . Trey's mother, Shelly Moses, was also proud of her son. \"It's exciting to bring awareness to a good cause,\" she said. \"Trey has worked pretty hard, and he's a good son.\" Both Trey and Ellie have a lot of planning to do. Trey is looking to take up special education as a college major, in addition to playing basketball in the fall. As for Ellie, she can't stop thinking about prom. \"Ellie can't wait to go dress shopping\" her mother said. \"Because I've only told about a million people!\" Ellie interjected.",
29
+ "reference_summary": "College-bound basketball star asks girl with Down syndrome to high school prom .\nPictures of the two during the \"prom-posal\" have gone viral .",
30
+ "id": "1b2cc634e2bfc6f2595260e7ed9b42f77ecbb0ce"
31
+ },
32
+ {
33
+ "article": "(CNN)Governments around the world are using the threat of terrorism -- real or perceived -- to advance executions, Amnesty International alleges in its annual report on the death penalty. \"The dark trend of governments using the death penalty in a futile attempt to tackle real or imaginary threats to state security and public safety was stark last year,\" said Salil Shetty, Amnesty's Secretary General in a release. \"It is shameful that so many states around the world are essentially playing with people's lives -- putting people to death for 'terrorism' or to quell internal instability on the ill-conceived premise of deterrence.\" The report, \"Death Sentences and Executions 2014,\" cites the example of Pakistan lifting a six-year moratorium on the execution of civilians following the horrific attack on a school in Peshawar in December. China is also mentioned, as having used the death penalty as a tool in its \"Strike Hard\" campaign against terrorism in the restive far-western province of Xinjiang. The annual report catalogs the use of state-sanctioned killing as a punitive measure across the globe, and this year's edition contains some mixed findings. On one hand, the number of executions worldwide has gone down by almost 22% on the previous year. At least 607 people were executed around the world in 2014, compared to 778 in 2013. Amnesty's figures do not include statistics on executions carried out in China, where information on the practice is regarded as a state secret. Belarus and Vietnam, too, do not release data on death penalty cases. \"The long-term trend is definitely positive -- we are seeing a decrease in the number of executions (worldwide),\" Audrey Gaughran, Amnesty's Director of Global Issues, told CNN. \"A number of countries are closer to abolition, and there are some signs that some countries will be abolitionist by 2015. 
(There are) signals of a world that is nearing abolition.\" While the report notes some encouraging signs, it also highlights a marked increase in the number of people sentenced to death in 2014. At least 2,466 people globally are confirmed to have been handed the sentence last year, an increase of 28% compared with 2013. The report notes that the spike in sentencing is attributable to mass-sentencing in countries including Egypt and Nigeria, \"against scores of people in some cases.\" The organization found \"positive developments\" worldwide, with most regions seeming to show reductions in the number of executions. Opinion: Sharp spike in death sentences . Sub-Saharan Africa, for example, saw a 28% fall in reported cases, and executions recorded in the Middle East and North Africa were down 23% compared to 2013. \"Even though we've highlighted some of the negative developments... I think we would always highlight that there are positive developments,\" Gaughran said. \"Across the board, with the exception of Europe and Central Asia there were fewer reports of executions in every region.\" The resumption of the use of capital punishment in Belarus -- the only country in Europe and Central Asia to execute people -- after a two year hiatus spoiled an near-universal decrease in countries using the death penalty by region. The United States has the dubious distinction of being the only country in the Americas to conduct executions, but the number of convicts put to death here fell slightly, from 39 in 2013 to 35 in 2014. The state of Washington also imposed a moratorium on executions last year. The U.S. remains one of the worst offenders for imposing capital punishment, with only Iran (289+), Iraq (61+), and Saudi Arabia (90+) executing more people in 2014. 
While figures are not available, Amnesty estimates that China also executes \"thousands\" of prisoners each year, \"more than the rest of the world put together.\" The report also highlights the imperfections in the judiciary processes that lead to many sentenced to death. \"In the majority of countries where people were sentenced to death or executed, the death penalty was imposed after proceedings that did not meet international fair trial standards,\" the report stated. \"In 2014 Amnesty International raised particular concerns in relation to court proceedings in Afghanistan, Bangladesh, China, Egypt, Iran, Iraq, North Korea, Pakistan, Saudi Arabia and Sri Lanka.\" The United Nations Secretary-General, Ban Ki-moon, last year stressed the need to move toward abolition of capital punishment. \"The taking of life is too irreversible for one human being to inflict it on another,\" he said, in marking World Day against Death Penalty in October. \"We must continue to argue strongly that the death penalty is unjust and incompatible with fundamental human rights.\" Amnesty estimates that at least 19,094 people were believed to be on death row at the end of 2014.",
34
+ "reference_summary": "Amnesty's annual death penalty report catalogs encouraging signs, but setbacks in numbers of those sentenced to death .\nOrganization claims that governments around the world are using the threat of terrorism to advance executions .\nThe number of executions worldwide has gone down by almost 22% compared with 2013, but death sentences up by 28% .",
35
+ "id": "e2706dce6cf26bc61b082438188fdb6e130d9e40"
36
+ },
37
+ {
38
+ "article": "(CNN)Andrew Getty, one of the heirs to billions of oil money, appears to have died of natural causes, a Los Angeles Police Department spokesman said. The coroner's preliminary assessment is there was no foul play involved in the death of Getty, grandson of oil tycoon J. Paul Getty, said Detective Meghan Aguilar. Andrew Getty, 47, had \"several health issues,\" Aguilar said, adding that an autopsy will be conducted. There is no criminal investigation underway, he said. Some medication had also been recovered from Getty's home, though investigators don't know whether Getty was taking it or what his medical history was, Ed Winter, assistant chief in the Los Angeles County coroner's office, told CNN affiliate KTLA Tuesday night. KTLA reported that Getty was found on his side near a bathroom in his home. Getty's parents, Ann and Gordon Getty, released a statement confirming their son's death and asking for privacy. Where the Getty family fortune came from . Gordon Getty is one of three living sons of J. Paul Getty, the oil baron who was thought to be the richest man in the world at the time of his death in 1976. Gordon Getty, 81, has a net worth of $2.1 billion, according to Forbes. One other son died in 1958 and another died in 1973. Gordon Getty spearheaded the controversial sale of Getty to Texaco for $10 billion in 1984. In its list of richest American families, Forbes estimated the Gettys' net worth to be about $5 billion. Court records show Andrew Getty had recently filed to get a restraining order against an ex-girlfriend. A hearing in the case had been scheduled for next week. In his request, Getty said he had been diagnosed with a serious medical condition in 2013. \"A rise in my blood pressure places me in grave risk of substantial and irreparable injury or death,\" he wrote in the petition. \"My doctors have advised that heated arguments can cause my blood pressure to rise dangerously.\" Andrew Getty had three brothers and three half-sisters. 
People we've lost in 2015 . CNN's Doug Criss, Janet DiGiacomo, Mark Mooney, Mike Love, Julie In and Cheri Mossburg contributed to this report.",
39
+ "reference_summary": "Andrew Getty's death appears to be from natural causes, police say, citing coroner's early assessment .\nIn a petition for a restraining order, Getty had written he had a serious medical condition.\nPolice say this is not a criminal matter at this time .",
40
+ "id": "0d3c8c276d079c4c225f034c69aa024cdab7869d"
41
+ },
42
+ {
43
+ "article": "(CNN)Filipinos are being warned to be on guard for flash floods and landslides as tropical storm Maysak approached the Asian island nation Saturday. Just a few days ago, Maysak gained super typhoon status thanks to its sustained 150 mph winds. It has since lost a lot of steam as it has spun west in the Pacific Ocean. It's now classified as a tropical storm, according to the Philippine national weather service, which calls it a different name, Chedeng. It boasts steady winds of more than 70 mph (115 kph) and gusts up to 90 mph as of 5 p.m. (5 a.m. ET) Saturday. Still, that doesn't mean Maysak won't pack a wallop. Authorities took preemptive steps to keep people safe such as barring outdoor activities like swimming, surfing, diving and boating in some locales, as well as a number of precautionary evacuations. Gabriel Llave, a disaster official, told PNA that tourists who arrive Saturday in and around the coastal town of Aurora \"will not be accepted by the owners of hotels, resorts, inns and the like ... and will be advised to return to their respective places.\" Aldczar Aurelio, a meteorologist with the Philippine Atmospheric, Geophysical and Astronomical Services Administration (PAGASA), said the storm was centered 200 miles southwest of Aurora province as of 5 p.m. (5 a.m. ET) and heading west at a 12.5 mph clip. It's expected to make landfall Sunday morning on the southeastern coast of Isabela province and be out of the Philippines by Monday. Ahead of the storm. Isabela Gov. Faustino Dry III warned Saturday that residents should act as if this will be \"no ordinary typhoon.\" Dry told PNA, \"We do not know what the impact will be once it will make landfall.\"",
44
+ "reference_summary": "Once a super typhoon, Maysak is now a tropical storm with 70 mph winds .\nIt could still cause flooding, landslides and other problems in the Philippines .",
45
+ "id": "6222f33c2c79b80be437335eeb3f488509e92cf5"
46
+ },
47
+ {
48
+ "article": "(CNN)For the first time in eight years, a TV legend returned to doing what he does best. Contestants told to \"come on down!\" on the April 1 edition of \"The Price Is Right\" encountered not host Drew Carey but another familiar face in charge of the proceedings. Instead, there was Bob Barker, who hosted the TV game show for 35 years before stepping down in 2007. Looking spry at 91, Barker handled the first price-guessing game of the show, the classic \"Lucky Seven,\" before turning hosting duties over to Carey, who finished up. Despite being away from the show for most of the past eight years, Barker didn't seem to miss a beat.",
49
+ "reference_summary": "Bob Barker returned to host \"The Price Is Right\" on Wednesday .\nBarker, 91, had retired as host in 2007 .",
50
+ "id": "2bd8ada1de6a7b02f59430cc82045eb8d29cf033"
51
+ },
52
+ {
53
+ "article": "London (CNN)A 19-year-old man was charged Wednesday with terror offenses after he was arrested as he returned to Britain from Turkey, London's Metropolitan Police said. Yahya Rashid, a UK national from northwest London, was detained at Luton airport on Tuesday after he arrived on a flight from Istanbul, police said. He's been charged with engaging in conduct in preparation of acts of terrorism, and with engaging in conduct with the intention of assisting others to commit acts of terrorism. Both charges relate to the period between November 1 and March 31. Rashid is due to appear in Westminster Magistrates' Court on Wednesday, police said. CNN's Lindsay Isaac contributed to this report.",
54
+ "reference_summary": "London's Metropolitan Police say the man was arrested at Luton airport after landing on a flight from Istanbul .\nHe's been charged with terror offenses allegedly committed since the start of November .",
55
+ "id": "ee17dfb574feca82ccac5689595e47483bd23f12"
56
+ },
57
+ {
58
+ "article": "(CNN)Paul Walker is hardly the first actor to die during a production. But Walker's death in November 2013 at the age of 40 after a car crash was especially eerie given his rise to fame in the \"Fast and Furious\" film franchise. The release of \"Furious 7\" on Friday offers the opportunity for fans to remember -- and possibly grieve again -- the man that so many have praised as one of the nicest guys in Hollywood. \"He was a person of humility, integrity, and compassion,\" military veteran Kyle Upham said in an email to CNN. Walker secretly paid for the engagement ring Upham shopped for with his bride. \"We didn't know him personally but this was apparent in the short time we spent with him. I know that we will never forget him and he will always be someone very special to us,\" said Upham. The actor was on break from filming \"Furious 7\" at the time of the fiery accident, which also claimed the life of the car's driver, Roger Rodas. Producers said early on that they would not kill off Walker's character, Brian O'Connor, a former cop turned road racer. Instead, the script was rewritten and special effects were used to finish scenes, with Walker's brothers, Cody and Caleb, serving as body doubles. There are scenes that will resonate with the audience -- including the ending, in which the filmmakers figured out a touching way to pay tribute to Walker while \"retiring\" his character. At the premiere Wednesday night in Hollywood, Walker's co-star and close friend Vin Diesel gave a tearful speech before the screening, saying \"This movie is more than a movie.\" \"You'll feel it when you see it,\" Diesel said. \"There's something emotional that happens to you, where you walk out of this movie and you appreciate everyone you love because you just never know when the last day is you're gonna see them.\" There have been multiple tributes to Walker leading up to the release. 
Diesel revealed in an interview with the \"Today\" show that he had named his newborn daughter after Walker. Social media has also been paying homage to the late actor. A week after Walker's death, about 5,000 people attended an outdoor memorial to him in Los Angeles. Most had never met him. Marcus Coleman told CNN he spent almost $1,000 to truck in a banner from Bakersfield for people to sign at the memorial. \"It's like losing a friend or a really close family member ... even though he is an actor and we never really met face to face,\" Coleman said. \"Sitting there, bringing his movies into your house or watching on TV, it's like getting to know somebody. It really, really hurts.\" Walker's younger brother Cody told People magazine that he was initially nervous about how \"Furious 7\" would turn out, but he is happy with the film. \"It's bittersweet, but I think Paul would be proud,\" he said. CNN's Paul Vercammen contributed to this report.",
59
+ "reference_summary": "\"Furious 7\" pays tribute to star Paul Walker, who died during filming .\nVin Diesel: \"This movie is more than a movie\"\n\"Furious 7\" opens Friday .",
60
+ "id": "384175be1c8d41610fbeddbd9d9cb46e716e2529"
61
+ },
62
+ {
63
+ "article": "(CNN)Seventy years ago, Anne Frank died of typhus in a Nazi concentration camp at the age of 15. Just two weeks after her supposed death on March 31, 1945, the Bergen-Belsen concentration camp where she had been imprisoned was liberated -- timing that showed how close the Jewish diarist had been to surviving the Holocaust. But new research released by the Anne Frank House shows that Anne and her older sister, Margot Frank, died at least a month earlier than previously thought. Researchers re-examined archives of the Red Cross, the International Training Service and the Bergen-Belsen Memorial, along with testimonies of survivors. They concluded that Anne and Margot probably did not survive to March 1945 -- contradicting the date of death which had previously been determined by Dutch authorities. In 1944, Anne and seven others hiding in the Amsterdam secret annex were arrested and sent to the Auschwitz-Birkenau concentration camp. Anne Frank's final entry . That same year, Anne and Margot were separated from their mother and sent away to work as slave labor at the Bergen-Belsen camp in Germany. Days at the camp were filled with terror and dread, witnesses said. The sisters stayed in a section of the overcrowded camp with no lighting, little water and no latrine. They slept on lice-ridden straw and violent storms shredded the tents, according to the researchers. Like the other prisoners, the sisters endured long hours at roll call. Her classmate, Nannette Blitz, recalled seeing Anne there in December 1944: \"She was no more than a skeleton by then. She was wrapped in a blanket; she couldn't bear to wear her clothes anymore because they were crawling with lice.\" Listen to Anne Frank's friends describe her concentration camp experience . As the Russians advanced further, the Bergen-Belsen concentration camp became even more crowded, bringing more disease. A deadly typhus outbreak caused thousands to die each day. 
Typhus is an infectious disease caused by lice that breaks out in places with poor hygiene. The disease causes high fever, chills and skin eruptions. \"Because of the lice infesting the bedstraw and her clothes, Anne was exposed to the main carrier of epidemic typhus for an extended period,\" museum researchers wrote. They concluded that it's unlikely the sisters survived until March, because witnesses at the camp said the sisters both had symptoms before February 7. \"Most deaths caused by typhus occur around twelve days after the first symptoms appear,\" wrote authors Erika Prins and Gertjan Broek. The exact dates of death for Anne and Margot remain unclear. Margot died before Anne. \"Anne never gave up hope,\" said Blitz, her friend. \"She was absolutely convinced she would survive.\" Her diary endures as one of the world's most popular books. Read more about Anne Frank's cousin, a keeper of her legacy .",
64
+ "reference_summary": "Museum: Anne Frank died earlier than previously believed .\nResearchers re-examined archives and testimonies of survivors .\nAnne and older sister Margot Frank are believed to have died in February 1945 .",
65
+ "id": "203886369feea77bbc35715e6d7e518b751f57de"
66
+ },
67
+ {
68
+ "article": "(CNN)A year ago Bloomberg published a story with the following headline: Mike Pence, a Koch Favorite, Mulls 2016 Run for President. The story ticked off items on Pence's conservative things-to-do list while also noting his close ties to the deep-pocketed Koch brothers, as well as other right-wing lobbying groups. Last August the Indiana governor was in Dallas for an Americans for Prosperity event; the group is backed by the conservative Koch brothers, and supported Gov. Pence's tax-slashing budget. Now, Pence is drawing huge heat for his controversial decision to sign a religious freedom law last week that opens the door to discrimination against gays and lesbians. Why would Pence ignore the pleas of Indiana's Chamber of Commerce as well as the Republican mayor of his state capital and sign such a bill? Because there's a very powerful wing of his party that wants a conservative as its 2016 candidate and this bill was Pence's way of shoring up his street cred. It is also the reason why Republican Jeb Bush, Pence's fellow White House hopeful, who is viewed as a little light in that category, was first to rush in to defend Pence and the law. One lesson here: Just because more than 70% of the country now lives in states where same-sex marriage is legal does not mean 70% of the country is happy about it. Backlash aside, the fact is Pence has scored a lot of points this week among ultraconservatives. And while that may not be enough to get him over this political hump, the very public debate that now embroils him — and Arkansas Gov. Asa Hutchinson, and likely 14 other states considering similar proposals this year -- is more than enough to drag the entire Republican field farther to the right than the party had hoped. Pence: 'Was I expecting this kind of backlash? Heavens no.' For there is no way a Republican can get through the pending primary without denouncing LGBT rights, which unfortunately will turn numerous Americans into single-issue voters. 
I foolishly hoped the issue of LGBT rights would be a bit player in the 2016 general election, overshadowed by foreign policy and the economy. Instead it looks like it's going to be dragged down to a replay of Pat Buchanan's \"cultural war\" speech, during which he told the 1992 Republican National Convention: \"We stand with (George H.W. Bush) against the amoral idea that gay and lesbian couples should have the same standing in law as married men and women\" and later followed with \"There is a religious war going on in this country. It is a cultural war, as critical to the kind of nation we shall be as the Cold War itself. For this war is for the soul of America.\" Progressives may enjoy watching Pence's temporary fall from grace, but his policy rhetoric has echoed that of 2016 hopeful Sen. Ted Cruz of Texas, who has indicated a federal ban on same-sex marriage is not off the GOP table. And even if you think neither Pence nor Bush nor Cruz will win the nomination, someone has to. In light of that, listen to conservative former Arkansas Gov. Mike Huckabee, a potential 2016 candidate describing conservatives' discomfort with same-sex marriage: \"It's like asking someone who's Jewish to start serving bacon-wrapped shrimp in their deli.\" Or Louisiana Gov. Bobby Jindal: \"I certainly will support Ted Cruz and others that are talking about making ... a constitutional amendment to allow states to continue to define marriage.\" Or Wisconsin Gov. Scott Walker, who has a long history of fighting against same-sex marriage and civil unions. And Ben Carson said jail turns people gay, so there's that. Remember: Pence didn't act alone. He only signed a bill that first passed muster with other elected officials. In fact, according to the American Civil Liberties Union, \"the Indiana RFRA [Religious Freedom Restoration Act] is one of 24 introduced in 15 states this year that could allow someone to use their religious beliefs to discriminate. 
Numerous other bills specifically single out the LGBT community for unequal treatment.\" Who supports, denounces Indiana law? Gallup Polls may suggest voters nationwide are more gay-friendly, but the trend on the state level tells a different story. Perhaps we're witnessing the final gasp of long-ago biases. Or maybe those biases are having a rebirth we had underestimated. Former Rep. Barney Frank of Massachusetts, the first member of Congress to marry someone of the same sex while in office, said he believes Republicans want the Supreme Court to rule in favor of same-sex marriage to provide political cover in the GOP primary. \"We're winning,\" he told a crowd in Chicago recently while promoting his latest book. I guess if you look at where the country was on LGBT issues 10 years ago, we definitely are. That's assuming you are part of the \"we\" who believe LGBT people should have the same rights as their heterosexual/cisgender counterparts. But as the situation in Indiana has shown, \"winning\" should not be mistaken for having \"won.\" For it is doubtful that a candidate will be able to avoid taking a position on the wave of so-called \"religious freedom\" bills snaking through red-state legislatures. Or to sidestep the topic of a constitutional amendment when it's raised in a debate or at a campaign stop, especially with Republicans controlling both the House and the Senate. Pence, and to a lesser extent, Jeb Bush, may be toxic now but America has a short attention span. More importantly, they are not alone. Frank said when progressives get angry they march in the streets, and when conservatives get mad they march to the polls. If that holds true in 2016, \"winning\" is going to feel very strange.",
69
+ "reference_summary": "LZ: Indiana law pushing back LGBT rights, and other states' anti-LGBT moves, bow to far right wing that GOP candidates need for 2016 .\nCruz, Huckabee, Jindal, Carson, Walker are reviving culture wars, he says. Equality for LGBT has not yet \"won\" in America .",
70
+ "id": "1dd00c89d71a5611797bb34da8a6eada8f058405"
71
+ },
72
+ {
73
+ "article": "(CNN)If you're famous and performing the American national anthem, be prepared to become a national hero or a national disgrace. Facts are facts. Just ask Vince, Whitney, Roseanne, Jimi and Michael. Mötley Crüe's Vince Neil reminded us again this week of the dangers of tackling \"The Star-Spangled Banner.\" Sure, he can shred it on \"Girls, Girls, Girls\" and \"Dr. Feelgood,\" but this is a different story -- a completely different story. To say Neil butchered the song before the Las Vegas Outlaws Arena Football League game would be unkind to those in the profession. There's less carnage when butchers are done with their work. The late Whitney Houston set the modern standard for the national anthem at Super Bowl XXV. In the early stages of the Gulf War in 1991, a patriotic America saluted her performance. Just six months earlier, comedian Roseanne Barr may have established the low-water mark. The crowd at the San Diego Padres game booed her rendition and President George H. W. Bush called it \"disgraceful.\" There's nothing quite like getting the presidential thumbs down. One of the most controversial and beloved versions of \"The Star-Spangled Banner\" comes from 1969. Guitar slinger Jimi Hendrix inflamed mainstream America with his psychedelic take on the national anthem to the delight of the Woodstock generation. And then there's Michael Bolton's version. Overly wrought songs are his specialty and he doesn't disappoint in that department when he sings at the American League Championship Series in 2003. Bolton belts it out, but there's one little problem -- the words. Can anyone say crib notes?",
74
+ "reference_summary": "Singing the national anthem is a risky proposition .\nWhitney Houston nailed it; Roseanne Barr destroyed it .",
75
+ "id": "eeafdc8b2d8130cabda5aafe352eab1198d0b9f8"
76
+ },
77
+ {
78
+ "article": "(CNN)As goes Walmart, so goes the nation? Everyone from Apple CEO Tim Cook to the head of the NCAA slammed religious freedom laws being considered in several states this week, warning that they would open the door to discrimination against gay and lesbian customers. But it was the opposition from Walmart, the ubiquitous retailer that dots the American landscape, that perhaps resonated most deeply, providing the latest evidence of growing support for gay rights in the heartland. Walmart's staunch criticism of a religious freedom law in its home state of Arkansas came after the company said in February it would boost pay for about 500,000 workers well above the federal minimum wage. Taken together, the company is emerging as a bellwether for shifting public opinion on hot-button political issues that divide conservatives and liberals. And some prominent Republicans are urging the party to take notice. Former Minnesota Gov. Tim Pawlenty, who famously called on the GOP to \"be the party of Sam's Club, not just the country club,\" told CNN that Walmart's actions \"foreshadow where the Republican Party will need to move.\" \"The Republican Party will have to better stand for\" ideas on helping the middle class, said Pawlenty, the head of the Financial Services Roundtable, a Washington lobbying group for the finance industry. The party's leaders must be \"willing to put forward ideas that will help modest income workers, such as a reasonable increase in the minimum wage, and prohibit discrimination in things such as jobs, housing, public accommodation against gays and lesbians.\" Walmart, which employs more than 50,000 people in Arkansas, emerged victorious on Wednesday. Hours after the company's CEO, Doug McMillon, called on Republican Gov. Asa Hutchinson to veto the bill, the governor held a news conference and announced he would not sign the legislation unless its language was fixed. 
Walmart's opposition to the religious freedom law once again puts the company at odds with many in the Republican Party, which the company's political action committee has tended to support. In 2004, the Walmart PAC gave around $2 million to Republicans versus less than $500,000 to Democrats, according to data from the Center for Responsive Politics. That gap has grown less pronounced in recent years. In 2014, the PAC spent about $1.3 million to support Republicans and around $970,000 for Democrats. It has been a gradual transformation for Walmart. In 2011, the company bulked up its nondiscrimination policies by adding protections for gender identity. Two years later, the company announced that it would start offering health insurance benefits to same-sex partners of employees starting in 2014. Retail experts say Walmart's evolution on these issues over the years is partly a reflection of its diverse consumer base, as well as a recognition of the country's increasingly progressive views of gay equality (support for same-sex marriage is at a new high of 59%, according to a recent Wall Street Journal/NBC News poll). \"It's easy for someone like a Chick-fil-A to take a really polarizing position,\" said Dwight Hill, a partner at the retail consulting firm McMillanDoolittle. \"But in the world of the largest retailer in the world, that's very different.\" Hill added: Same-sex marriage, \"while divisive, it's becoming more common place here within the U.S., and the businesses by definition have to follow the trend of their customer.\" The backlash over the religious freedom measures in Indiana and Arkansas this week is shining a bright light on the broader business community's overwhelming support for workplace policies that promote gay equality. After Indiana Gov. Mike Pence, a Republican, signed his state's religious freedom bill into law, CEOs of companies big and small across the country threatened to pull out of the Hoosier state. 
The resistance came from business leaders of all political persuasions, including Bill Oesterle, CEO of the business-rating website Angie's List and a one-time campaign manager for former Indiana Gov. Mitch Daniels. Oesterle announced that his company would put plans on hold to expand its footprint in Indianapolis in light of the state's passage of the religious freedom act. NASCAR, scheduled to hold a race in Indianapolis this summer, also spoke out against the Indiana law. \"What we're seeing over the past week is a tremendous amount of support from the business community who are standing up and are sending that equality is good for business and discrimination is bad for business,\" said Jason Rahlan, spokesman for the Human Rights Campaign. The debate has reached presidential politics. National Republicans are being forced to walk the fine line of protecting religious liberties and supporting nondiscrimination. Likely GOP presidential candidate Jeb Bush initially backed Indiana's religious freedom law and Pence, but moderated his tone a few days later. The former Florida governor said Wednesday that Indiana could have taken a \"better\" and \"more consensus-oriented approach.\" \"By the end of the week, Indiana will be in the right place,\" Bush said, a reference to Pence's promise this week to fix his state's law in light of the widespread backlash. Others in the GOP field are digging in. Sen. Ted Cruz of Texas, the only officially declared Republican presidential candidate, said Wednesday that he had no interest in second-guessing Pence and lashed out at the business community for opposing the law. \"I think it is unfortunate that large companies today are listening to the extreme left wing agenda that is driven by an aggressive gay marriage agenda,\" Cruz said. 
Meanwhile, former Secretary of State Hillary Clinton, who previously served on Walmart's board of directors, called on Hutchinson to veto the Arkansas bill, saying it would \"permit unfair discrimination\" against the LGBT community. Jay Chesshir, CEO of the Little Rock Regional Chamber of Commerce in Arkansas, welcomed Hutchinson's pledge on Wednesday to seek changes to his state's bill. He said businesses are not afraid to wade into a politically controversial debate to ensure inclusive workplace policies. \"When it comes to culture and quality of life, businesses are extremely interested in engaging in debate simply because it impacts its more precious resource -- and that's its people,\" Chesshir said. \"Therefore, when issues arise that have negative or positive impact on those things, then the business community will again speak and speak loudly.\"",
79
+ "reference_summary": "While Republican Gov. Asa Hutchinson was weighing an Arkansas religious freedom bill, Walmart voiced its opposition .\nWalmart and other high-profile businesses are showing their support for gay and lesbian rights .\nTheir stance puts them in conflict with socially conservative Republicans, traditionally seen as allies .",
80
+ "id": "dc833f8b55e381011ce23f89ea909b9a141b5a66"
81
+ },
82
+ {
83
+ "article": "(CNN)On May 28, 2014, some 7,000 people gathered in a stadium in China's northwestern Xinjiang region. But they had not come to watch the local football team or any other grand sporting event. Instead, the authorities paraded scores of prisoners dressed in orange jumpsuits. Armed soldiers guarded the exits. In the patently unfair, open air trial that followed, 55 people were found guilty of a range of offenses linked to violent attacks in the region and jailed. Three were sentenced to death. The public mass sentencing was part a China's \"Strike Hard\" campaign against unrest in Xinjiang, a campaign the government claims was launched to combat \"terrorism\" and \"separatism.\" But it was also indicative of a trend that was starkly evident last year around the world -- governments using the death penalty in a misguided, and often cynical, attempt to tackle crime and terrorism. Today, Amnesty International releases its annual review of the death penalty worldwide. Much of it makes for grim reading. In Pakistan, the government lifted a six-year moratorium on the execution of civilians in the wake of the horrific Taliban attack on a school in Peshawar in December. More than 60 people have been put to death since, and the government has threatened to send thousands more death row prisoners to the gallows. Iran and Iraq executed people for \"terrorism,\" and other countries expanded the scope of capital crimes in their penal codes. In a year when abhorrent summary executions by armed groups were branded on the global consciousness as never before, governments are themselves resorting to more executions in a knee-jerk reaction to terrorism. Other countries made use of executions in similarly flawed attempts to address -- or appear to address -- crime rates. Jordan ended an eight-year moratorium in December, putting 11 murder convicts to death, with the government saying it was a move to end a surge in violent crime. 
In Indonesia, authorities announced plans to execute mainly drug traffickers to tackle a public safety \"national emergency.\" Six people have already been executed this year. A sharp spike in death sentences recorded in 2014 -- up more than 500 on the previous year -- can also be attributed to governments using the death penalty as a political tool. The rise was largely because of developments in Egypt and Nigeria, where courts imposed hundreds of death sentences in the context of internal political instability or crime and armed conflict. The simple fact is that governments using the death penalty to tackle crime and security threats are deceiving themselves or the public or both. There is no evidence that the threat of execution is more of a deterrent to crime than a prison sentence, as United Nations and other studies have repeatedly confirmed. It is high time that world leaders stop using the death penalty as an easy way out when times get tough. At Amnesty International, we have campaigned for an end to the death penalty for decades. Thankfully, most of the world now appears to agree with us. The numbers speak for themselves. In 1945 when the United Nations was founded, only eight countries had abolished the death penalty. Today, 140 states are abolitionist in law or practice. Last year, we recorded executions in 22 countries, down by almost a half from 20 years ago. Despite the troubling developments we recorded last year, there was still much good news to be found. The number of executions recorded around the world dropped significantly in 2014 compared with the previous year, from 778 to 607. This number does not include China, where more people are put to death than the rest of the world put together, but with death penalty statistics treated as a state secret, the true figure is impossible to determine. 
Executions were recorded in only three countries in sub-Saharan Africa -- Equatorial Guinea, Somalia and Sudan -- and the number of people put to death went down by more than a quarter. The Americas continued to be execution-free, apart from the United States. Those governments that still execute need to realize that they are on the wrong side of history. They must join the vast majority of countries which have dropped the ultimate cruel punishment. Fighting for an end to the death penalty remains an uphill task, but all of us must try to make the world free of this punishment. With determination, I know that we can achieve this goal.",
84
+ "reference_summary": "Amnesty International releases its annual review of the death penalty worldwide; much of it makes for grim reading .\nSalil Shetty: Countries that use executions to deal with problems are on the wrong side of history .",
85
+ "id": "c222979bd1cfbc7d3ff821e9c738e3dbd29b14f4"
86
+ },
87
+ {
88
+ "article": "Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor Brice Robin told CNN that \"so far no videos were used in the crash investigation.\" He added, \"A person who has such a video needs to immediately give it to the investigators.\" Robin's comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a phone at the wreckage site. The two publications described the supposed video, but did not post it on their websites. The publications said that they watched the video, which was found by a source close to the investigation. \"One can hear cries of 'My God' in several languages,\" Paris Match reported. \"Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the screaming intensifies. Then nothing.\" \"It is a very disturbing scene,\" said Julian Reichelt, editor-in-chief of Bild online. An official with France's accident investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. 
Jean-Marc Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the reports were \"completely wrong\" and \"unwarranted.\" Cell phones have been collected at the site, he said, but that they \"hadn't been exploited yet.\" Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working hand-in-hand with investigators. But none of the cell phones found so far have been sent to the institute, Menichini said. Asked whether staff involved in the search could have leaked a memory card to the media, Menichini answered with a categorical \"no.\" Reichelt told \"Erin Burnett: Outfront\" that he had watched the video and stood by the report, saying Bild and Paris Match are \"very confident\" that the clip is real. He noted that investigators only revealed they'd recovered cell phones from the crash site after Bild and Paris Match published their reports. \"That is something we did not know before. ... Overall we can say many things of the investigation weren't revealed by the investigation at the beginning,\" he said. What was mental state of Germanwings co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the controls of Germanwings Flight 9525, which he's accused of deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a \"previous episode of severe depression,\" the airline said Tuesday. Email correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa said, included medical documents he submitted in connection with resuming his flight training. 
The announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz's battle with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday as a \"swift and seamless clarification\" and said it was sharing the information and documents -- including training and medical records -- with public prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the past week to recover human remains and plane debris scattered across a steep mountainside. He saw the crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no visible human remains were left at the site but recovery teams would keep searching. French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested. In the meantime, the recovery of the victims' personal belongings will start Wednesday, Menichini said. Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew on board. Check out the latest from our correspondents . The details about Lubitz's correspondence with the flight school during his training were among several developments as investigators continued to delve into what caused the crash and Lubitz's possible motive for downing the jet. 
A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his examinations and \"held all the licenses required.\" Earlier, a spokesman for the prosecutor's office in Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent psychotherapy before he got his pilot's license. Kumpa emphasized there's no evidence suggesting Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to lose his pilot's license, a European government official briefed on the investigation told CNN on Tuesday. While flying was \"a big part of his life,\" the source said, it's only one theory being considered. Another source, a law enforcement official briefed on the investigation, also told CNN that authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly because of his medical problems. Lubitz's girlfriend told investigators he had seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had psychological issues, the European government official said. But no matter what details emerge about his previous mental health struggles, there's more to the story, said Brian Russell, a forensic psychologist. \"Psychology can explain why somebody would turn rage inward on themselves about the fact that maybe they weren't going to keep doing their job and they're upset about that and so they're suicidal,\" he said. \"But there is no mental illness that explains why somebody then feels entitled to also take that rage and turn it outward on 149 other people who had nothing to do with the person's problems.\" Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight 9525? 
CNN's Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura Smith-Spark wrote from London. CNN's Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.",
89
+ "reference_summary": "Marseille prosecutor says \"so far no videos were used in the crash investigation\" despite media reports .\nJournalists at Bild and Paris Match are \"very confident\" the video clip is real, an editor says .\nAndreas Lubitz had informed his Lufthansa training school of an episode of severe depression, airline says .",
90
+ "id": "469c6ac05092ca5997728c9dfc19f9ab6b936e40"
91
+ },
92
+ {
93
+ "article": "(CNN)The Rev. Robert H. Schuller, California televangelist and founder of the television ministry \"Hour of Power,\" died Thursday, according to his family. He was 88 years old. Schuller, also the founder of Crystal Cathedral megachurch, had been diagnosed with esophageal cancer in August 2013, a release from \"Hour of Power\" said. \"My father-in-law passed away peacefully early this morning. He was a great Dad and a great man of God,\" said Schuller's daughter-in-law, Donna Schuller, in a Twitter message. Schuller's life followed an almost Shakespearean arc. He was born in a Iowa farmhouse without running water and longed to preach from his earliest days. In his autobiography, \"Prayer: My Soul's Adventure with God,\" he described standing alone by a river and picturing himself delivering sermons to a rapt congregation. After attending a Hope College and Western Theological Seminary in Michigan, he met his wife of more than 60 years, Arvella, while preaching at her church (she was the organist). With their young family in tow, the Schullers caravanned west to California, where he rented a drive-in theater and preached from the roof of the snack bar. It was beneath the dignity of Christian ministry, some local pastors huffed. The \"passion pits\" where teenagers necked was no place for the gospel. Schuller was undeterred, and he quickly outgrew the drive-in. He called the explosive growth of his tiny congregation a \"miracle,\" though his many mainstream critics had other names for it. His confident, breezy version of Christianity -- too breezy, by some estimations -- drew hordes of seekers and lapsed Christians who were put off by the hellfire fulminations of many post-War American preachers. Schuller sold a softer, gentler message, which borrowed heavily, he acknowledged, from the father of the feel-good gospel, Norman Vincent Peale. 
He preached not to convert or condemn people, but to encourage them, a sentiment he called \"possibility thinking.\" People loved it. \"Evangelicalism at its best wants to be innovative and reach people,\" said Timothy Larsen, a professor of Christian thought at Wheaton College in Illinois. \"And Schuller was a master at that.\" \"What he got right is that the gospel is good news,\" Larsen continued. \"And he preached an uplifting message about personal transformation and uplift and hope.\" Some of Schuller's favored phrases, though, struck others as cornpone Christianity. \"Turn your hurt into a halo?\" said Randall Balmer, a professor of American religious history at Dartmouth College, citing one such phrase. \"That's pretty weak tea.\" Still, Balmer gives Schuller some credit. \"It may be bad theology, but it's brilliant marketing.\" In 1970, Schuller began broadcasting \"Hour of Power,\" believed to be one of the first, if not the very first, Sunday service to be shown regularly on television. With his genial smile, priestly robes and gray hair, he looked and talked like a guy who wanted nothing more than to see his flock succeed. The show, which ran for decades, reached millions, making Schuller a televangelist before the term became tarnished by the sins of his many successors. Schuller's crowning achievement, at least architecturally, still stands in Orange County, California, though it is now owned by the Roman Catholic Church. The Crystal Cathedral, a great gleaming edifice with 10,000 glass panels, gave worshipers a look at the clouds that house the heavens, while Schuller preached in the pulpit below. The message was clear to many: The road to the former ran through the latter. During the 1980s and 1990s, Schuller's star continued to rise, with presidents stopping by the Crystal Cathedral -- often during campaigns, it should be said -- and future megachurch pastors like Rick Warren and Bill Hybels seeking his advice. 
As Schuller aged, though, his family was beset by a succession scandal straight from the pages of \"King Lear.\" He tried to install his only son, Bobby Jr., as pastor of Crystal Cathedral. But the preaching styles of father and son were too different for the congregation -- measured at times at 10,000 strong -- to countenance. Bobby Schuller Jr. left \"Hour of Power\" and the pulpit at Crystal Cathedral after a short time. As the family searched for a new successor and tussled over finances, viewers and donations to the church and its television show dropped precipitously. Crystal Cathedral Ministries filed for bankruptcy in 2010, citing debts of more than $43 million, according to The Associated Press. Schuller's empire, which once soared as high as his glassy cathedral, had fallen to dust. Eventually, Schuller's grandson, also named Bobby, took over \"Hour of Power,\" though at a different church. In a statement on Thursday, the younger Schuller recalled standing atop Crystal Cathedral's 12-story Tower of Hope with his grandfather as they surveyed the surrounding landscape. \"You could see the whole world from there,\" he said. People we've lost in 2015 . CNN's Stella Chan reported from Los Angeles.",
94
+ "reference_summary": "The Rev. Robert Schuller, 88, had been diagnosed with esophageal cancer in 2013 .\nHis TV show, \"Hour of Power,\" was enormously popular in the 1970s and 1980s .",
95
+ "id": "88e7dac40f3e6c159e4edc0747d0cc0f50886fbb"
96
+ },
97
+ {
98
+ "article": "(CNN)Michele Bachmann is comparing President Obama to the co-pilot of the doomed Germanwings flight. \"With his Iran deal, Barack Obama is for the 300 million souls of the United States what Andreas Lubitz was for the 150 souls on the German Wings flight - a deranged pilot flying his entire nation into the rocks,\" the Minnesota Republican and former representative wrote in a Facebook comment posted March 31. \"After the fact, among the smoldering remains of American cities, the shocked survivors will ask, why did he do it?\" Andreas Lubitz, the co-pilot of Germanwings Flight 9525, is accused by authorities of deliberately crashing the plane in the French Alps. He died in the crash along with 149 other crew and passengers. The motive of the March 24 crash is under investigation, though investigators are looking in to whether Lubitz feared a medical condition would cause him to lose his pilot's license. Many comments posted on her Facebook page blasted the former representative. Melissa Coca wrote, \"Comparing this tragedy to anything is moronic and despicable.\" Michael J Pristash wrote, \"Your allusion is so inappropriate and divisive, not to mention disrespectful on so many levels. Shame on you.\" Some also accused her of taking desperate measures to stay in the public eye. Lynda Anderson wrote, \"Posting outrageous things in a pathetic attempt to stay relevant?\" Negotiations are coming down to the wire between Iran, the United States and other nations on restricting Tehran's nuclear program to prevent the ability to develop an atomic bomb. One deadline passed Tuesday, but there is a June 30 deadline for a comprehensive deal -- with all technical and diplomatic impasses fully worked out. Bachmann is no stranger to voicing her opinion on the President's dealing with Iran, personally telling him to \"bomb Iran\" during the 2014 White House Christmas Party. \"I turned to the president and I said, something to the effect of, 'Mr. 
President, you need to bomb the Iranian nuclear facilities, because if you don't, Iran will have a nuclear weapon on your watch and the course of world history will change,'\" she told the Washington Free Beacon. The congresswoman, who sought the GOP presidential nomination in 2012, said Obama had a \"condescending smile on his face and laughed at me.\" She said he told her: \"Well Michele, it's just not that easy.\"",
99
+ "reference_summary": "Former GOP representative compares President Obama to Andreas Lubitz .\nBachmann said with possible Iran deal, Obama will fly \"entire nation into the rocks\"\nReaction on social media? She was blasted by Facebook commenters .",
100
+ "id": "ec4b5fef725b4cc81545689603ff84c8e19bd6dc"
101
+ }
102
+ ]
data/samples/test_50.json ADDED
File without changes
models/textrank.py CHANGED
@@ -96,8 +96,15 @@ class TextRankSummarizer(BaseSummarizer):
96
  Returns:
97
  Tuple of (original_sentences, cleaned_sentences)
98
  """
99
- # Split into sentences
100
- sentences = sent_tokenize(text)
 
 
 
 
 
 
 
101
 
102
  # Filter out very short sentences
103
  filtered_sentences = [
@@ -111,8 +118,16 @@ class TextRankSummarizer(BaseSummarizer):
111
  # Clean sentences for similarity calculation
112
  cleaned_sentences = []
113
  for sent in filtered_sentences:
114
- # Tokenize and lowercase
115
- words = word_tokenize(sent.lower())
 
 
 
 
 
 
 
 
116
  # Remove stopwords and non-alphanumeric tokens
117
  words = [w for w in words if w.isalnum() and w not in self.stop_words]
118
  cleaned_sentences.append(' '.join(words))
 
96
  Returns:
97
  Tuple of (original_sentences, cleaned_sentences)
98
  """
99
+ # Split into sentences with error handling for NLTK data
100
+ try:
101
+ sentences = sent_tokenize(text)
102
+ except LookupError:
103
+ logger.warning("NLTK punkt tokenizer not found. Downloading...")
104
+ import nltk
105
+ nltk.download('punkt')
106
+ nltk.download('punkt_tab') # Download both punkt versions
107
+ sentences = sent_tokenize(text)
108
 
109
  # Filter out very short sentences
110
  filtered_sentences = [
 
118
  # Clean sentences for similarity calculation
119
  cleaned_sentences = []
120
  for sent in filtered_sentences:
121
+ # Tokenize and lowercase with error handling
122
+ try:
123
+ words = word_tokenize(sent.lower())
124
+ except LookupError:
125
+ logger.warning("NLTK punkt tokenizer not found for word tokenization. Downloading...")
126
+ import nltk
127
+ nltk.download('punkt')
128
+ nltk.download('punkt_tab')
129
+ words = word_tokenize(sent.lower())
130
+
131
  # Remove stopwords and non-alphanumeric tokens
132
  words = [w for w in words if w.isalnum() and w not in self.stop_words]
133
  cleaned_sentences.append(' '.join(words))
notebooks/01_data_exploration.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/02_model_testing.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/03_evaluation_analysis.ipynb ADDED
@@ -0,0 +1,478 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "id": "0c688166",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "⚠ rouge library not found. Installing rouge-score...\n",
14
+ "✓ Successfully installed rouge-score\n",
15
+ "✗ Installation succeeded but import still fails.\n",
16
+ " Please restart the kernel and run this cell again.\n"
17
+ ]
18
+ }
19
+ ],
20
+ "source": [
21
+ "# FIX: Install and verify the rouge package (provides `from rouge import Rouge`; rouge-score is a different package exposing `rouge_score`)\n",
22
+ "# Run this cell FIRST if you get \"ModuleNotFoundError: No module named 'rouge'\"\n",
23
+ "\n",
24
+ "import sys\n",
25
+ "import subprocess\n",
26
+ "\n",
27
+ "def install_package(package_name):\n",
28
+ " \"\"\"Install package using pip\"\"\"\n",
29
+ " try:\n",
30
+ " subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", package_name, \"--quiet\"])\n",
31
+ " return True\n",
32
+ " except subprocess.CalledProcessError:\n",
33
+ " return False\n",
34
+ "\n",
35
+ "# Check if rouge is available\n",
36
+ "try:\n",
37
+ " from rouge import Rouge\n",
38
+ " print(\"✓ rouge library is already installed\")\n",
39
+ "except ImportError:\n",
40
+ " print(\"⚠ rouge library not found. Installing rouge...\")\n",
41
+ " if install_package(\"rouge\"):\n",
42
+ " print(\"✓ Successfully installed rouge\")\n",
43
+ " # Try importing again\n",
44
+ " try:\n",
45
+ " from rouge import Rouge\n",
46
+ " print(\"✓ rouge library now available\")\n",
47
+ " except ImportError:\n",
48
+ " print(\"✗ Installation succeeded but import still fails.\")\n",
49
+ " print(\" Please restart the kernel and run this cell again.\")\n",
50
+ " else:\n",
51
+ " print(\"✗ Failed to install rouge\")\n",
52
+ " print(\" Please run manually: pip install rouge\")\n",
53
+ " print(\" Then restart the kernel.\")\n"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 1,
59
+ "id": "1aa43993",
60
+ "metadata": {},
61
+ "outputs": [
62
+ {
63
+ "name": "stdout",
64
+ "output_type": "stream",
65
+ "text": [
66
+ "✗ Import error: No module named 'rouge'\n",
67
+ " Make sure you've run the previous cell to install dependencies\n"
68
+ ]
69
+ },
70
+ {
71
+ "ename": "ModuleNotFoundError",
72
+ "evalue": "No module named 'rouge'",
73
+ "output_type": "error",
74
+ "traceback": [
75
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
76
+ "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)",
77
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 10\u001b[39m\n\u001b[32m 8\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mmodels\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mbart\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m BARTSummarizer\n\u001b[32m 9\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mmodels\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpegasus\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m PEGASUSSummarizer\n\u001b[32m---> \u001b[39m\u001b[32m10\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mevaluator\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m SummarizerEvaluator\n\u001b[32m 11\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdata_loader\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m DataLoader\n\u001b[32m 12\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33m✓ All imports successful\u001b[39m\u001b[33m\"\u001b[39m)\n",
78
+ "\u001b[36mFile \u001b[39m\u001b[32m~/Downloads/smart-summarizer/notebooks/../utils/evaluator.py:6\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 2\u001b[39m \u001b[33;03mComprehensive Evaluation System for Summarization Models\u001b[39;00m\n\u001b[32m 3\u001b[39m \u001b[33;03mImplements ROUGE metrics, comparison analysis, and statistical testing\u001b[39;00m\n\u001b[32m 4\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m6\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mrouge\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Rouge\n\u001b[32m 7\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnp\u001b[39;00m\n\u001b[32m 8\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtyping\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Dict, List, Tuple, Optional\n",
79
+ "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'rouge'"
80
+ ]
81
+ }
82
+ ],
83
+ "source": [
84
+ "# Add project root to path\n",
85
+ "import sys\n",
86
+ "sys.path.append('..')\n",
87
+ "\n",
88
+ "# Import models and utilities\n",
89
+ "try:\n",
90
+ " from models.textrank import TextRankSummarizer\n",
91
+ " from models.bart import BARTSummarizer\n",
92
+ " from models.pegasus import PEGASUSSummarizer\n",
93
+ " from utils.evaluator import SummarizerEvaluator\n",
94
+ " from utils.data_loader import DataLoader\n",
95
+ " print(\"✓ All imports successful\")\n",
96
+ "except ImportError as e:\n",
97
+ " print(f\"✗ Import error: {e}\")\n",
98
+ " print(\" Make sure you've run the previous cell to install dependencies\")\n",
99
+ " raise\n",
100
+ "\n",
101
+ "# Import standard libraries\n",
102
+ "import pandas as pd\n",
103
+ "import numpy as np\n",
104
+ "import matplotlib.pyplot as plt\n",
105
+ "import seaborn as sns\n",
106
+ "from scipy import stats\n",
107
+ "import json\n",
108
+ "\n",
109
+ "plt.style.use('seaborn-v0_8')"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": null,
115
+ "id": "e28695c0",
116
+ "metadata": {},
117
+ "outputs": [],
118
+ "source": [
119
+ "print(\"Loading test dataset...\")\n",
120
+ "loader = DataLoader()\n",
121
+ "\n",
122
+ "# Load your saved samples (or load fresh)\n",
123
+ "try:\n",
124
+ " test_data = loader.load_samples('../data/samples/test_50.json')\n",
125
+ " print(f\"✓ Loaded {len(test_data)} test samples\")\n",
126
+ "except Exception:\n",
127
+ " print(\"Downloading test data...\")\n",
128
+ " test_data = loader.load_cnn_dailymail(split='test', num_samples=50)\n",
129
+ " loader.save_samples(test_data, '../data/samples/test_50.json')\n",
130
+ " print(f\"✓ Downloaded and saved {len(test_data)} samples\")\n",
131
+ "\n",
132
+ "# Extract texts and references\n",
133
+ "texts = [item['article'] for item in test_data]\n",
134
+ "references = [item['reference_summary'] for item in test_data]\n",
135
+ "\n",
136
+ "print(f\"\\nDataset Statistics:\")\n",
137
+ "print(f\" - Number of samples: {len(texts)}\")\n",
138
+ "print(f\" - Avg article length: {np.mean([len(t.split()) for t in texts]):.0f} words\")\n",
139
+ "print(f\" - Avg reference length: {np.mean([len(r.split()) for r in references]):.0f} words\")"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": null,
145
+ "id": "3b7dc004",
146
+ "metadata": {},
147
+ "outputs": [],
148
+ "source": [
149
+ "print(\"\\nInitializing models...\")\n",
150
+ "\n",
151
+ "models = {\n",
152
+ " 'TextRank': TextRankSummarizer(),\n",
153
+ " 'BART': BARTSummarizer(device='cpu'),\n",
154
+ " 'PEGASUS': PEGASUSSummarizer(device='cpu')\n",
155
+ "}\n",
156
+ "\n",
157
+ "print(\"✓ All models ready\")\n",
158
+ "\n",
159
+ "# Cell 4: Generate Summaries (Takes ~10-20 minutes for 50 samples; the loop below processes only the first 10)\n",
160
+ "print(\"\\nGenerating summaries for all models...\")\n",
161
+ "print(\"This will take 10-20 minutes. Grab a coffee! ☕\")\n",
162
+ "\n",
163
+ "all_summaries = {}\n",
164
+ "all_times = {}\n",
165
+ "\n",
166
+ "for model_name, model in models.items():\n",
167
+ " print(f\"\\n{model_name}:\")\n",
168
+ " summaries = []\n",
169
+ " times = []\n",
170
+ " \n",
171
+ " for i, text in enumerate(texts[:10], 1): # Start with 10 samples\n",
172
+ " print(f\" Processing {i}/10...\", end='\\r')\n",
173
+ " \n",
174
+ " if model_name == 'TextRank':\n",
175
+ " result = model.summarize_with_metrics(text)\n",
176
+ " else:\n",
177
+ " result = model.summarize_with_metrics(text, max_length=100, min_length=30)\n",
178
+ " \n",
179
+ " summaries.append(result['summary'])\n",
180
+ " times.append(result['metadata']['processing_time'])\n",
181
+ " \n",
182
+ " all_summaries[model_name] = summaries\n",
183
+ " all_times[model_name] = times\n",
184
+ " print(f\" ✓ Completed {model_name} \")\n",
185
+ "\n",
186
+ "print(\"\\n✓ All summaries generated!\")"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": null,
192
+ "id": "bf78630d",
193
+ "metadata": {},
194
+ "outputs": [],
195
+ "source": [
196
+ "print(\"\\nEvaluating models...\")\n",
197
+ "\n",
198
+ "evaluator = SummarizerEvaluator()\n",
199
+ "evaluation_results = {}\n",
200
+ "\n",
201
+ "for model_name in models.keys():\n",
202
+ " print(f\"\\nEvaluating {model_name}...\")\n",
203
+ " results = evaluator.evaluate_batch(\n",
204
+ " all_summaries[model_name],\n",
205
+ " references[:len(all_summaries[model_name])],\n",
206
+ " model_name\n",
207
+ " )\n",
208
+ " results['avg_time'] = np.mean(all_times[model_name])\n",
209
+ " results['std_time'] = np.std(all_times[model_name])\n",
210
+ " evaluation_results[model_name] = results\n",
211
+ "\n",
212
+ "print(\"✓ Evaluation complete\")"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": null,
218
+ "id": "c7ebcf59",
219
+ "metadata": {},
220
+ "outputs": [],
221
+ "source": [
222
+ "print(\"\\n\" + \"=\"*70)\n",
223
+ "print(\"EVALUATION RESULTS\")\n",
224
+ "print(\"=\"*70)\n",
225
+ "\n",
226
+ "results_table = []\n",
227
+ "\n",
228
+ "for model_name, results in evaluation_results.items():\n",
229
+ " results_table.append({\n",
230
+ " 'Model': model_name,\n",
231
+ " 'Type': 'Extractive' if model_name == 'TextRank' else 'Abstractive',\n",
232
+ " 'ROUGE-1': f\"{results['rouge_1_f1_mean']:.4f} ± {results['rouge_1_f1_std']:.4f}\",\n",
233
+ " 'ROUGE-2': f\"{results['rouge_2_f1_mean']:.4f} ± {results['rouge_2_f1_std']:.4f}\",\n",
234
+ " 'ROUGE-L': f\"{results['rouge_l_f1_mean']:.4f} ± {results['rouge_l_f1_std']:.4f}\",\n",
235
+ " 'Avg Time (s)': f\"{results['avg_time']:.3f} ± {results['std_time']:.3f}\",\n",
236
+ " 'Samples': results['num_samples']\n",
237
+ " })\n",
238
+ "\n",
239
+ "results_df = pd.DataFrame(results_table)\n",
240
+ "print(results_df.to_string(index=False))\n",
241
+ "\n",
242
+ "# Save to CSV for report\n",
243
+ "results_df.to_csv('../results/evaluation_results.csv', index=False)\n",
244
+ "print(\"\\n✓ Results saved to results/evaluation_results.csv\")\n"
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "code",
249
+ "execution_count": null,
250
+ "id": "a65fac0c",
251
+ "metadata": {},
252
+ "outputs": [],
253
+ "source": [
254
+ "print(\"\\n\" + \"=\"*70)\n",
255
+ "print(\"STATISTICAL SIGNIFICANCE TESTS\")\n",
256
+ "print(\"=\"*70)\n",
257
+ "\n",
258
+ "# Compare BART vs PEGASUS (both abstractive)\n",
259
+ "bart_rouge1 = [s['rouge_1_f1'] for s in evaluation_results['BART']['individual_scores']]\n",
260
+ "peg_rouge1 = [s['rouge_1_f1'] for s in evaluation_results['PEGASUS']['individual_scores']]\n",
261
+ "\n",
262
+ "sig_test = evaluator.statistical_significance_test(\n",
263
+ " bart_rouge1,\n",
264
+ " peg_rouge1,\n",
265
+ " test_name='paired t-test'\n",
266
+ ")\n",
267
+ "\n",
268
+ "print(f\"\\nBART vs PEGASUS (ROUGE-1):\")\n",
269
+ "print(f\" Test: {sig_test['test_name']}\")\n",
270
+ "print(f\" p-value: {sig_test['p_value']:.6f}\")\n",
271
+ "print(f\" {sig_test['interpretation']}\")"
272
+ ]
273
+ },
274
+ {
275
+ "cell_type": "code",
276
+ "execution_count": null,
277
+ "id": "ae272f7a",
278
+ "metadata": {},
279
+ "outputs": [],
280
+ "source": [
281
+ "fig = plt.figure(figsize=(16, 12))\n",
282
+ "\n",
283
+ "# Create grid\n",
284
+ "gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)\n",
285
+ "\n",
286
+ "# 1. ROUGE Scores Comparison\n",
287
+ "ax1 = fig.add_subplot(gs[0, :2])\n",
288
+ "rouge_data = pd.DataFrame({\n",
289
+ " 'Model': list(evaluation_results.keys()) * 3,\n",
290
+ " 'Metric': ['ROUGE-1']*3 + ['ROUGE-2']*3 + ['ROUGE-L']*3,\n",
291
+ " 'Score': [\n",
292
+ " evaluation_results['TextRank']['rouge_1_f1_mean'],\n",
293
+ " evaluation_results['BART']['rouge_1_f1_mean'],\n",
294
+ " evaluation_results['PEGASUS']['rouge_1_f1_mean'],\n",
295
+ " evaluation_results['TextRank']['rouge_2_f1_mean'],\n",
296
+ " evaluation_results['BART']['rouge_2_f1_mean'],\n",
297
+ " evaluation_results['PEGASUS']['rouge_2_f1_mean'],\n",
298
+ " evaluation_results['TextRank']['rouge_l_f1_mean'],\n",
299
+ " evaluation_results['BART']['rouge_l_f1_mean'],\n",
300
+ " evaluation_results['PEGASUS']['rouge_l_f1_mean']\n",
301
+ " ]\n",
302
+ "})\n",
303
+ "\n",
304
+ "sns.barplot(data=rouge_data, x='Metric', y='Score', hue='Model', ax=ax1)\n",
305
+ "ax1.set_title('ROUGE Score Comparison', fontsize=14, fontweight='bold')\n",
306
+ "ax1.set_ylabel('F1 Score')\n",
307
+ "ax1.set_ylim([0, 0.5])\n",
308
+ "ax1.legend(title='Model')\n",
309
+ "ax1.grid(axis='y', alpha=0.3)\n",
310
+ "\n",
311
+ "# 2. Processing Time\n",
312
+ "ax2 = fig.add_subplot(gs[0, 2])\n",
313
+ "times = [evaluation_results[m]['avg_time'] for m in models.keys()]\n",
314
+ "colors = ['#FF6B6B', '#4ECDC4', '#45B7D1']\n",
315
+ "ax2.bar(models.keys(), times, color=colors)\n",
316
+ "ax2.set_title('Processing Time', fontsize=12, fontweight='bold')\n",
317
+ "ax2.set_ylabel('Time (seconds)')\n",
318
+ "ax2.grid(axis='y', alpha=0.3)\n",
319
+ "\n",
320
+ "# 3. ROUGE-1 Distribution\n",
321
+ "ax3 = fig.add_subplot(gs[1, 0])\n",
322
+ "for model_name, color in zip(models.keys(), colors):\n",
323
+ " rouge1_scores = [s['rouge_1_f1'] for s in evaluation_results[model_name]['individual_scores']]\n",
324
+ " ax3.hist(rouge1_scores, alpha=0.6, label=model_name, bins=10, color=color)\n",
325
+ "ax3.set_title('ROUGE-1 Score Distribution', fontsize=12, fontweight='bold')\n",
326
+ "ax3.set_xlabel('ROUGE-1 F1 Score')\n",
327
+ "ax3.set_ylabel('Frequency')\n",
328
+ "ax3.legend()\n",
329
+ "ax3.grid(axis='y', alpha=0.3)\n",
330
+ "\n",
331
+ "# 4. ROUGE-2 Distribution\n",
332
+ "ax4 = fig.add_subplot(gs[1, 1])\n",
333
+ "for model_name, color in zip(models.keys(), colors):\n",
334
+ " rouge2_scores = [s['rouge_2_f1'] for s in evaluation_results[model_name]['individual_scores']]\n",
335
+ " ax4.hist(rouge2_scores, alpha=0.6, label=model_name, bins=10, color=color)\n",
336
+ "ax4.set_title('ROUGE-2 Score Distribution', fontsize=12, fontweight='bold')\n",
337
+ "ax4.set_xlabel('ROUGE-2 F1 Score')\n",
338
+ "ax4.set_ylabel('Frequency')\n",
339
+ "ax4.legend()\n",
340
+ "ax4.grid(axis='y', alpha=0.3)\n",
341
+ "\n",
342
+ "# 5. ROUGE-L Distribution\n",
343
+ "ax5 = fig.add_subplot(gs[1, 2])\n",
344
+ "for model_name, color in zip(models.keys(), colors):\n",
345
+ " rougel_scores = [s['rouge_l_f1'] for s in evaluation_results[model_name]['individual_scores']]\n",
346
+ " ax5.hist(rougel_scores, alpha=0.6, label=model_name, bins=10, color=color)\n",
347
+ "ax5.set_title('ROUGE-L Score Distribution', fontsize=12, fontweight='bold')\n",
348
+ "ax5.set_xlabel('ROUGE-L F1 Score')\n",
349
+ "ax5.set_ylabel('Frequency')\n",
350
+ "ax5.legend()\n",
351
+ "ax5.grid(axis='y', alpha=0.3)\n",
352
+ "\n",
353
+ "# 6. Box Plot Comparison\n",
354
+ "ax6 = fig.add_subplot(gs[2, :])\n",
355
+ "box_data = []\n",
356
+ "for model_name in models.keys():\n",
357
+ " rouge1_scores = [s['rouge_1_f1'] for s in evaluation_results[model_name]['individual_scores']]\n",
358
+ " for score in rouge1_scores:\n",
359
+ " box_data.append({'Model': model_name, 'ROUGE-1': score})\n",
360
+ "\n",
361
+ "box_df = pd.DataFrame(box_data)\n",
362
+ "sns.boxplot(data=box_df, x='Model', y='ROUGE-1', ax=ax6, palette=colors)\n",
363
+ "ax6.set_title('ROUGE-1 Score Distribution (Box Plot)', fontsize=14, fontweight='bold')\n",
364
+ "ax6.grid(axis='y', alpha=0.3)\n",
365
+ "\n",
366
+ "plt.savefig('../results/comprehensive_evaluation.png', dpi=300, bbox_inches='tight')\n",
367
+ "print(\"\\n✓ Comprehensive visualization saved!\")\n",
368
+ "plt.show()"
369
+ ]
370
+ },
371
+ {
372
+ "cell_type": "code",
373
+ "execution_count": null,
374
+ "id": "3e24f94c",
375
+ "metadata": {},
376
+ "outputs": [],
377
+ "source": [
378
+ "print(\"\\n\" + \"=\"*70)\n",
379
+ "print(\"EXPORTING RESULTS FOR REPORT\")\n",
380
+ "print(\"=\"*70)\n",
381
+ "\n",
382
+ "# Create comprehensive export\n",
383
+ "export_data = {\n",
384
+ " 'evaluation_date': pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),\n",
385
+ " 'dataset': {\n",
386
+ " 'name': 'CNN/DailyMail',\n",
387
+ " 'samples_evaluated': len(all_summaries['TextRank']),\n",
388
+ " 'split': 'test'\n",
389
+ " },\n",
390
+ " 'models': {\n",
391
+ " model_name: {\n",
392
+ " 'type': results_table[i]['Type'],\n",
393
+ " 'rouge_1': {\n",
394
+ " 'mean': evaluation_results[model_name]['rouge_1_f1_mean'],\n",
395
+ " 'std': evaluation_results[model_name]['rouge_1_f1_std']\n",
396
+ " },\n",
397
+ " 'rouge_2': {\n",
398
+ " 'mean': evaluation_results[model_name]['rouge_2_f1_mean'],\n",
399
+ " 'std': evaluation_results[model_name]['rouge_2_f1_std']\n",
400
+ " },\n",
401
+ " 'rouge_l': {\n",
402
+ " 'mean': evaluation_results[model_name]['rouge_l_f1_mean'],\n",
403
+ " 'std': evaluation_results[model_name]['rouge_l_f1_std']\n",
404
+ " },\n",
405
+ " 'processing_time': {\n",
406
+ " 'mean': evaluation_results[model_name]['avg_time'],\n",
407
+ " 'std': evaluation_results[model_name]['std_time']\n",
408
+ " }\n",
409
+ " }\n",
410
+ " for i, model_name in enumerate(models.keys())\n",
411
+ " },\n",
412
+ " 'statistical_tests': {\n",
413
+ " 'bart_vs_pegasus': sig_test\n",
414
+ " }\n",
415
+ "}\n",
416
+ "\n",
417
+ "with open('../results/final_evaluation.json', 'w') as f:\n",
418
+ " json.dump(export_data, f, indent=2)\n",
419
+ "\n",
420
+ "print(\"✓ Exported to results/final_evaluation.json\")\n",
421
+ "print(\"\\nFiles created for your report:\")\n",
422
+ "print(\" 1. results/evaluation_results.csv - Table for report\")\n",
423
+ "print(\" 2. results/comprehensive_evaluation.png - Main figure\")\n",
424
+ "print(\" 3. results/final_evaluation.json - All data\")\n",
425
+ "\n",
426
+ "# Cell 10: Summary for Report\n",
427
+ "print(\"\\n\" + \"=\"*70)\n",
428
+ "print(\"KEY FINDINGS FOR YOUR REPORT\")\n",
429
+ "print(\"=\"*70)\n",
430
+ "\n",
431
+ "best_model = max(evaluation_results.keys(), \n",
432
+ " key=lambda x: evaluation_results[x]['rouge_1_f1_mean'])\n",
433
+ "fastest_model = min(evaluation_results.keys(),\n",
434
+ " key=lambda x: evaluation_results[x]['avg_time'])\n",
435
+ "\n",
436
+ "print(f\"\\n1. Best Overall Performance: {best_model}\")\n",
437
+ "print(f\" - ROUGE-1: {evaluation_results[best_model]['rouge_1_f1_mean']:.4f}\")\n",
438
+ "print(f\" - ROUGE-2: {evaluation_results[best_model]['rouge_2_f1_mean']:.4f}\")\n",
439
+ "print(f\" - ROUGE-L: {evaluation_results[best_model]['rouge_l_f1_mean']:.4f}\")\n",
440
+ "\n",
441
+ "print(f\"\\n2. Fastest Processing: {fastest_model}\")\n",
442
+ "print(f\" - Avg time: {evaluation_results[fastest_model]['avg_time']:.3f}s\")\n",
443
+ "print(f\" - {evaluation_results[max(evaluation_results.keys(), key=lambda x: evaluation_results[x]['avg_time'])]['avg_time'] / evaluation_results[fastest_model]['avg_time']:.1f}x faster than slowest\")\n",
444
+ "\n",
445
+ "print(f\"\\n3. Extractive vs Abstractive:\")\n",
446
+ "print(f\" - TextRank (Extractive): ROUGE-1 = {evaluation_results['TextRank']['rouge_1_f1_mean']:.4f}\")\n",
447
+ "print(f\" - BART (Abstractive): ROUGE-1 = {evaluation_results['BART']['rouge_1_f1_mean']:.4f}\")\n",
448
+ "print(f\" - PEGASUS (Abstractive): ROUGE-1 = {evaluation_results['PEGASUS']['rouge_1_f1_mean']:.4f}\")\n",
449
+ "print(f\" - Abstractive models outperform extractive by {(evaluation_results[best_model]['rouge_1_f1_mean'] / evaluation_results['TextRank']['rouge_1_f1_mean'] - 1) * 100:.1f}%\")\n",
450
+ "\n",
451
+ "print(\"\\n\" + \"=\"*70)\n",
452
+ "print(\"✓ Evaluation complete! Use these results in your report.\")\n",
453
+ "print(\"=\"*70)"
454
+ ]
455
+ }
456
+ ],
457
+ "metadata": {
458
+ "kernelspec": {
459
+ "display_name": "Workshop2",
460
+ "language": "python",
461
+ "name": "python3"
462
+ },
463
+ "language_info": {
464
+ "codemirror_mode": {
465
+ "name": "ipython",
466
+ "version": 3
467
+ },
468
+ "file_extension": ".py",
469
+ "mimetype": "text/x-python",
470
+ "name": "python",
471
+ "nbconvert_exporter": "python",
472
+ "pygments_lexer": "ipython3",
473
+ "version": "3.13.9"
474
+ }
475
+ },
476
+ "nbformat": 4,
477
+ "nbformat_minor": 5
478
+ }
notebooks/03_evaluation_analysis_cnn_dailymail.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.9.18
setup.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Smart Summarizer - Setup Configuration
Professional text summarization application with multiple AI models
"""

import re
from pathlib import Path

from setuptools import setup, find_packages

# Resolve data files relative to this script so the build does not depend on
# the caller's current working directory.
HERE = Path(__file__).resolve().parent

# Same comment rule pip applies to requirements files: a '#' at the start of
# the line or preceded by whitespace begins a comment. A '#' glued to a URL
# (e.g. '#egg=name') is left alone.
_COMMENT_RE = re.compile(r"(^|\s+)#.*$")


def _read_requirements(path):
    """Return the requirement specifiers listed in a pip requirements file.

    Blank lines, comments (whole-line or trailing) and pip option lines such
    as ``-r other.txt`` or ``--index-url ...`` are dropped, because
    ``install_requires`` only accepts plain requirement specifiers.

    Args:
        path: Location of the requirements file.

    Returns:
        A list of requirement strings in file order.

    Raises:
        FileNotFoundError: If the requirements file does not exist.
    """
    requirements = []
    with open(path, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = _COMMENT_RE.sub("", raw_line).strip()
            # Skip empty lines and pip command-line options.
            if not line or line.startswith("-"):
                continue
            requirements.append(line)
    return requirements


# Read requirements from requirements.txt
requirements = _read_requirements(HERE / "requirements.txt")

# Read README for long description; fall back to a short blurb when the file
# is not shipped alongside setup.py.
try:
    with open(HERE / "README.md", "r", encoding="utf-8") as f:
        long_description = f.read()
except FileNotFoundError:
    long_description = "Smart Summarizer - AI-powered text summarization tool"

setup(
    name="smart-summarizer",
    version="1.0.0",
    author="Abdul Razzaq Ansari",
    author_email="rajakansari83@gmail.com",
    description="AI-powered text summarization with multiple model comparison",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/Rajak13/Smart-Summarizer",
    packages=find_packages(),
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Education",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Text Processing :: Linguistic",
    ],
    python_requires=">=3.8",
    install_requires=requirements,
    extras_require={
        "dev": [
            "pytest>=7.4.0",
            "pytest-cov>=4.1.0",
            "black>=23.0.0",
            "flake8>=6.0.0",
            "mypy>=1.0.0",
        ],
        "docs": [
            "sphinx>=7.0.1",
            "sphinx-rtd-theme>=1.3.0",
        ],
    },
    entry_points={
        "console_scripts": [
            "smart-summarizer=app.main:main",
        ],
    },
    include_package_data=True,
    package_data={
        "": ["*.md", "*.txt", "*.yaml", "*.yml"],
    },
    project_urls={
        "Bug Reports": "https://github.com/Rajak13/Smart-Summarizer/issues",
        "Source": "https://github.com/Rajak13/Smart-Summarizer",
        "Documentation": "https://smart-summarizer.readthedocs.io/",
    },
)