Spaces:
Sleeping
Sleeping
Upload 12 files
Browse files- README.md +14 -10
- app.py +104 -58
- categorizer.py +58 -0
- config.json +62 -0
- fetch_top_news.py +117 -0
- generation_config.json +6 -0
- label_encoder.pkl +3 -0
- logistic_regression_model.pkl +3 -0
- model.safetensors +3 -0
- news_data.json +0 -0
- summarizer.py +171 -0
README.md
CHANGED
|
@@ -1,10 +1,14 @@
|
|
| 1 |
-
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: streamlit
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: FlanT5 Base Fine Tuned For Summarization
|
| 3 |
+
emoji: 🏢
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: streamlit
|
| 7 |
+
sdk_version: 1.40.1
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
short_description: FlanT5_base_fine_tuned_on_CNNDailymail_for_Summarization
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
|
@@ -1,60 +1,106 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
.
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
st.header("Categories")
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import fetch_top_news as news_api
|
| 3 |
+
import time
|
| 4 |
+
|
| 5 |
+
def load_news():
    """Populate Streamlit session state with fetched news, if not already cached.

    Side effects: sets ``news_data`` (list of article dicts) and
    ``last_fetch_time`` (epoch seconds) on ``st.session_state``.
    """
    state = st.session_state
    if "news_data" in state:
        return  # already cached for this session; nothing to do
    state.news_data = news_api.get_top_news(language="en")
    state.last_fetch_time = time.time()  # remember when this fetch happened
| 11 |
+
|
| 12 |
+
# Seconds between refreshes of the cached news feed (10800 s = 3 hours).
# FIX: the old inline comment claimed "every 10 minutes", contradicting the
# actual 10800-second threshold; the constant and comment now agree.
_REFRESH_INTERVAL_SECONDS = 10800

def should_refresh_data():
    """Return True when the cached news is stale and should be re-fetched.

    True when no fetch has happened yet in this session, or when more than
    ``_REFRESH_INTERVAL_SECONDS`` have elapsed since the last fetch.
    """
    if "last_fetch_time" not in st.session_state:
        return True  # nothing fetched yet -> force a refresh
    elapsed = time.time() - st.session_state.last_fetch_time
    return elapsed > _REFRESH_INTERVAL_SECONDS
| 18 |
+
|
| 19 |
+
def filter_news_by_category(news_data, category):
    """Return the articles whose ``'category'`` field equals *category*.

    ``"All"`` is a sentinel meaning "no filtering": the input list is returned
    unchanged (the same object, not a copy).
    """
    if category == "All":
        return news_data
    return list(filter(lambda item: item['category'] == category, news_data))
|
| 24 |
+
|
| 25 |
+
def main():
    """Render the QuantumQuest Streamlit app.

    Three tabs: a category-filtered news browser, a flat "Popular News"
    listing, and a scratch "Test Area" with placeholder containers.
    """
    st.markdown("<h1 style='font-size: 50px;'>QuantumQuest</h1>", unsafe_allow_html=True)

    # Inject CSS for category buttons.
    # NOTE(review): the .category-button class is not referenced by any widget
    # below (selection uses st.pills) -- presumably left over from an earlier
    # button-based UI; confirm before removing.
    st.markdown("""
    <style>
    .category-button {
    background-color: #4CAF50; /* Green */
    border: none;
    width: 150px; /* Set fixed width */
    height: 50px; /* Set fixed height */
    color: white;
    padding: 10px 24px;
    text-align: center;
    text-decoration: none;
    display: inline-block;
    font-size: 16px;
    margin: 4px 2px;
    cursor: pointer;
    border-radius: 8px;
    transition-duration: 0.4s;
    }

    .category-button:hover {
    background-color: #3e8e41;
    color: white;
    }
    </style>
    """, unsafe_allow_html=True)

    # Fixed list of selectable categories; "All" disables filtering.
    categories = ["All", "Sports", "Business", "Science/Tech", "Politics", "Entertainment", "Others"]

    # Load or refresh news data (fetch happens only when the cache is stale).
    if should_refresh_data():
        load_news()

    tab1, tab2, tab3 = st.tabs(["Categories", "Popular News", "Test Area"])

    # Categories Tab: browse news filtered by the selected category pill.
    with tab1:
        st.header("Categories")

        # Create category selection using st.pills.
        # NOTE(review): st.pills returns None until the user picks a pill, so
        # the filtered list below is empty on first render -- confirm this is
        # the intended initial state (vs. defaulting to "All").
        selection = st.pills("", categories)
        st.write(f"You have selected: {selection}")

        # Fetch news data from the session cache (populated by load_news).
        if "news_data" in st.session_state:
            news_data = st.session_state.news_data
            filtered_news = filter_news_by_category(news_data, selection)

            # Display each news item in its own bordered container.
            for news in filtered_news:
                with st.container(border = True):
                    st.markdown(f"**{news['title']}**")
                    st.write(news['summary'])
                    st.markdown(f"[Read more]({news['url']})")
                    st.divider()  # Adds a divider between news items

    # Popular News Tab: flat listing of every cached article.
    with tab2:
        if "news_data" in st.session_state:
            news_data = st.session_state.news_data
            st.write(f"Fetched {len(news_data)} news articles.")

            # Display the news articles in fetch order.
            for news in news_data:
                st.subheader(f"{news['index']}. {news['title']}")
                st.write(news['summary'])
                st.write(f"[Read more]({news['url']})")
                st.divider()

    # Test Area Tab: placeholder containers used for layout experiments.
    with tab3:
        container1 = st.container()
        container1.write("This is container 1")

        container2 = st.container()
        container2.write("This is container 2")

if __name__ == "__main__":
    main()
|
categorizer.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from nltk.tokenize import word_tokenize
|
| 3 |
+
import re
|
| 4 |
+
import pickle
|
| 5 |
+
import gensim.downloader as api
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class NewsCategorizer:
    """Predicts a news category for raw article text.

    Pipeline: clean text -> tokenize -> average word2vec embeddings ->
    logistic-regression classifier -> decode the label with the fitted
    LabelEncoder.
    """

    def __init__(self, gensim_model_name='word2vec-google-news-300', model_file='logistic_regression_model.pkl', encoder_file='label_encoder.pkl'):
        # Load gensim word-embedding model (large download on first use).
        self.gensim_model = api.load(gensim_model_name)
        print("New categorizer was called")

        # Load classifier model and label encoder.
        # NOTE(review): pickle.load assumes these are trusted artifacts shipped
        # with the app -- never point these at untrusted files.
        with open(model_file, 'rb') as f:
            self.model = pickle.load(f)

        with open(encoder_file, 'rb') as le_file:
            self.label_encoder = pickle.load(le_file)

    def clean_text(self, text):
        """Cleans the text by removing non-alphabetic characters and converting to lowercase."""
        # BUG FIX: the original passed ``re.I`` as re.sub's positional *count*
        # argument (re.I == 2), so only the first two non-alphabetic characters
        # were ever removed. Pass it via the ``flags`` keyword instead (the
        # flag itself is redundant with [a-zA-Z], but kept for fidelity).
        text = re.sub(r'[^a-zA-Z\s]', '', text, flags=re.I)
        return text.lower()

    def get_word2vec_embeddings(self, tokens):
        """Converts tokens to the mean of their word2vec embeddings.

        Out-of-vocabulary tokens are skipped; when no token is in the model's
        vocabulary a zero vector of the model's dimensionality is returned.
        """
        embeddings = [self.gensim_model[token] for token in tokens if token in self.gensim_model]
        if embeddings:
            return np.mean(embeddings, axis=0)
        return np.zeros(self.gensim_model.vector_size)

    def predict_category(self, text):
        """Predicts the category of the given text using the pre-trained model."""
        cleaned_text = self.clean_text(text)
        tokens = word_tokenize(cleaned_text)
        # Classifier expects a 2-D array: one row per sample.
        embeddings = self.get_word2vec_embeddings(tokens).reshape(1, -1)
        predicted_label = self.model.predict(embeddings)[0]
        return self.label_encoder.inverse_transform([predicted_label])[0]
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# # Example Usage
|
| 50 |
+
# # Initialize the NewsCategorizer class
|
| 51 |
+
# categorizer = NewsCategorizer()
|
| 52 |
+
|
| 53 |
+
# # Example text for prediction
|
| 54 |
+
# unknown_text = """A horrifying incident in Sultanpuri, Delhi, has led to the arrest of Neeraj Solanki and four of his family members for allegedly killing and burying his three-day-old twin daughters. The police revealed that the act was driven by Solanki's preference for a male child. Following the birth of the twins on May 30, the newborns were taken to a cremation ground and buried after being killed. The investigation began after the children’s mother reported the crime to the police."""
|
| 55 |
+
|
| 56 |
+
# # Predict the category for the unknown text
|
| 57 |
+
# predicted_category = categorizer.predict_category(unknown_text)
|
| 58 |
+
# print(f"The predicted category is: {predicted_category}")
|
config.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "google/flan-t5-base",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"T5ForConditionalGeneration"
|
| 5 |
+
],
|
| 6 |
+
"classifier_dropout": 0.0,
|
| 7 |
+
"d_ff": 2048,
|
| 8 |
+
"d_kv": 64,
|
| 9 |
+
"d_model": 768,
|
| 10 |
+
"decoder_start_token_id": 0,
|
| 11 |
+
"dense_act_fn": "gelu_new",
|
| 12 |
+
"dropout_rate": 0.1,
|
| 13 |
+
"eos_token_id": 1,
|
| 14 |
+
"feed_forward_proj": "gated-gelu",
|
| 15 |
+
"initializer_factor": 1.0,
|
| 16 |
+
"is_encoder_decoder": true,
|
| 17 |
+
"is_gated_act": true,
|
| 18 |
+
"layer_norm_epsilon": 1e-06,
|
| 19 |
+
"model_type": "t5",
|
| 20 |
+
"n_positions": 512,
|
| 21 |
+
"num_decoder_layers": 12,
|
| 22 |
+
"num_heads": 12,
|
| 23 |
+
"num_layers": 12,
|
| 24 |
+
"output_past": true,
|
| 25 |
+
"pad_token_id": 0,
|
| 26 |
+
"relative_attention_max_distance": 128,
|
| 27 |
+
"relative_attention_num_buckets": 32,
|
| 28 |
+
"task_specific_params": {
|
| 29 |
+
"summarization": {
|
| 30 |
+
"early_stopping": true,
|
| 31 |
+
"length_penalty": 2.0,
|
| 32 |
+
"max_length": 200,
|
| 33 |
+
"min_length": 30,
|
| 34 |
+
"no_repeat_ngram_size": 3,
|
| 35 |
+
"num_beams": 4,
|
| 36 |
+
"prefix": "summarize: "
|
| 37 |
+
},
|
| 38 |
+
"translation_en_to_de": {
|
| 39 |
+
"early_stopping": true,
|
| 40 |
+
"max_length": 300,
|
| 41 |
+
"num_beams": 4,
|
| 42 |
+
"prefix": "translate English to German: "
|
| 43 |
+
},
|
| 44 |
+
"translation_en_to_fr": {
|
| 45 |
+
"early_stopping": true,
|
| 46 |
+
"max_length": 300,
|
| 47 |
+
"num_beams": 4,
|
| 48 |
+
"prefix": "translate English to French: "
|
| 49 |
+
},
|
| 50 |
+
"translation_en_to_ro": {
|
| 51 |
+
"early_stopping": true,
|
| 52 |
+
"max_length": 300,
|
| 53 |
+
"num_beams": 4,
|
| 54 |
+
"prefix": "translate English to Romanian: "
|
| 55 |
+
}
|
| 56 |
+
},
|
| 57 |
+
"tie_word_embeddings": false,
|
| 58 |
+
"torch_dtype": "float32",
|
| 59 |
+
"transformers_version": "4.44.2",
|
| 60 |
+
"use_cache": true,
|
| 61 |
+
"vocab_size": 32128
|
| 62 |
+
}
|
fetch_top_news.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
from dotenv import load_dotenv, find_dotenv

# Load environment variables (e.g. the news API key) from a local .env file
# before anything reads os.environ.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

import os
import json
from datetime import datetime

from categorizer import NewsCategorizer
from summarizer import Summarizer

# FIX: removed a duplicate ``import requests`` that appeared twice in this
# module's header.

# Today's date in the YYYY-MM-DD format the news API expects.
# NOTE(review): computed once at import time -- a long-running process keeps
# using the start-up date; confirm that is acceptable.
current_day = datetime.now().strftime("%Y-%m-%d")
api_key = os.getenv("WORLD_NEWS_API")

# Instantiate the categorizer and summarizer once at import time; both load
# large models, so this is slow but amortized over all later calls.
categorizer_instance = NewsCategorizer()
# NOTE(review): absolute Windows paths -- these will not resolve on a deployed
# (Linux) Space; confirm and switch to repo-relative paths.
summarizer_instance = Summarizer(
    model_path = r'C:\Users\TDH\VIIIIIV\MVIIXXIV\2024\Senior Project 1\huggingface_model_deployment\FlanT5_base_fine_tuned_for_Summarization',
    tokenizer_path = r'C:\Users\TDH\VIIIIIV\MVIIXXIV\2024\Senior Project 1\huggingface_model_deployment\FlanT5_base_fine_tuned_for_Summarization\local_google_flan_t5_tokenizer'
)
+
def get_top_news(language="en", date=current_day, number=30):
    """Fetch top news from the World News API, then categorize and summarize each article.

    Args:
        language: two-letter language code (e.g. 'en' for English, 'th' for Thai).
        date: publication-date filter in YYYY-MM-DD form; defaults to the date
            the module was imported.
        number: how many articles to request.

    Returns:
        A list of dicts with keys ``index``, ``title``, ``text``, ``url``,
        ``category``, ``summary``, ``predicted_category``. Empty list when the
        API call fails (the error is printed).

    Side effects: writes the fetched articles to ``news_data.json``.
    """
    print("get_top_news was called")

    def save_news_to_file(news_list, filename="news_data.json"):
        """Persist the fetched articles, with a fetch timestamp, to *filename*."""
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        # Save data with a timestamp alongside the articles.
        data_to_save = {
            "timestamp": timestamp,
            "news": news_list
        }
        with open(filename, 'w') as f:
            json.dump(data_to_save, f)
        # FIX: the original message never interpolated the file name; use the
        # actual ``filename`` parameter in the f-string.
        print(f"News data saved to {filename} at {timestamp}")

    base_url = "https://api.worldnewsapi.com/search-news"
    news_list = []  # collected article dicts, in API order

    params = {
        'api-key': api_key,
        'language': language,        # language code, e.g. 'en', 'th'
        'published_date': date,      # date in YYYY-MM-DD format
        'sort-by': 'relevance',      # 'relevance', 'popularity', or 'publishedAt'
        'number': number             # number of articles to retrieve
    }

    response = requests.get(base_url, params=params)

    if response.status_code == 200:
        articles = response.json().get("news", [])

        for i, article in enumerate(articles, start=1):
            text = article.get("text", "")
            # NOTE(review): the classifier is also run on empty text (yielding
            # some default class); confirm whether it should be skipped instead.
            predicted_category = categorizer_instance.predict_category(text)
            # Generate a summary only when there is text to summarize.
            if text:
                summary = summarizer_instance.iterative_summarization(
                    text=text,
                    chunk_size=350,
                    overlap_size=50,
                    max_iterations=5,
                    use_prompt_template=True
                )
            else:
                summary = "No summary available"

            # Collect the data for each article. The displayed 'category'
            # comes from the API payload; the model's own prediction is kept
            # separately under 'predicted_category'.
            news_list.append({
                "index": i,
                "title": article.get("title", "No Title"),
                "text": article.get("text", "No Text"),
                "url": article.get("url", "No URL"),
                "category": article.get("category", "Others"),
                "summary": summary,
                "predicted_category": predicted_category
            })
        save_news_to_file(news_list)
    else:
        print("Error:", response.status_code, response.text)

    return news_list  # empty when the request failed
|
| 86 |
+
|
| 87 |
+
# # Example usage
|
| 88 |
+
# news = get_top_news(language="en", date="2024-11-19")
|
| 89 |
+
# for item in news:
|
| 90 |
+
# print(item)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
# def get_top_news(language="en", date=current_day,number=30):
|
| 95 |
+
# temp = [
|
| 96 |
+
# {"index": 1, "title": "World Cup 2024: The Road to the Final", "text": "The World Cup 2024 is heating up, with top teams battling for a spot in the final... (rest of the article text)", "url": "https://example.com/world-cup-2024", "category": "Sports", "summary": "The World Cup 2024 is nearing its final stages with intense matches between the world's top teams.", "predicted_category": "Sports"},
|
| 97 |
+
# {"index": 2, "title": "Tech Giants Invest in AI for 2024", "text": "Major tech companies are doubling down on AI technologies with multi-billion dollar investments... (rest of the article text)", "url": "https://example.com/tech-giants-ai-investment", "category": "Science/Tech", "summary": "Tech companies are boosting AI investments, with the focus on automation and machine learning innovations.", "predicted_category": "Science/Tech"},
|
| 98 |
+
# {"index": 3, "title": "Stocks Hit Record High Amid Global Economic Recovery", "text": "The stock market has reached new heights, with positive news about global economic recovery... (rest of the article text)", "url": "https://example.com/stock-market-record-high", "category": "Business", "summary": "The stock market reaches an all-time high as signs of global economic recovery boost investor confidence.", "predicted_category": "Business"},
|
| 99 |
+
# {"index": 4, "title": "New Legislation Aims to Tackle Climate Change", "text": "Lawmakers are debating new legislation aimed at addressing climate change with stricter regulations... (rest of the article text)", "url": "https://example.com/climate-change-legislation", "category": "Politics", "summary": "New legislation to combat climate change is being debated, focusing on stricter environmental regulations.", "predicted_category": "Politics"},
|
| 100 |
+
# {"index": 5, "title": "AI Revolutionizing Healthcare Diagnostics", "text": "AI systems are making waves in healthcare, improving diagnostics and treatment recommendations... (rest of the article text)", "url": "https://example.com/ai-healthcare-diagnostics", "category": "Science/Tech", "summary": "AI is revolutionizing healthcare by improving diagnostic accuracy and treatment efficiency.", "predicted_category": "Science/Tech"},
|
| 101 |
+
# {"index": 6, "title": "Oscars 2024: The Big Winners", "text": "The Oscars 2024 saw exciting wins, with new films taking home prestigious awards... (rest of the article text)", "url": "https://example.com/oscars-2024-winners", "category": "Entertainment", "summary": "The 2024 Oscars highlighted new talent and groundbreaking films, with major wins in multiple categories.", "predicted_category": "Entertainment"},
|
| 102 |
+
# {"index": 7, "title": "Government Launches New Initiative for Rural Development", "text": "A new initiative has been launched by the government to boost economic development in rural areas... (rest of the article text)", "url": "https://example.com/rural-development-initiative", "category": "Politics", "summary": "The government has launched a new initiative aimed at improving infrastructure and opportunities in rural regions.", "predicted_category": "Politics"},
|
| 103 |
+
# {"index": 8, "title": "The Future of Renewable Energy: Solar Dominates", "text": "Solar energy is expected to dominate the renewable energy market in the coming decades... (rest of the article text)", "url": "https://example.com/future-renewable-energy-solar", "category": "Science/Tech", "summary": "Solar energy is poised to take the lead in renewable energy, driven by technological advancements and policy support.", "predicted_category": "Science/Tech"},
|
| 104 |
+
# {"index": 9, "title": "2024 Tokyo Marathon: The Race of the Year", "text": "The Tokyo Marathon 2024 is set to attract the best runners from across the globe... (rest of the article text)", "url": "https://example.com/tokyo-marathon-2024", "category": "Sports", "summary": "The Tokyo Marathon 2024 will feature top athletes and promising new talent, making it one of the biggest races of the year.", "predicted_category": "Sports"},
|
| 105 |
+
# {"index": 10, "title": "Breaking News: Major Tech Merger Announced", "text": "Two major tech firms have announced a merger, which is expected to reshape the industry... (rest of the article text)", "url": "https://example.com/tech-merger-announcement", "category": "Business", "summary": "A major tech merger has been announced, signaling significant shifts in the technology sector.", "predicted_category": "Business"},
|
| 106 |
+
# {"index": 11, "title": "Political Tensions Rise Ahead of Election", "text": "With the upcoming election, political tensions are escalating as parties prepare for a fierce battle... (rest of the article text)", "url": "https://example.com/political-tensions-election", "category": "Politics", "summary": "Political tensions are rising as election campaigns intensify, with major candidates vying for leadership.", "predicted_category": "Politics"},
|
| 107 |
+
# {"index": 12, "title": "NBA Finals 2024: The Ultimate Showdown", "text": "The 2024 NBA Finals is shaping up to be one of the most thrilling matchups in basketball history... (rest of the article text)", "url": "https://example.com/nba-finals-2024", "category": "Sports", "summary": "The 2024 NBA Finals promises an epic showdown between two powerhouse teams, drawing attention from sports fans worldwide.", "predicted_category": "Sports"},
|
| 108 |
+
# {"index": 13, "title": "New Cybersecurity Threats: How to Protect Your Data", "text": "As cyber threats continue to evolve, experts are warning about new vulnerabilities affecting businesses and individuals... (rest of the article text)", "url": "https://example.com/cybersecurity-threats-2024", "category": "Science/Tech", "summary": "New cybersecurity threats are emerging, and experts provide tips on how to protect your data from malicious actors.", "predicted_category": "Science/Tech"},
|
| 109 |
+
# {"index": 14, "title": "Hollywood Stars Attend Gala for Charity", "text": "A star-studded charity gala was held last night, raising millions for a good cause... (rest of the article text)", "url": "https://example.com/hollywood-charity-gala", "category": "Entertainment", "summary": "A gala event in Hollywood saw major stars come together to raise millions for charity, highlighting philanthropic efforts in the industry.", "predicted_category": "Entertainment"},
|
| 110 |
+
# {"index": 15, "title": "Oil Prices Hit New Highs: Global Market Impact", "text": "Oil prices have hit record highs this month, affecting global markets and driving up fuel costs... (rest of the article text)", "url": "https://example.com/oil-prices-new-highs", "category": "Business", "summary": "Oil prices have surged, influencing global market trends and leading to higher fuel prices across the world.", "predicted_category": "Business"},
|
| 111 |
+
# {"index": 16, "title": "Exploring Mars: The Latest Space Mission Updates", "text": "NASA's latest Mars mission has yielded fascinating new insights about the planet's surface... (rest of the article text)", "url": "https://example.com/mars-space-mission-2024", "category": "Science/Tech", "summary": "NASA's latest Mars mission has provided new data about the planet's surface, shedding light on its potential for future exploration.", "predicted_category": "Science/Tech"},
|
| 112 |
+
# {"index": 17, "title": "New National Sports Policy to Boost Youth Involvement", "text": "The government has announced a new national sports policy aimed at increasing youth participation in sports... (rest of the article text)", "url": "https://example.com/national-sports-policy-2024", "category": "Sports", "summary": "A new national sports policy has been launched to boost youth engagement in physical activities and competitions.", "predicted_category": "Sports"},
|
| 113 |
+
# {"index": 18, "title": "Entertainment Industry Faces Challenges Amid Strike", "text": "The entertainment industry is facing significant disruptions as workers go on strike over pay and conditions... (rest of the article text)", "url": "https://example.com/entertainment-strike-2024", "category": "Entertainment", "summary": "The entertainment industry is experiencing major disruptions due to strikes over pay and working conditions.", "predicted_category": "Entertainment"},
|
| 114 |
+
# {"index": 19, "title": "China's Growing Influence in Global Politics", "text": "China's political influence continues to grow, shaping global policy decisions and economic trends... (rest of the article text)", "url": "https://example.com/china-global-influence", "category": "Politics", "summary": "China's political and economic influence is rapidly increasing, affecting global decision-making and alliances.", "predicted_category": "Politics"},
|
| 115 |
+
# {"index": 20, "title": "AI-Powered Tools Change the Game for Content Creators", "text": "Content creators are increasingly relying on AI-powered tools to streamline their production process and boost engagement... (rest of the article text)", "url": "https://example.com/ai-content-creators", "category": "Science/Tech", "summary": "AI-powered tools are revolutionizing content creation, helping creators improve efficiency and engagement with their audiences.", "predicted_category": "Science/Tech"}
|
| 116 |
+
# ]
|
| 117 |
+
# return temp
|
generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"decoder_start_token_id": 0,
|
| 3 |
+
"eos_token_id": 1,
|
| 4 |
+
"pad_token_id": 0,
|
| 5 |
+
"transformers_version": "4.44.2"
|
| 6 |
+
}
|
label_encoder.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdd75907b5940f619aac3bd28132a6b15af2bc628d0e676563c27e34db3f7865
|
| 3 |
+
size 317
|
logistic_regression_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acaee881e6d08738879f588e71b3a01a3f84641e38d0778e5ebef72b47b0d354
|
| 3 |
+
size 17602
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:197b346658905cda50c1a71fe0eb77a4d79306adaeb5d7a3c0e9208c91022d45
|
| 3 |
+
size 990345064
|
news_data.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
summarizer.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# # import torch
|
| 2 |
+
# from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
| 3 |
+
# model = AutoModelForSeq2SeqLM.from_pretrained(r'FlanT5_base_fine_tuned_for_Summarization\model.safetensors')
|
| 4 |
+
|
| 5 |
+
# tokenizer = AutoTokenizer.from_pretrained("local_google_flan_t5_tokenizer")
|
| 6 |
+
|
| 7 |
+
# import torch
|
| 8 |
+
|
| 9 |
+
# def split_text(text, chunk_size=300, overlap_size=50):
|
| 10 |
+
# words = text.split()
|
| 11 |
+
# chunks = []
|
| 12 |
+
# for i in range(0, len(words), chunk_size - overlap_size):
|
| 13 |
+
# chunk = ' '.join(words[i:i + chunk_size])
|
| 14 |
+
# chunks.append(chunk)
|
| 15 |
+
# return chunks
|
| 16 |
+
|
| 17 |
+
# def summarize_chunks(model, tokenizer, chunks, use_prompt_template):
|
| 18 |
+
# summaries = []
|
| 19 |
+
# for chunk in chunks:
|
| 20 |
+
# # Tokenizing the input chunk
|
| 21 |
+
# if use_prompt_template:
|
| 22 |
+
# prompt = "Summarize the following news article. The summary should consist of multiple short sentences that cover the key points of the news article.Make relations between the news articles. \n\nNews Article: "
|
| 23 |
+
# inputs = tokenizer(prompt + chunk, return_tensors='pt', padding=True, truncation=True, max_length=375-40, add_special_tokens=True)
|
| 24 |
+
# else:
|
| 25 |
+
# inputs = tokenizer(chunk, return_tensors='pt', padding=True, truncation=True, max_length=375, add_special_tokens=True)
|
| 26 |
+
|
| 27 |
+
# # Generate the summary
|
| 28 |
+
# model_output = model.generate(
|
| 29 |
+
# **inputs,
|
| 30 |
+
# min_length=10,
|
| 31 |
+
# max_length=200
|
| 32 |
+
# )
|
| 33 |
+
# summaries.append(tokenizer.decode(model_output[0], skip_special_tokens=True))
|
| 34 |
+
# return summaries
|
| 35 |
+
|
| 36 |
+
# def iterative_summarization(model, tokenizer, text, chunk_size=300, overlap_size=50, max_iterations=5, use_prompt_template=True):
|
| 37 |
+
# current_text = text
|
| 38 |
+
# iteration = 0
|
| 39 |
+
|
| 40 |
+
# while len(current_text.split()) > chunk_size and iteration < max_iterations:
|
| 41 |
+
# print(f"Iteration {iteration + 1}: Text length = {len(current_text.split())}")
|
| 42 |
+
# # Step 1: Split the text into smaller chunks
|
| 43 |
+
# chunks = split_text(current_text, chunk_size, overlap_size)
|
| 44 |
+
|
| 45 |
+
# # Step 2: Summarize each chunk
|
| 46 |
+
# chunk_summaries = summarize_chunks(model, tokenizer, chunks, use_prompt_template)
|
| 47 |
+
|
| 48 |
+
# # Step 3: Combine summaries
|
| 49 |
+
# current_text = ' '.join(chunk_summaries)
|
| 50 |
+
# iteration += 1
|
| 51 |
+
|
| 52 |
+
# return current_text
|
| 53 |
+
|
| 54 |
+
# # Example usage
|
| 55 |
+
# final_summary = iterative_summarization(
|
| 56 |
+
# model=model,
|
| 57 |
+
# tokenizer=tokenizer,
|
| 58 |
+
# text=long_text,
|
| 59 |
+
# chunk_size=350,
|
| 60 |
+
# overlap_size=50,
|
| 61 |
+
# max_iterations=5,
|
| 62 |
+
# use_prompt_template=False
|
| 63 |
+
# )
|
| 64 |
+
|
| 65 |
+
# print("Final Summary:", final_summary)
|
| 66 |
+
|
| 67 |
+
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
| 68 |
+
|
| 69 |
+
class Summarizer:
    """Iteratively summarize long news articles with a fine-tuned seq2seq model.

    The input text is split into overlapping word chunks, each chunk is
    summarized independently, and the concatenated chunk summaries are fed
    back through the same process until the text fits in a single chunk or
    an iteration cap is reached.
    """

    # Instruction prefix prepended to each chunk when use_prompt_template=True.
    # Runtime string preserved verbatim from the original implementation.
    _PROMPT = (
        "Summarize the following news article. The summary should consist of multiple short sentences "
        "that cover the key points of the news article. Make relations between the news articles.\n\n"
        "Summarize News Article: "
    )

    # Encoder input budget in tokens; when the prompt is used, 40 tokens are
    # reserved for it so the article chunk still fits.
    _MAX_INPUT_TOKENS = 375
    _PROMPT_TOKEN_BUDGET = 40

    def __init__(self, model_path, tokenizer_path):
        """Initialize the Summarizer with the model and tokenizer.

        Args:
            model_path: Local path or hub id for the seq2seq model.
            tokenizer_path: Local path or hub id for the tokenizer.
        """
        print("News Summarizer was called")
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

    def split_text(self, text, chunk_size=300, overlap_size=50):
        """Split text into word chunks of ``chunk_size`` with ``overlap_size`` overlap.

        Args:
            text: The input text (split on whitespace).
            chunk_size: Maximum number of words per chunk.
            overlap_size: Number of words shared between consecutive chunks.

        Returns:
            List of chunk strings (empty list for empty input).

        Raises:
            ValueError: If ``overlap_size >= chunk_size`` — the stride would be
                zero or negative, which previously produced an opaque ``range``
                error or silently returned no chunks at all.
        """
        if overlap_size >= chunk_size:
            raise ValueError(
                "overlap_size must be smaller than chunk_size "
                f"(got overlap_size={overlap_size}, chunk_size={chunk_size})"
            )
        words = text.split()
        stride = chunk_size - overlap_size
        return [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), stride)]

    def summarize_chunks(self, chunks, use_prompt_template):
        """Summarize each chunk of text using the model.

        Args:
            chunks: Iterable of text chunks.
            use_prompt_template: If True, prepend the instruction prompt to
                each chunk (and shrink the token budget accordingly).

        Returns:
            List of decoded summary strings, one per chunk.
        """
        summaries = []
        for chunk in chunks:
            # Single tokenizer call; only the input text and token budget
            # differ between the prompted and unprompted cases.
            if use_prompt_template:
                model_input = self._PROMPT + chunk
                max_length = self._MAX_INPUT_TOKENS - self._PROMPT_TOKEN_BUDGET
            else:
                model_input = chunk
                max_length = self._MAX_INPUT_TOKENS

            inputs = self.tokenizer(
                model_input,
                return_tensors='pt',
                padding=True,
                truncation=True,
                max_length=max_length,
                add_special_tokens=True
            )

            # Generate the summary for this chunk.
            model_output = self.model.generate(
                **inputs,
                min_length=10,
                max_length=200
            )
            summaries.append(self.tokenizer.decode(model_output[0], skip_special_tokens=True))
        return summaries

    def iterative_summarization(self, text, chunk_size=300, overlap_size=50, max_iterations=5, use_prompt_template=True):
        """Perform iterative summarization to condense text into a final summary.

        Repeats split -> summarize -> join until the text fits within one
        chunk or ``max_iterations`` passes have run.

        Args:
            text: The full input text.
            chunk_size: Word budget per chunk (also the stopping threshold).
            overlap_size: Word overlap between consecutive chunks.
            max_iterations: Upper bound on condensation passes.
            use_prompt_template: Forwarded to :meth:`summarize_chunks`.

        Returns:
            The condensed text after the final pass (the input itself if it
            already fits in one chunk).
        """
        current_text = text
        iteration = 0

        while len(current_text.split()) > chunk_size and iteration < max_iterations:
            print(f"Iteration {iteration + 1}: Text length = {len(current_text.split())}")
            # Step 1: Split the text into smaller chunks
            chunks = self.split_text(current_text, chunk_size, overlap_size)

            # Step 2: Summarize each chunk
            chunk_summaries = self.summarize_chunks(chunks, use_prompt_template)

            # Step 3: Combine summaries and repeat on the condensed text
            current_text = ' '.join(chunk_summaries)
            iteration += 1

        return current_text
# Example usage
if __name__ == "__main__":
    import sys

    # Default local paths from the original development machine; override on
    # the command line: python summarizer.py <model_path> [<tokenizer_path>]
    model_path = r'C:\Users\TDH\VIIIIIV\MVIIXXIV\2024\Senior Project 1\huggingface_model_deployment\FlanT5_base_fine_tuned_for_Summarization'
    tokenizer_path = r'C:\Users\TDH\VIIIIIV\MVIIXXIV\2024\Senior Project 1\huggingface_model_deployment\FlanT5_base_fine_tuned_for_Summarization\local_google_flan_t5_tokenizer'
    if len(sys.argv) > 1:
        model_path = sys.argv[1]
    if len(sys.argv) > 2:
        tokenizer_path = sys.argv[2]

    # Demo article (CNN piece on Mohammad Javad Zarif) used to exercise the
    # iterative summarizer end to end.
    long_text = """If you've been following the news lately, there are certain things you doubtless know about Mohammad Javad Zarif. He is, of course, the Iranian foreign minister. He has been U.S. Secretary of State John Kerry's opposite number in securing a breakthrough in nuclear discussions that could lead to an end to sanctions against Iran -- if the details can be worked out in the coming weeks. And he received a hero's welcome as he arrived in Iran on a sunny Friday morning. "Long live Zarif," crowds chanted as his car rolled slowly down the packed street. You may well have read that he is "polished" and, unusually for one burdened with such weighty issues, "jovial." An Internet search for "Mohammad Javad Zarif" and "jovial" yields thousands of results. He certainly has gone a long way to bring Iran in from the cold and allow it to rejoin the international community. But there are some facts about Zarif that are less well-known. Here are six: . In September 2013, Zarif tweeted "Happy Rosh Hashanah," referring to the Jewish New Year. That prompted Christine Pelosi, the daughter of House Minority Leader Nancy Pelosi, to respond with a tweet of her own: "Thanks. The New Year would be even sweeter if you would end Iran's Holocaust denial, sir." And, perhaps to her surprise, Pelosi got a response. "Iran never denied it," Zarif tweeted back. "The man who was perceived to be denying it is now gone. Happy New Year." The reference was likely to former Iranian President Mahmoud Ahmadinejad, who had left office the previous month. Zarif was nominated to be foreign minister by Ahmadinejad's successor, Hassan Rouhami. His foreign ministry notes, perhaps defensively, that "due to the political and security conditions of the time, he decided to continue his education in the United States." 
That is another way of saying that he was outside the country during the demonstrations against the Shah of Iran, which began in 1977, and during the Iranian Revolution, which drove the shah from power in 1979. Zarif left the country in 1977, received his undergraduate degree from San Francisco State University in 1981, his master's in international relations from the University of Denver in 1984 and his doctorate from the University of Denver in 1988. Both of his children were born in the United States. The website of the Iranian Foreign Ministry, which Zarif runs, cannot even agree with itself on when he was born. The first sentence of his official biography, perhaps in a nod to the powers that be in Tehran, says Zarif was "born to a religious traditional family in Tehran in 1959." Later on the same page, however, his date of birth is listed as January 8, 1960. And the Iranian Diplomacy website says he was born in in 1961 . So he is 54, 55 or maybe even 56. Whichever, he is still considerably younger than his opposite number, Kerry, who is 71. The feds investigated him over his alleged role in controlling the Alavi Foundation, a charitable organization. The U.S. Justice Department said the organization was secretly run on behalf of the Iranian government to launder money and get around U.S. sanctions. But last year, a settlement in the case, under which the foundation agreed to give a 36-story building in Manhattan along with other properties to the U.S. government, did not mention Zarif's name. Early in the Iranian Revolution, Zarif was among the students who took over the Iranian Consulate in San Francisco. The aim, says the website Iranian.com -- which cites Zarif's memoirs, titled "Mr. Ambassador" -- was to expel from the consulate people who were not sufficiently Islamic. Later, the website says, Zarif went to make a similar protest at the Iranian mission to the United Nations. In response, the Iranian ambassador to the United Nations offered him a job. 
In fact, he has now spent more time with Kerry than any other foreign minister in the world. And that amount of quality time will only increase as the two men, with help from other foreign ministers as well, try to meet a June 30 deadline for nailing down the details of the agreement they managed to outline this week in Switzerland.

"""

    summarizer = Summarizer(model_path, tokenizer_path)

    final_summary = summarizer.iterative_summarization(
        text=long_text,
        chunk_size=350,
        overlap_size=50,
        max_iterations=5,
        use_prompt_template=True
    )

    print("Final Summary:", final_summary)