|
|
|
|
|
"""NewsSummary.ipynb |
|
|
|
|
|
Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1o6xj-MQoYO_ZXMNd1MTq-8CmtguyphQW
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import json |
|
|
|
|
|
import requests |
|
|
from huggingface_hub import hf_hub_download |
|
|
from transformers import pipeline |
|
|
from transformers import BartTokenizer, BartForConditionalGeneration |
|
|
|
|
|
# Hugging Face Hub identifier of the checkpoint loaded by
# load_model_and_tokenizer().
model_name = "sshleifer/distilbart-cnn-12-6"

import http.client
import urllib.parse

# NOTE(review): `conn` is not referenced by any function in the visible
# source (fetch_news() goes through `requests`); kept because it is a
# module-level name other code may rely on -- confirm before removing.
conn = http.client.HTTPSConnection('api.thenewsapi.com')

import os

# The API token is read from the environment at import time.
API_TOKEN = os.environ.get("NEWS_API_TOKEN")

if not API_TOKEN:
    st.error("API token not set. Please configure NEWS_API_TOKEN in Hugging Face Secrets.")
|
|
|
|
|
|
|
|
|
|
|
def fetch_news():
    """Fetch articles from TheNewsAPI's ``/v1/news/all`` endpoint.

    The request asks for at most 3 articles with ``locale='in'`` and
    ``language='en'``, restricted to indiatimes.com, ndtv.com and
    thehindu.com.

    Returns:
        The decoded JSON response body on success, or ``None`` when the
        API token is missing, the HTTP request fails, or the body is not
        valid JSON. Every failure is also reported through ``st.error``.
    """
    if not API_TOKEN:
        st.error("API token not set. Please configure NEWS_API_TOKEN in Hugging Face Secrets.")
        return None

    # Local import: only needed for redacting the token from error text.
    from urllib.parse import quote, quote_plus

    params = {
        'api_token': API_TOKEN,
        'limit': 3,
        'locale': 'in',
        'language': 'en',
        'domains': 'indiatimes.com,ndtv.com,thehindu.com',
    }
    try:
        # Without a timeout a stalled connection would block the app
        # indefinitely.
        response = requests.get(
            'https://api.thenewsapi.com/v1/news/all',
            params=params,
            timeout=10,
        )
        response.raise_for_status()
        # A non-JSON body raises ValueError (or a subclass of it).
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # requests puts the full request URL -- query string included --
        # into its error messages, so the token must be masked (in raw
        # and URL-encoded form) before the text reaches the UI.
        message = str(e)
        for secret in (API_TOKEN, quote(API_TOKEN, safe=""), quote_plus(API_TOKEN)):
            message = message.replace(secret, "***")
        st.error(f"Failed to fetch news: {message}")
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_resource
def load_model_and_tokenizer():
    """Load the BART tokenizer and model identified by ``model_name``.

    Both are fetched with ``from_pretrained`` using ``/tmp/hf_cache`` as
    the cache directory, while a Streamlit spinner is displayed. The
    function is wrapped in ``st.cache_resource``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    hub_options = {"cache_dir": "/tmp/hf_cache"}

    with st.spinner("Loading summarization model... please wait."):
        loaded_tokenizer = BartTokenizer.from_pretrained(model_name, **hub_options)
        loaded_model = BartForConditionalGeneration.from_pretrained(model_name, **hub_options)

    return loaded_tokenizer, loaded_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def summarize(text, bart_tokenizer, bart_model, maxSummarylength=500):
    """Summarize ``text`` with a BART tokenizer/model pair.

    Args:
        text: The text to summarize.
        bart_tokenizer: Object providing ``encode`` and ``decode``.
        bart_model: Object providing ``generate``.
        maxSummarylength: Upper bound passed to ``generate`` as
            ``max_length``. One fifth of it is used as ``min_length``,
            capped at the number of input tokens.

    Returns:
        The decoded summary, or ``""`` when ``text`` is empty or
        whitespace-only (the model is not invoked in that case).
    """
    if not text or not text.strip():
        return ""

    # The raw text is encoded as-is: a "summarize: " task prefix is a T5
    # convention, and a BART checkpoint would treat it as article content.
    inputs = bart_tokenizer.encode(
        text,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )

    max_len = max(int(maxSummarylength), 1)
    # Never demand more output tokens than the input contains; a fixed
    # minimum (100 by default) forces padding/repetition on short inputs.
    input_len = len(inputs[0])
    min_len = max(0, min(int(maxSummarylength / 5), input_len, max_len))

    summary_ids = bart_model.generate(
        inputs,
        max_length=max_len,
        min_length=min_len,
        length_penalty=10.0,
        num_beams=4,
        early_stopping=True,
    )
    return bart_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def display(title, url, img, src, txt):
    """Render a single news item in the Streamlit page.

    Args:
        title: Headline, shown as a bold level-2 markdown heading.
        url: Target of the "read the full article" link.
        img: Image passed to ``st.image``; skipped when falsy.
        src: Value shown after the "Source:" label.
        txt: Value shown after the "Summary:" label.
    """
    heading = f"## **{title}**"
    st.markdown(heading)

    if img:
        st.image(img, use_container_width=True)

    for labelled_line in (f"**Source:** {src}", f"**Summary:** {txt}"):
        st.write(labelled_line)

    read_more = f"[Want to read the full article? Click Here!]({url})"
    st.markdown(read_more)
    # Separator between consecutive items.
    st.markdown("------------------------------")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process(bart_tokenizer, bart_model):
    """Fetch news, summarize each article's snippet and render it.

    Articles without a ``snippet`` are skipped. A warning is shown when
    nothing was rendered -- whether the fetch failed, the response had no
    ``data`` entries, or every article lacked a snippet.

    Args:
        bart_tokenizer: Tokenizer forwarded to ``summarize``.
        bart_model: Model forwarded to ``summarize``.
    """
    data = fetch_news()

    # Treat a missing/None "data" field the same as an empty result set
    # instead of failing while iterating.
    articles = (data.get("data") or []) if data and "data" in data else []

    shown = 0
    for single_news in articles:
        news_text = single_news.get("snippet", "")
        if not news_text:
            continue

        news_title = single_news.get("title", "No Title Available")
        news_url = single_news.get("url", "#")
        news_img_url = single_news.get("image_url")
        news_source = single_news.get("source", "Source Unknown")

        news_summary = summarize(news_text, bart_tokenizer, bart_model)
        display(news_title, news_url, news_img_url, news_source, news_summary)
        shown += 1

    # Previously an empty article list left the page blank with no hint.
    if not shown:
        st.warning("No news articles were found.")
|
|
|
|
|
def strmlt():
    """Build the Streamlit page: title, model loading and the news button.

    The model and tokenizer are loaded first. If loading raises or yields
    a missing component, an error is shown and the page stops there.
    Otherwise a "Load Latest News" button triggers ``process``.
    """
    st.title("Newsflix")

    try:
        bart_tokenizer, bart_model = load_model_and_tokenizer()
    except Exception as exc:  # top-level UI boundary: report, don't crash
        # The loader raises on failure rather than returning falsy values,
        # so without this handler the error message below was unreachable.
        st.error("Application could not start because the model failed to load.")
        st.exception(exc)
        return

    if bart_tokenizer is None or bart_model is None:
        st.error("Application could not start because the model failed to load.")
        return

    if st.button("Load Latest News"):
        with st.spinner("Fetching and summarizing latest news..."):
            process(bart_tokenizer, bart_model)
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    strmlt()