# Spaces status: Sleeping
| import requests | |
| import streamlit as st | |
| from bs4 import BeautifulSoup | |
| from transformers import pipeline | |
@st.cache_resource
def load_model():
    """Build the zero-shot classification pipeline used to tag articles.

    Streamlit re-executes the whole script on every widget interaction.
    Caching the pipeline as a resource keeps one shared instance alive
    between reruns instead of reloading the model each time.

    Returns:
        The ``transformers`` zero-shot-classification pipeline backed by
        ``facebook/bart-large-mnli``.
    """
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


# Shared classifier instance used by the button handler further down.
model = load_model()
def extract_article_text(url, timeout=10):
    """Download an article page and return the text of its content block.

    Args:
        url: Address of the article page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text of the first ``<div class="article__content">`` element,
        with text fragments separated by single spaces. If the page has no
        such element, the string ``"Article not found."`` is returned. If
        fetching or parsing fails, ``"Error: <details>"`` is returned.
        Failures are reported through the return value, not by raising.
    """
    try:
        # Without a timeout a stalled server would block the app indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        article = soup.find("div", class_="article__content")
        if article is None:
            return "Article not found."
        # A separator keeps words from adjacent tags from being glued
        # together once each fragment has been stripped.
        return article.get_text(separator=" ", strip=True)
    except Exception as e:
        # Deliberate catch-all: callers expect an "Error: ..." string
        # rather than an exception, whatever went wrong.
        return f"Error: {e}"
# Page header, a short instruction, and the text box that collects the URL.
st.title("Tag Detection from CNN News articles")
st.write("Enter a CNN News article URL.")
news_url = st.text_input(
    "CNN Article URL:",
    placeholder="Example: https://edition.cnn.com/2024/12/19/science/stonehenge-monument-early-farmers/index.html",
)
# Candidate labels passed to the classifier when tagging an article.
categories = [
    "Politics", "Economy", "Sports", "Weather", "Health",
    "Technology", "Culture", "Science", "Education", "Entertainment",
    "Business", "Environment", "Crime", "Travel", "International Relations",
]
# Button handler: fetch the article, show a short preview, then score the
# text against every entry in `categories` and list each label with its score.
if st.button("Get tags"):
    url = news_url.strip()
    if not url:
        st.write("Please enter a valid news URL.")
    else:
        article_text = extract_article_text(url)
        # The extractor reports failures as sentinel strings, so every one of
        # them must be recognised here; otherwise the failure message itself
        # would be classified as if it were the article.
        if article_text.startswith(("Error", "Article not found", "Could not")):
            st.write(article_text)
        elif not article_text:
            # Scoring an empty string would only produce meaningless tags.
            st.write("The article was found but contains no text.")
        else:
            st.write("**Extracted Article Text:**")
            preview = article_text[:500]
            # Only mark the preview as truncated when text was actually cut.
            if len(article_text) > 500:
                preview += "..."
            st.write(preview)
            with st.spinner("Analyzing... Please wait."):
                # Only the first 2000 characters are classified
                # (presumably to bound model input size — confirm).
                result = model(article_text[:2000], categories, multi_label=True)
            st.write("**Predicted Tags:**")
            for label, score in zip(result["labels"], result["scores"]):
                st.write(f"- {label}: {score:.2f}")