Initial commit
Files changed:
- README.md +3 -3
- app.py +513 -54
- requirements.txt +10 -1
README.md CHANGED
@@ -1,10 +1,10 @@
 ---
-title:
+title: Assignment1KG
 emoji: 💬
 colorFrom: yellow
 colorTo: purple
-sdk:
-sdk_version:
+sdk: streamlit
+sdk_version: 1.35.0
 app_file: app.py
 pinned: false
 license: apache-2.0
app.py CHANGED
@@ -1,64 +1,523 @@
The previous 64-line file, the stock chat demo built on InferenceClient("HuggingFaceH4/zephyr-7b-beta") with its streaming respond() callback, is removed in full and replaced by the Streamlit application below.

#!/usr/bin/env python3
"""
News Summarizer Agent - Assignment 2
An agentic AI app that fetches and summarizes latest news on any topic using NewsAPI and Serper.dev
"""

import os
import json
import boto3
import streamlit as st
import requests
from dotenv import load_dotenv
import logging
from typing import Dict, List, Any, Optional
from datetime import datetime, timedelta
import time

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables
load_dotenv()

class NewsAgent:
    """Agentic AI News Summarizer using multiple news sources"""

    def __init__(self):
        """Initialize the news agent with API credentials"""
        self.setup_credentials()
        self.setup_aws_bedrock()

    def setup_credentials(self):
        """Setup API credentials from environment variables"""
        self.aws_access_key = os.getenv("AWS_ACCESS_KEY_ID")
        self.aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
        self.aws_region = os.getenv("AWS_REGION", "us-east-1")

        # News API credentials
        self.newsapi_key = os.getenv("NEWSAPI_KEY")
        self.serper_key = os.getenv("SERPER_API_KEY")

        # Validate AWS credentials
        if not all([self.aws_access_key, self.aws_secret_key]):
            raise ValueError("Missing AWS credentials. Check your .env file.")

        # Check if at least one news API key is available
        if not self.newsapi_key and not self.serper_key:
            st.warning("⚠️ No news API keys found. Please add NEWSAPI_KEY and/or SERPER_API_KEY to your .env file.")
        elif (self.newsapi_key == "your_newsapi_key_from_newsapi.org" or not self.newsapi_key) and \
             (self.serper_key == "your_serper_key_from_serper.dev" or not self.serper_key):
            st.error("❌ **API Keys are still placeholders!** Please replace with real API keys from newsapi.org and serper.dev")

    def setup_aws_bedrock(self):
        """Initialize AWS Bedrock client"""
        try:
            self.bedrock_client = boto3.client(
                'bedrock-runtime',
                aws_access_key_id=self.aws_access_key,
                aws_secret_access_key=self.aws_secret_key,
                region_name=self.aws_region
            )
            logger.info("AWS Bedrock client initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize AWS Bedrock: {e}")
            raise

    def call_claude(self, prompt: str, max_tokens: int = 1000) -> str:
        """Call AWS Bedrock Claude for AI processing"""
        try:
            body = {
                "anthropic_version": "bedrock-2023-05-31",
                "max_tokens": max_tokens,
                "temperature": 0.3,
                "top_p": 0.9,
                "messages": [
                    {
                        "role": "user",
                        "content": prompt
                    }
                ]
            }

            response = self.bedrock_client.invoke_model(
                modelId="anthropic.claude-3-sonnet-20240229-v1:0",
                contentType="application/json",
                accept="application/json",
                body=json.dumps(body)
            )

            response_body = json.loads(response['body'].read())
            return response_body['content'][0]['text'].strip()

        except Exception as e:
            logger.error(f"Error calling Claude: {e}")
            return f"Error: {e}"

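    # Note on call_claude above: invoke_model returns the payload as a botocore
    # StreamingBody under response['body'], and the Anthropic Messages format
    # nests the generated text at content[0]['text'], roughly:
    #   {"content": [{"type": "text", "text": "..."}], "stop_reason": "end_turn"}
    # hence the read() and json.loads() before indexing into the result.
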
    def fetch_news_from_newsapi(self, topic: str, max_articles: int = 10) -> List[Dict]:
        """Fetch news from NewsAPI"""
        if not self.newsapi_key:
            return []

        try:
            # Calculate date for recent news (last 7 days)
            from_date = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')

            url = "https://newsapi.org/v2/everything"
            params = {
                'q': topic,
                'apiKey': self.newsapi_key,
                'language': 'en',
                'sortBy': 'publishedAt',
                'from': from_date,
                'pageSize': max_articles
            }

            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()

            data = response.json()
            articles = data.get('articles', [])

            news_items = []
            for article in articles:
                if article.get('title') and article.get('description'):
                    news_items.append({
                        'title': article.get('title', ''),
                        'description': article.get('description', ''),
                        'content': article.get('content', ''),
                        'url': article.get('url', ''),
                        'published_at': article.get('publishedAt', ''),
                        'source': article.get('source', {}).get('name', 'NewsAPI'),
                        'api_source': 'NewsAPI'
                    })

            logger.info(f"Fetched {len(news_items)} articles from NewsAPI")
            return news_items

        except Exception as e:
            logger.error(f"Error fetching from NewsAPI: {e}")
            return []

    def fetch_news_from_serper(self, topic: str, max_articles: int = 10) -> List[Dict]:
        """Fetch news from Serper.dev (Google News)"""
        if not self.serper_key:
            return []

        try:
            url = "https://google.serper.dev/news"
            headers = {
                'X-API-KEY': self.serper_key,
                'Content-Type': 'application/json'
            }

            payload = {
                'q': topic,
                'num': max_articles,
                'tbm': 'nws'
            }

            response = requests.post(url, headers=headers, json=payload, timeout=10)
            response.raise_for_status()

            data = response.json()
            articles = data.get('news', [])

            news_items = []
            for article in articles:
                if article.get('title') and article.get('snippet'):
                    news_items.append({
                        'title': article.get('title', ''),
                        'description': article.get('snippet', ''),
                        'content': article.get('snippet', ''),
                        'url': article.get('link', ''),
                        'published_at': article.get('date', ''),
                        'source': article.get('source', 'Google News'),
                        'api_source': 'Serper'
                    })

            logger.info(f"Fetched {len(news_items)} articles from Serper")
            return news_items

        except Exception as e:
            logger.error(f"Error fetching from Serper: {e}")
            return []

    def fetch_all_news(self, topic: str) -> List[Dict]:
        """Fetch news from all available sources"""
        all_news = []

        # Check which APIs are available
        newsapi_available = self.newsapi_key and self.newsapi_key != "your_newsapi_key_from_newsapi.org"
        serper_available = self.serper_key and self.serper_key != "your_serper_key_from_serper.dev"

        st.info(f"📡 **Available APIs:** NewsAPI: {'✅' if newsapi_available else '❌'} | Serper: {'✅' if serper_available else '❌'}")

        # Fetch from NewsAPI
        if newsapi_available:
            st.info("🔍 **Fetching from NewsAPI...**")
            newsapi_articles = self.fetch_news_from_newsapi(topic, 10)
            all_news.extend(newsapi_articles)
            st.success(f"✅ **NewsAPI:** {len(newsapi_articles)} articles fetched")
        else:
            st.warning("⚠️ **NewsAPI:** Invalid or missing API key")

        # Fetch from Serper
        if serper_available:
            st.info("🔍 **Fetching from Serper.dev...**")
            serper_articles = self.fetch_news_from_serper(topic, 10)
            all_news.extend(serper_articles)
            st.success(f"✅ **Serper.dev:** {len(serper_articles)} articles fetched")
        else:
            st.warning("⚠️ **Serper.dev:** Invalid or missing API key")

        if not newsapi_available and not serper_available:
            st.error("❌ **No valid API keys found!** Please add real API keys to your .env file")
            return []

        # Remove duplicates based on title similarity
        unique_news = self.remove_duplicate_articles(all_news)

        # Sort by published date (most recent first)
        unique_news.sort(key=lambda x: x.get('published_at', ''), reverse=True)

        st.info(f"📊 **Total unique articles:** {len(unique_news)}")
        return unique_news

    def remove_duplicate_articles(self, articles: List[Dict]) -> List[Dict]:
        """Remove duplicate articles based on title similarity"""
        unique_articles = []
        seen_titles = set()

        for article in articles:
            title = article.get('title', '').lower().strip()
            # Simple deduplication - check if title contains similar words
            title_words = set(title.split())

            is_duplicate = False
            for seen_title in seen_titles:
                seen_words = set(seen_title.split())
                # If 70% of words are common, consider it duplicate
                if len(title_words.intersection(seen_words)) / max(len(title_words), len(seen_words)) > 0.7:
                    is_duplicate = True
                    break

            if not is_duplicate:
                unique_articles.append(article)
                seen_titles.add(title)

        return unique_articles

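    # Worked example of the 0.7 overlap threshold above (illustrative titles):
    # "tesla opens new gigafactory in texas" vs. "tesla opens gigafactory in
    # texas" share 5 words; 5 / max(6, 5) = 0.83 > 0.7, so the second title is
    # treated as a duplicate and dropped.
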
    def generate_relevant_headline(self, original_title: str, content: str, topic: str) -> str:
        """Generate a topic-relevant headline using Claude"""
        prompt = f"""
        You are a news headline editor. Create a relevant, engaging headline for this news article that specifically relates to the topic "{topic}".

        Original Title: {original_title}
        Article Content: {content[:500]}...

        Requirements:
        1. The headline should be directly relevant to the topic "{topic}"
        2. Keep it concise (under 80 characters)
        3. Make it engaging and informative
        4. Focus on the aspect most relevant to the search topic
        5. Don't use clickbait language

        Return only the headline, no explanations.
        """

        try:
            relevant_headline = self.call_claude(prompt, max_tokens=100)
            # Clean up the response
            relevant_headline = relevant_headline.strip().strip('"').strip("'")
            return relevant_headline if relevant_headline else original_title
        except Exception:
            return original_title

    def generate_summary(self, title: str, description: str, content: str, topic: str) -> str:
        """Generate a 3-5 line summary using Claude"""
        prompt = f"""
        You are a news summarizer. Create a concise 3-5 line summary of this news article, focusing on aspects most relevant to the topic "{topic}".

        Title: {title}
        Description: {description}
        Content: {content[:1000]}...

        Requirements:
        1. Write exactly 3-5 lines
        2. Focus on information most relevant to "{topic}"
        3. Include key facts, numbers, and important details
        4. Write in clear, professional language
        5. Each line should be a complete sentence

        Summary:
        """

        try:
            summary = self.call_claude(prompt, max_tokens=200)
            return summary.strip()
        except Exception as e:
            return f"Summary generation failed: {e}"

    def process_news_agentic(self, topic: str) -> List[Dict]:
        """Main agentic processing pipeline"""
        st.info(f"🤖 **Agent Status:** Starting news analysis for '{topic}'...")

        # Step 1: Fetch news from multiple sources
        with st.spinner("🔍 **Agent Action:** Fetching news from multiple sources..."):
            all_news = self.fetch_all_news(topic)

        if not all_news:
            st.error("❌ **Agent Result:** No news articles found. Please check your API keys and try again.")
            return []

        st.success(f"✅ **Agent Result:** Found {len(all_news)} unique articles")

        # Step 2: Process top 5 articles
        with st.spinner("🧠 **Agent Action:** Processing articles with AI..."):
            top_articles = all_news[:5]
            processed_articles = []

            progress_bar = st.progress(0)
            for i, article in enumerate(top_articles):
                st.info(f"📝 **Agent Processing:** Article {i+1}/5 - {article['title'][:50]}...")

                # Generate relevant headline
                relevant_headline = self.generate_relevant_headline(
                    article['title'],
                    article.get('content', article['description']),
                    topic
                )

                # Generate summary
                summary = self.generate_summary(
                    article['title'],
                    article['description'],
                    article.get('content', article['description']),
                    topic
                )

                processed_article = {
                    'original_title': article['title'],
                    'relevant_headline': relevant_headline,
                    'summary': summary,
                    'url': article['url'],
                    'published_at': article['published_at'],
                    'source': article['source'],
                    'api_source': article['api_source']
                }

                processed_articles.append(processed_article)
                progress_bar.progress((i + 1) / len(top_articles))

                # Small delay to avoid overwhelming the API
                time.sleep(0.5)

        st.success("✅ **Agent Complete:** All articles processed successfully!")
        return processed_articles

    def display_news_results(self, processed_articles: List[Dict], topic: str):
        """Display the processed news results in Streamlit"""
        st.header(f"📰 Top 5 Latest News on '{topic}'")
        st.markdown("---")

        for i, article in enumerate(processed_articles, 1):
            with st.container():
                # Headline
                st.subheader(f"{i}. {article['relevant_headline']}")

                # Metadata
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.caption(f"📅 {article['published_at'][:10] if article['published_at'] else 'Date unavailable'}")
                with col2:
                    st.caption(f"🌐 Source: {article['source']}")
                with col3:
                    st.caption(f"🤖 Via: {article['api_source']}")

                # Summary
                st.markdown(f"**Summary:**")
                st.write(article['summary'])

                # Read more link
                if article['url']:
                    st.markdown(f"[🔗 Read full article]({article['url']})")

                # Original title (for reference)
                with st.expander("📄 Original Title"):
                    st.write(article['original_title'])

                st.markdown("---")

def create_sample_env():
    """Display sample .env file content"""
    st.info("""
    📄 **Sample .env file:**
    ```
    # AWS Bedrock Credentials
    AWS_ACCESS_KEY_ID="your_aws_access_key"
    AWS_SECRET_ACCESS_KEY="your_aws_secret_key"
    AWS_REGION="us-east-1"

    # News API Keys (get from respective websites)
    NEWSAPI_KEY="your_newsapi_key_from_newsapi.org"
    SERPER_API_KEY="your_serper_key_from_serper.dev"
    ```

    **Get API Keys:**
    - NewsAPI: https://newsapi.org/
    - Serper.dev: https://serper.dev/
    """)

def main():
    """Main Streamlit application"""

    # Page configuration
    st.set_page_config(
        page_title="📰 News Summarizer Agent",
        page_icon="🤖",
        layout="wide"
    )

    # Header
    st.title("🤖 News Summarizer Agent")
    st.markdown("*Powered by Agentic AI with AWS Bedrock Claude 3 Sonnet*")
    st.markdown("Get the top 5 latest news on any topic with AI-generated relevant headlines and summaries.")

    # Sidebar
    with st.sidebar:
        st.header("🔧 Configuration")
        create_sample_env()

        st.header("🚀 Agent Features")
        st.markdown("""
        ✅ **Multi-Source News Fetching**
        - NewsAPI.org integration
        - Serper.dev (Google News) integration

        ✅ **Agentic AI Processing**
        - Relevant headline generation
        - Intelligent summarization
        - Duplicate removal

        ✅ **Smart Analysis**
        - Topic-focused content
        - Latest news prioritization
        - Professional summaries
        """)

    # Initialize the agent
    if 'news_agent' not in st.session_state:
        try:
            with st.spinner("🚀 Initializing News Agent..."):
                st.session_state.news_agent = NewsAgent()
            st.success("✅ News Agent initialized successfully!")
        except Exception as e:
            st.error(f"❌ Failed to initialize News Agent: {e}")
            st.stop()

    # Main interface
    st.header("🔍 Search for News")

    # Input section
    col1, col2 = st.columns([3, 1])

    with col1:
        topic = st.text_input(
            "Enter topic, company name, or keywords:",
            placeholder="e.g., Tesla, Climate Change, Artificial Intelligence, Apple, Biden...",
            help="Enter any topic you want to get the latest news about"
        )

    with col2:
        search_button = st.button("🔍 Get News", type="primary", use_container_width=True)

    # Example topics
    st.markdown("**💡 Example Topics:**")
    example_topics = ["Tesla", "Climate Change", "Artificial Intelligence", "Apple", "Bitcoin", "Space X", "Netflix", "Microsoft"]

    cols = st.columns(4)
    for i, example_topic in enumerate(example_topics):
        with cols[i % 4]:
            if st.button(example_topic, key=f"example_{i}"):
                st.session_state.selected_topic = example_topic
                topic = example_topic

    # Process the search
    if search_button and topic:
        try:
            # Process news using agentic AI
            processed_articles = st.session_state.news_agent.process_news_agentic(topic)

            if processed_articles:
                # Display results
                st.session_state.news_agent.display_news_results(processed_articles, topic)

                # Download option
                st.markdown("---")
                col1, col2 = st.columns([1, 1])
                with col1:
                    if st.button("📥 Download Results as JSON"):
                        json_data = json.dumps(processed_articles, indent=2)
                        st.download_button(
                            label="📄 Download JSON",
                            data=json_data,
                            file_name=f"news_summary_{topic.replace(' ', '_')}.json",
                            mime="application/json"
                        )
            else:
                st.warning("⚠️ No news articles found for this topic. Try a different search term.")

        except Exception as e:
            st.error(f"❌ Error processing news: {e}")

    elif search_button and not topic:
        st.warning("⚠️ Please enter a topic to search for news.")

    # Footer
    st.markdown("---")
    st.markdown("*🤖 Built with Agentic AI • Powered by AWS Bedrock Claude 3 Sonnet • News from NewsAPI & Serper.dev*")

if __name__ == "__main__":
    main()
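A minimal local smoke test of the fetch layer, sketched under assumptions: app.py is importable from the working directory, a populated .env (matching create_sample_env above) is present, and the "Tesla" topic is illustrative. Importing app.py is safe because main() sits behind the __main__ guard, and the Streamlit status calls inside NewsAgent degrade to console warnings outside `streamlit run`.

# Illustrative smoke test; not part of the commit.
from app import NewsAgent

agent = NewsAgent()  # raises ValueError if the AWS credentials are absent
articles = agent.fetch_news_from_newsapi("Tesla", max_articles=3)
for item in articles:
    print(item["published_at"], item["title"])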
requirements.txt CHANGED
@@ -1 +1,10 @@
-huggingface_hub==0.25.2
+huggingface_hub==0.25.2
+qdrant_client
+streamlit
+boto3
+PyPDF2
+chromadb
+datasets
+
+streamlit
+boto3
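Before launching the Space locally with `streamlit run app.py`, a small pre-flight check can save a failed start; the sketch below is illustrative and relies only on the variable names app.py reads through python-dotenv.

# Illustrative pre-flight check for the .env keys app.py expects.
import os
from dotenv import load_dotenv

load_dotenv()
for key in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY",
            "NEWSAPI_KEY", "SERPER_API_KEY"):
    print(f"{key}: {'set' if os.getenv(key) else 'MISSING'}")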
|