Sasmita Harini committed
Commit 37b27d1 · Parent(s): 8279aeb
Updated app.py with new title and utils.py with latest fetch logic

Files changed:
- README.md (+10 -31)
- __pycache__/api.cpython-39.pyc (+0 -0)
- __pycache__/app.cpython-39.pyc (+0 -0)
- __pycache__/backend.cpython-39.pyc (+0 -0)
- __pycache__/utils.cpython-39.pyc (+0 -0)
- api.py (+82 -0)
- app.py (+109 -66)
- requirements.txt (+13 -15)
- utils.py (+76 -56)
README.md CHANGED
@@ -1,34 +1,13 @@
----
-title: News Summarization and Text-to-Speech
-emoji: 📰
-colorFrom: blue
-colorTo: green
-sdk: streamlit
-sdk_version: "1.36.0"
-app_file: app.py
-pinned: false
----
-
-##
-1.
-
-## Features
-- Fetches news from multiple RSS feeds.
-- Summarizes articles using T5 model.
-- Performs sentiment analysis and topic extraction.
-- Translates sentiment to Hindi and generates audio.
-
-## Dependencies
-See `requirements.txt` for the full list of Python packages.
-
-## Notes
-- Requires a Groq API key (set as a secret in Space settings).
-- Limited to 10 articles per request to manage resources.
-
-Check out the configuration reference at [https://huggingface.co/docs/hub/spaces-config-reference](https://huggingface.co/docs/hub/spaces-config-reference).
+# News Summarization and Text-to-Speech Application
+
+This application fetches news articles about a specified company, summarizes them, performs sentiment analysis, and generates a Hindi audio summary of the final sentiment.
+
+## Prerequisites
+- Python 3.10+
+- A Groq API key (set as an environment variable: `GROQ_API_KEY`)
+
+## Setup Instructions
+1. Clone the repository:
+```bash
+git clone <repository-url>
+cd <repository-directory>
__pycache__/api.cpython-39.pyc ADDED
Binary file (3.08 kB)

__pycache__/app.cpython-39.pyc ADDED
Binary file (4.42 kB)

__pycache__/backend.cpython-39.pyc ADDED
Binary file (5.79 kB)

__pycache__/utils.cpython-39.pyc CHANGED
Binary files a/__pycache__/utils.cpython-39.pyc and b/__pycache__/utils.cpython-39.pyc differ
api.py ADDED
@@ -0,0 +1,82 @@
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+import utils
+from deep_translator import GoogleTranslator
+from gtts import gTTS
+import base64
+import io
+import json
+import uvicorn
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = FastAPI(title="News Analysis API")
+translator = GoogleTranslator(source='en', target='hi')
+
+class CompanyRequest(BaseModel):
+    company_name: str
+
+@app.post("/api/fetch_news")
+async def fetch_news(request: CompanyRequest):
+    try:
+        company_name = request.company_name.strip().lower()
+        if not company_name:
+            raise HTTPException(status_code=400, detail="Company name is required")
+
+        logger.info(f"Fetching news for {company_name}")
+        file_name = utils.fetch_and_save_news(company_name)
+        if not file_name:
+            logger.warning(f"No news found for {company_name}")
+            raise HTTPException(status_code=404, detail=f"No news found for {company_name}")
+
+        with open(file_name, "r", encoding="utf-8") as file:
+            content = file.read()
+
+        try:
+            news_data = json.loads(content)  # Should work with updated utils.py
+            logger.info(f"Successfully parsed news data for {company_name}")
+            return news_data
+        except json.JSONDecodeError as e:
+            logger.error(f"JSON parsing failed: {str(e)}", exc_info=True)
+            raise HTTPException(status_code=500, detail=f"Error parsing JSON: {str(e)}")
+
+    except HTTPException:
+        raise  # re-raise so the 400/404 responses above are not masked as 500s
+    except Exception as e:
+        logger.error(f"Error in fetch_news: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Error fetching news: {str(e)}")
+
+@app.post("/api/text_to_speech")
+async def text_to_speech(request: CompanyRequest):
+    try:
+        company_name = request.company_name.strip().lower()
+        if not company_name:
+            raise HTTPException(status_code=400, detail="Company name is required")
+
+        file_name = f"{company_name}_news.json"  # matches the file written by utils.fetch_and_save_news
+        try:
+            with open(file_name, "r", encoding="utf-8") as file:
+                news_data = json.load(file)
+            sentiment_text = news_data.get("Final Sentiment Analysis", "")
+            if not sentiment_text:
+                raise HTTPException(status_code=404, detail="Sentiment analysis not found")
+
+            hindi_text = translator.translate(sentiment_text)
+            tts = gTTS(text=hindi_text, lang='hi')
+            mp3_fp = io.BytesIO()
+            tts.write_to_fp(mp3_fp)
+            mp3_fp.seek(0)
+            audio_base64 = base64.b64encode(mp3_fp.read()).decode('utf-8')
+            return {"text": hindi_text, "audio_base64": audio_base64}
+        except FileNotFoundError:
+            raise HTTPException(status_code=404, detail=f"News file for {company_name} not found")
+    except HTTPException:
+        raise  # re-raise so the 400/404 responses above are not masked as 500s
+    except Exception as e:
+        logger.error(f"Error in text_to_speech: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Error generating speech: {str(e)}")
+
+@app.get("/api/health")
+async def health_check():
+    return {"status": "healthy"}
+
+if __name__ == "__main__":
+    uvicorn.run("api:app", host="0.0.0.0", port=8000, reload=True)
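For context, a minimal sketch of how a client might exercise the three endpoints above once the server is running. The host and port mirror the `uvicorn.run` call in the file; the company name and output filename are illustrative, not part of the commit:

import base64
import requests

BASE = "http://localhost:8000/api"  # assumed default from uvicorn.run above

# Health probe, then fetch the analysed news for one company.
print(requests.get(f"{BASE}/health", timeout=10).json())  # {'status': 'healthy'}
news = requests.post(f"{BASE}/fetch_news",
                     json={"company_name": "visa"}, timeout=120).json()
print(news["Final Sentiment Analysis"])

# Request the Hindi TTS rendering and decode the base64 payload to an MP3.
tts = requests.post(f"{BASE}/text_to_speech",
                    json={"company_name": "visa"}, timeout=60).json()
with open("visa_sentiment_hindi.mp3", "wb") as f:
    f.write(base64.b64decode(tts["audio_base64"]))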
app.py CHANGED
@@ -1,80 +1,123 @@
 import streamlit as st
-import
-import
-import
+import requests
+import json
+import base64
+import io
+
+API_BASE_URL = "http://localhost:8000/api"
 
 st.title("News Summarization and Text-to-Speech Application")
 
-# User input for company name
 company_name = st.text_input("Enter the company name:", "").strip().lower()
 
 if st.button("Fetch News"):
     if company_name:
-        st.
+        status = st.status("Fetching news...", expanded=True)
+        status.write(f"Fetching news for **{company_name}**...")
+        try:
+            response = requests.post(
+                f"{API_BASE_URL}/fetch_news",
+                json={"company_name": company_name},
+                timeout=120
+            )
+            response.raise_for_status()
+
+            news_data = response.json()
+            if not news_data or "Company" not in news_data:
+                status.update(label="No news found", state="error")
+                st.warning(f"No news found for {company_name}")
+            else:
+                status.update(label="News fetched successfully!", state="complete", expanded=False)
+
+                st.subheader(f"News Analysis for {news_data['Company']}")
+
+                # Articles section
+                st.subheader("Articles")
+                with st.expander("View Articles", expanded=False):
+                    for i, article in enumerate(news_data['Articles']):
+                        st.markdown(f"#### Article {i+1}: {article['Title']}")
+                        st.markdown(f"**Summary:** {article['Summary']}")
+                        st.markdown(f"**Sentiment:** {article['Sentiment']}")
+                        st.markdown(f"**Topics:** {', '.join(article['Topics'])}")
+                        st.divider()
+
+                # Sentiment Distribution
+                st.subheader("Sentiment Distribution")
+                sentiment_data = news_data['Comparative Sentiment Score']['Sentiment Distribution']
+                col1, col2, col3 = st.columns(3)
+                col1.metric("Positive", sentiment_data['Positive'])
+                col2.metric("Neutral", sentiment_data['Neutral'])
+                col3.metric("Negative", sentiment_data['Negative'])
+
+                # Topic Analysis
+                st.subheader("Topic Analysis")
+                with st.expander("View Topic Analysis", expanded=False):
+                    st.markdown("**Common Topics:**")
+                    st.write(", ".join(news_data['Topic Overlap']['Common Topics']))
+                    for key, value in news_data['Topic Overlap'].items():
+                        if key != "Common Topics":
+                            st.markdown(f"**{key}:**")
+                            st.write(", ".join(value))
+
+                # Coverage Differences
+                st.subheader("Coverage Differences")
+                with st.expander("View Comparative Analysis", expanded=False):
+                    coverage_diff = news_data['Coverage Differences']
+                    if isinstance(coverage_diff, str):
+                        st.write(coverage_diff)  # Fallback for error cases
+                    else:
+                        # Format line-by-line
+                        formatted_text = '"Coverage Differences": [\n'
+                        for i, item in enumerate(coverage_diff.get("Coverage Differences", [])):
+                            formatted_text += "{\n"
+                            formatted_text += f'    "Comparison": "{item["Comparison"]}",\n'
+                            formatted_text += f'    "Impact": "{item["Impact"]}"\n'
+                            formatted_text += "}" + (",\n" if i < len(coverage_diff["Coverage Differences"]) - 1 else "\n")
+                        formatted_text += "]"
+                        st.code(formatted_text, language="json")
+
+                # Final Sentiment Analysis
+                st.subheader("Final Sentiment Analysis")
+                st.info(news_data['Final Sentiment Analysis'])
+
+                # Download JSON
+                st.subheader("Download Data")
                 st.download_button(
-                    label="Download
-                    data=
-                    file_name=
-                    mime="
+                    label="Download JSON File",
+                    data=json.dumps(news_data, indent=4),
+                    file_name=f"{company_name}_news.json",
+                    mime="application/json"
                 )
-        # Extract only the Final Sentiment Analysis line
-        final_sentiment_line = ""
-        with open(file_name, "r", encoding="utf-8") as file:
-            content = file.read()
-            # Use regular expression to find the Final Sentiment Analysis line
-            match = re.search(r'"Final Sentiment Analysis": "([^"]+)"', content)
-            if match:
-                final_sentiment_line = match.group(1)
-
-        if final_sentiment_line:
-            st.subheader("Hindi Audio for Final Sentiment Analysis")
 
+                # Hindi Audio
+                st.subheader("Hindi Audio for Final Sentiment Analysis")
+                audio_response = requests.post(
+                    f"{API_BASE_URL}/text_to_speech",
+                    json={"company_name": company_name},
+                    timeout=60
+                )
+                audio_response.raise_for_status()
+                audio_data = audio_response.json()
+                #st.markdown(f"**Hindi translation:**")
+                #st.text(audio_data["text"])
+                audio_bytes = base64.b64decode(audio_data["audio_base64"])
+                #st.audio(audio_bytes, format="audio/mp3")
+                st.download_button(
+                    label="Download Hindi Audio",
+                    data=audio_bytes,
+                    file_name=f"{company_name}_sentiment_hindi.mp3",
+                    mime="audio/mp3"
+                )
+
+        except requests.exceptions.RequestException as e:
+            status.update(label="Connection error", state="error")
+            st.error(f"Error connecting to API: {str(e)}")
+            st.info("Make sure the FastAPI backend is running on http://localhost:8000")
+        except json.JSONDecodeError:
+            status.update(label="Invalid response", state="error")
+            st.error("Received invalid data from the API")
+        except Exception as e:
+            status.update(label="Processing error", state="error")
+            st.error(f"Error processing news data: {str(e)}")
     else:
         st.warning("Please enter a company name.")
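The front end above assumes a particular response shape from `/api/fetch_news`. Pieced together from the keys read in `app.py` and written in `utils.py`, the payload looks roughly like this; the field values are illustrative placeholders, not real output:

news_data = {
    "Company": "visa",
    "Articles": [
        {
            "Title": "...",
            "Summary": "...",
            "Sentiment": "Positive",  # one of Positive / Negative / Neutral
            "Topics": ["payments", "earnings"],
        },
    ],
    "Comparative Sentiment Score": {
        "Sentiment Distribution": {"Positive": 6, "Negative": 1, "Neutral": 3},
    },
    "Coverage Differences": {
        "Coverage Differences": [
            {"Comparison": "...", "Impact": "..."},
        ],
    },
    "Topic Overlap": {
        "Common Topics": ["payments"],
        "Unique Topics in Article 1": ["regulation"],
    },
    "Final Sentiment Analysis": "...",
}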
requirements.txt CHANGED
@@ -1,17 +1,15 @@
-requests
-beautifulsoup4
-transformers
-nltk
-streamlit
-gtts
-newspaper3k
-requests>=2.31.0
-beautifulsoup4>=4.12.3
-transformers>=4.35.2
+requests==2.31.0
+beautifulsoup4==4.12.3
+transformers==4.38.2
 torch>=2.1.0  # Required by transformers for T5 model
-keybert
-spacy
-nltk
-groq
+keybert==0.8.4
+spacy==3.7.4
+nltk==3.8.1
+groq==0.4.2
 sentencepiece>=0.1.99  # Required by T5Tokenizer
-
+streamlit==1.36.0
+fastapi==0.115.0
+pydantic==2.6.4
+uvicorn==0.30.6
+deep-translator==1.11.4
+gtts==2.5.3
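As a quick sanity check after `pip install -r requirements.txt`, the pinned versions can be confirmed programmatically; a convenience sketch, not part of the commit (distribution names are assumed to match their PyPI metadata):

# Print the installed version of each pinned package to confirm the
# environment matches requirements.txt.
from importlib.metadata import version

for pkg in ("requests", "transformers", "streamlit", "fastapi",
            "deep-translator", "gTTS"):
    print(pkg, version(pkg))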
utils.py CHANGED
@@ -1,3 +1,5 @@
+# utils.py
+
 import requests
 from bs4 import BeautifulSoup
 import time
@@ -62,10 +64,10 @@ rss_feeds = [
     "https://www.economist.com/business/rss.xml",  # The Economist Business
     "https://www.ft.com/companies/financials/rss",  # Financial Times Financials (Visa-relevant)
     "https://www.ft.com/rss/companies/technology",  # Financial Times Tech Companies
-    "https://feeds.a.dj.com/rss/WSJcomUSBusiness.xml",  # Wall Street Journal US Business
-    "https://www.forbes.com/money/feed/",  # Forbes Money
-    "https://www.reuters.com/arc/outboundfeeds/business/?outputType=xml",  # Reuters Business
-    "https://www.bloomberg.com/feed/podcasts/markets.xml",  # Bloomberg Markets
+    "https://feeds.a.dj.com/rss/WSJcomUSBusiness.xml",  # Wall Street Journal US Business (updated URL)
+    "https://www.forbes.com/money/feed/",  # Forbes Money (updated URL)
+    "https://www.reuters.com/arc/outboundfeeds/business/?outputType=xml",  # Reuters Business (updated URL)
+    "https://www.bloomberg.com/feed/podcasts/markets.xml",  # Bloomberg Markets (updated URL)
     "https://finance.yahoo.com/news/rssindex",  # Yahoo Finance News
     "https://www.nasdaq.com/feed/rssoutbound",  # Nasdaq News
     "https://www.marketwatch.com/rss/topstories",  # MarketWatch Top Stories
@@ -77,10 +79,11 @@ rss_feeds = [
     "https://www.theguardian.com/world/rss",  # The Guardian World
     "https://feeds.npr.org/1001/rss.xml",  # NPR News
     "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml",  # NYT Home Page
-    "https://apnews.com/hub/business?format=rss",  # Associated Press Business
-    "https://feeds.washingtonpost.com/rss/business",  # Washington Post Business
+    "https://apnews.com/hub/business?format=rss",  # Associated Press Business (updated URL)
+    "https://feeds.washingtonpost.com/rss/business",  # Washington Post Business (updated URL)
 ]
 
+
 headers = {
     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
 }
@@ -197,6 +200,7 @@ def get_coverage_differences(articles, company_name):
     }}
 ]
 }}
+
 """
     try:
         completion = client.chat.completions.create(
@@ -212,13 +216,14 @@ def get_coverage_differences(articles, company_name):
         for chunk in completion:
             coverage_diff += chunk.choices[0].delta.content or ""
 
-        text = coverage_diff.strip()
+        text = coverage_diff.strip()  # Fixed: removed space between 'text' and '='
        pattern = r'```json\s*([\s\S]*?)\s*```'
         match = re.search(pattern, text)
 
         if match:
-            json_str = match.group(1)
+            json_str = match.group(1)  # Get the content between the markers
             try:
+                # Parse the JSON to verify it's valid and return as dictionary
                 json_dict = json.loads(json_str)
                 json_dict = json.dumps(json_dict, indent=4)
                 return json_dict
@@ -229,6 +234,8 @@ def get_coverage_differences(articles, company_name):
     except Exception as e:
         return f"Error in Groq API call: {str(e)}"
 
+
+
 def similarity_based_common_topics(processed_articles, similarity_threshold=0.8, min_articles=2):
     keyword_clusters = defaultdict(list)
     for article in processed_articles:
@@ -297,7 +304,6 @@ def comparative_analysis(processed_articles, company_name):
             deduplicated_unique.add(topic)
         unique_topics[f"Unique Topics in Article {idx+1}"] = deduplicated_unique
     final_sentiment = max(sentiment_summary, key=sentiment_summary.get)
-
     # Add stock growth expectation based on sentiment
     if final_sentiment == "Positive":
         sentiment_statement = (f"{company_name}’s latest news coverage is mostly {final_sentiment.lower()}. "
@@ -318,51 +324,88 @@ def fetch_and_save_news(company_name):
     if not company_name:
         print("❌ Error: Company name is required")
         return None
-
+
+    file_name = f"{company_name}_news.json"
     articles = []
-
-    article_limit = 10
-    print(f"🔍 Starting parallel fetching for company: {company_name}...")
+    article_limit = 10  # Set desired article limit
     article_queue = queue.Queue()
     article_limit_reached = threading.Event()
-
-
+
+    print(f"🔍 Starting parallel fetching for {company_name}...")
+
+    # Use all RSS feeds for comprehensive search
+    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as fetch_executor:
+        # Submit all RSS feed fetch tasks
+        fetch_futures = [fetch_executor.submit(
+            fetch_articles_from_rss,
+            url,
+            company_name,
+            article_queue,
+            article_limit_reached
+        ) for url in rss_feeds]
+
+        # Process articles concurrently
+        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as process_executor:
             processing_futures = []
-
+
+            # Dynamic article processing loop
+            while len(articles) < article_limit:
                 try:
-
+                    # Get article with timeout
+                    article_data = article_queue.get(timeout=2)
+
+                    # Submit for processing
+                    future = process_executor.submit(
+                        process_article_content,
+                        article_data
+                    )
                     processing_futures.append(future)
+
                 except queue.Empty:
-                    continue
+                    # Check if we should continue waiting
+                    if all(f.done() for f in fetch_futures):
+                        print("⚠️ All feeds processed before reaching article limit")
+                        break
+
+            # Process completed articles
             for future in concurrent.futures.as_completed(processing_futures):
-                if article_count >= article_limit:
-                    article_limit_reached.set()
-                    break
                 result = future.result()
                 if result:
                     articles.append(result)
-
+                    print(f"📊 Collected {len(articles)}/{article_limit} articles")
+
+                # Exit immediately when limit reached
+                if len(articles) >= article_limit:
                     article_limit_reached.set()
-                    print(f"✅ Reached
+                    print(f"✅ Reached {article_limit} articles. Stopping all operations.")
                     break
+
+    # Final article processing
     articles = articles[:article_limit]
     if not articles:
-        print(f"❌ No relevant articles found for
+        print(f"❌ No relevant articles found for {company_name}")
         return None
+
     print(f"✅ Saving {len(articles)} articles to {file_name}")
     analysis_result = comparative_analysis(articles, company_name)
     coverage_differences = get_coverage_differences(articles, company_name)
+
+    # Parse coverage_differences if it’s a string
+    if isinstance(coverage_differences, str):
+        try:
+            coverage_differences = json.loads(coverage_differences)
+        except json.JSONDecodeError as e:
+            print(f"❌ Failed to parse Coverage Differences: {e}")
+            coverage_differences = {"Coverage Differences": []}
+
     sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}
     for article in articles:
         sentiment_distribution[article["sentiment"]] += 1
+
     formatted_articles = [{"Title": article["title"], "Summary": article["summary"],
                            "Sentiment": article["sentiment"], "Topics": article["keywords"].split(", ")}
                           for article in articles]
+
     output_data = {
         "Company": company_name,
         "Articles": formatted_articles,
@@ -374,34 +417,11 @@ def fetch_and_save_news(company_name):
         },
         "Final Sentiment Analysis": analysis_result['Final Sentiment Analysis']
     }
+
     with open(file_name, "w", encoding="utf-8") as file:
-        file.write('{\n')
-        file.write(f'"Title": "{article["Title"]}",\n')
-        file.write(f'"Summary": "{article["Summary"]}",\n')
-        file.write(f'"Sentiment": "{article["Sentiment"]}",\n')
-        file.write(f'"Topics": {article["Topics"]}\n')
-        file.write('}' + (',\n' if i < len(output_data["Articles"]) - 1 else '\n'))
-        file.write('],\n')
-        file.write('"Comparative Sentiment Score": {\n')
-        file.write('"Sentiment Distribution": {\n')
-        for i, (sentiment, count) in enumerate(output_data["Comparative Sentiment Score"]["Sentiment Distribution"].items()):
-            file.write(f'"{sentiment}": {count}' + (',' if i < 2 else '') + '\n')
-        file.write('}\n')
-        file.write('},\n')
-        file.write(f'{output_data["Coverage Differences"]},\n')
-        file.write('"Topic Overlap": {\n')
-        file.write(f'"Common Topics": {output_data["Topic Overlap"]["Common Topics"]},\n')
-        for i, (key, value) in enumerate([(k, v) for k, v in output_data["Topic Overlap"].items() if k != "Common Topics"]):
-            file.write(f'"{key}": {value}' + (',\n' if i < len(output_data["Topic Overlap"]) - 2 else '\n'))
-        file.write('},\n')
-        file.write(f'"Final Sentiment Analysis": "{output_data["Final Sentiment Analysis"]}"\n')
-    print("\nOutput format:")
-    with open(file_name, "r", encoding="utf-8") as file:
-        print(file.read())
-    print("✅ File saved successfully!")
+        json.dump(output_data, file, indent=4, ensure_ascii=False)
+
+    print(f"✅ File saved successfully as JSON: {file_name}")
     return file_name
 
 if __name__ == "__main__":
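Aside: the fence-stripping step in `get_coverage_differences` is worth understanding in isolation. The model is prompted to answer inside a ```json ... ``` block, and the regex pulls out just the JSON body so `json.loads()` can validate it. A self-contained sketch of that same regex against a typical reply (the sample text is made up):

import json
import re

# Extract the JSON body from a fenced ```json ... ``` block in an LLM reply.
reply = 'Sure, here it is:\n```json\n{"Coverage Differences": []}\n```\nHope that helps.'
match = re.search(r'```json\s*([\s\S]*?)\s*```', reply)
if match:
    parsed = json.loads(match.group(1))  # dict: {'Coverage Differences': []}
    print(json.dumps(parsed, indent=4))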