File size: 12,774 Bytes
7c7e5e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6caf9a5
 
7c7e5e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
from crewai_tools import BaseTool
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
from crewai import Agent, Task, Crew, LLM, Process
from crewai_tools import ScrapeWebsiteTool
from crewai_tools import EXASearchTool
from crewai_tools import ScrapeWebsiteTool
from pydantic import BaseModel
import json
import re
import streamlit as st
import os

class WebsiteAnalyzerTool(BaseTool):
    """CrewAI tool wrapper that runs a full WebsiteAnalyzer pass on a URL
    and returns the results as a plain-text report."""

    name: str = "WebsiteAnalyzer"
    description: str = "A tool for comprehensive website analysis, including UX assessment, loading speed, mobile responsiveness, content quality, CTA effectiveness, and SEO strategies."

    def _run(self, url: str) -> str:
        """Analyze *url* and return the formatted report string."""
        analysis = WebsiteAnalyzer(url).run_analysis()
        return self.format_results(analysis)

    def format_results(self, results):
        """Render the analysis dict as 'key:\\nvalue' sections under a header."""
        sections = "".join(f"{key}:\n{value}\n\n" for key, value in results.items())
        return "Website Analysis Results:\n\n" + sections

class WebsiteAnalyzer:
    """Heuristic analyzer for a single web page.

    Fetches the page once into a BeautifulSoup tree and runs a battery of
    checks: UX structure, loading speed, mobile responsiveness (via a
    headless Chrome session), content quality, CTAs, SEO basics, social
    media links, and improvement suggestions.
    """

    # Upper bound (seconds) for outbound HTTP requests so an unreachable or
    # hung server cannot block the analysis indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, url):
        self.url = url      # Target page URL.
        self.soup = None    # BeautifulSoup tree, populated by fetch_page().
        self.driver = None  # Selenium driver, created lazily by setup_selenium().

    def fetch_page(self):
        """Download the page and parse it into ``self.soup``.

        Raises requests.RequestException on network failure or timeout.
        """
        response = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        self.soup = BeautifulSoup(response.content, 'html.parser')

    def setup_selenium(self):
        """Start a headless Chrome driver and store it on ``self.driver``."""
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        self.driver = webdriver.Chrome(options=chrome_options)

    def analyze_ux(self):
        """Score basic page structure: one point each for nav, footer, form."""
        ux_score = 0
        if self.soup.find('nav'):
            ux_score += 1
        if self.soup.find('footer'):
            ux_score += 1
        if self.soup.find('form'):
            ux_score += 1
        return f"UX Score: {ux_score}/3"

    def analyze_loading_speed(self):
        """Time a fresh GET of the page (crude wall-clock measure)."""
        start_time = time.time()
        requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        load_time = time.time() - start_time
        return f"Loading Speed: {load_time:.2f} seconds"

    def analyze_mobile_responsiveness(self):
        """Check for a viewport meta tag and absence of horizontal overflow
        at phone width (375x812, iPhone X).

        Uses find_elements (plural) so a missing viewport tag yields an empty
        list instead of raising NoSuchElementException; the try/finally
        guarantees the Chrome driver is quit even if a check fails, which
        the original code did not (it leaked the driver on exception).
        """
        self.setup_selenium()
        try:
            self.driver.get(self.url)
            self.driver.set_window_size(375, 812)  # iPhone X dimensions
            viewports = self.driver.find_elements("css selector", "meta[name='viewport']")
            has_viewport = len(viewports) > 0
            scroll_width = self.driver.execute_script("return document.body.scrollWidth")
            client_width = self.driver.execute_script("return document.documentElement.clientWidth")
            no_horizontal_scroll = scroll_width <= client_width
        finally:
            self.driver.quit()
            self.driver = None
        return f"Mobile Responsive: {'Yes' if has_viewport and no_horizontal_scroll else 'No'}"

    def analyze_content_quality(self):
        """Report total word count across <p> tags and the heading count."""
        paragraphs = self.soup.find_all('p')
        word_count = sum(len(p.text.split()) for p in paragraphs)
        headings = self.soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
        return f"Word Count: {word_count}, Number of Headings: {len(headings)}"

    def analyze_cta(self):
        """Count likely calls-to-action: <button> tags plus styled links."""
        buttons = self.soup.find_all('button')
        links = self.soup.find_all('a', class_=['btn', 'button', 'cta'])
        cta_count = len(buttons) + len(links)
        return f"Number of potential CTAs: {cta_count}"

    def analyze_seo(self):
        """Summarize title, meta description, and H1 count.

        Guards against an empty <title> element, whose .string is None.
        """
        title = (self.soup.title.string or "No title") if self.soup.title else "No title"
        meta_description = self.soup.find('meta', attrs={'name': 'description'})
        meta_description = meta_description['content'] if meta_description else "No meta description"
        h1_tags = len(self.soup.find_all('h1'))
        return f"Title: {title}\nMeta Description: {meta_description}\nNumber of H1 tags: {h1_tags}"

    def analyze_social_media(self):
        """Collect the first link found for each major social platform."""
        social_media = {
            "Facebook": None,
            "Twitter": None,
            "LinkedIn": None,
            "Instagram": None,
            "YouTube": None
        }
        # Look for common patterns for social media links
        for a_tag in self.soup.find_all('a', href=True):
            href = a_tag['href'].lower()
            if "facebook.com" in href:
                social_media["Facebook"] = a_tag['href']
            elif "twitter.com" in href or "x.com" in href:
                social_media["Twitter"] = a_tag['href']
            elif "linkedin.com" in href:
                social_media["LinkedIn"] = a_tag['href']
            elif "instagram.com" in href:
                social_media["Instagram"] = a_tag['href']
            elif "youtube.com" in href:
                social_media["YouTube"] = a_tag['href']

        # Format the output to include the links
        formatted_social_media = [f"{platform}: {link}" for platform, link in social_media.items() if link]
        return f"Social Media Presence:\n" + "\n".join(formatted_social_media) if formatted_social_media else "Social Media Presence: None"

    def suggest_improvements(self):
        """Produce simple, actionable fixes based on common page defects."""
        suggestions = []
        # Flag images whose alt attribute is missing OR empty. The original
        # find_all('img', alt="") only matched alt="" exactly and silently
        # missed images with no alt attribute at all.
        if any(not img.get('alt') for img in self.soup.find_all('img')):
            suggestions.append("Add alt text to all images for better accessibility and SEO.")
        if not self.soup.find('meta', attrs={'name': 'description'}):
            suggestions.append("Add a meta description for better search engine results.")
        if len(self.soup.find_all('a', href="#")) > 0:
            suggestions.append("Replace empty links with meaningful ones or remove them.")
        return "\n".join(suggestions)

    def run_analysis(self):
        """Fetch the page and run every check, returning a dict of results."""
        self.fetch_page()
        results = {
            "UX Assessment": self.analyze_ux(),
            "Loading Speed": self.analyze_loading_speed(),
            "Mobile Responsiveness": self.analyze_mobile_responsiveness(),
            "Content Quality": self.analyze_content_quality(),
            "CTA Effectiveness": self.analyze_cta(),
            "SEO Strategies": self.analyze_seo(),
            "Social Media Analysis": self.analyze_social_media(),  # Added social media analysis
            "Improvement Suggestions": self.suggest_improvements()
        }
        return results


# Initialize the LLM with Groq configuration
# NOTE(review): os.environ['GROQ_API_KEY'] raises KeyError at import time if
# the variable is unset — the Streamlit app will crash before rendering.
llm = LLM(
    model="groq/llama-3.2-90b-text-preview",  # You can change this to other Groq models
    api_key=os.environ['GROQ_API_KEY'],
    base_url="https://api.groq.com/openai/v1",
    temperature=0  # Deterministic output for reproducible reports.
)

# Secondary LLM used by the competitor analyst agent.
# NOTE(review): the name says "mistral" but the model configured is Gemma 2
# (groq/gemma2-9b-it) — consider renaming or confirming the intended model.
llm_mistral = LLM(
    model="groq/gemma2-9b-it",
    api_key=os.environ['GROQ_API_KEY'],
    base_url="https://api.groq.com/openai/v1",
    temperature=0
)


# Generic page-scraping tool given to the competitor analyst agent.
scrape_tool = ScrapeWebsiteTool()

# Local heuristic analyzer wrapped as a CrewAI tool (defined above).
website_analyzer_tool = WebsiteAnalyzerTool()


# Define the Competitor Information Analyst agent
# Scrapes the competitor site and extracts business facts; its output feeds
# the report-writing task via task context.
competitor_analyst = Agent(
    role="Competitor Information Analyst",
    goal="Provide in-depth competitor analysis by collecting essential business and website information.",
    backstory=(
        "You are an experienced Competitor Information Analyst with 8 years of expertise in identifying market competitors "
        "and uncovering valuable insights. Your strength lies in analyzing direct, indirect, and replacement competitors, "
        "highlighting their strengths and weaknesses, pricing strategies, and customer sentiment. You leverage this information "
        "to provide businesses with actionable strategies for market dominance and improved customer engagement."
    ),
    tools=[scrape_tool],  # Only needs scraping; the UX data comes in via the {ux} input.
    llm=llm_mistral,  # Uses the smaller/faster model for extraction work.
    verbose=True,
    max_iter=5,  # Cap agent reasoning loops.
    max_retry_limit=3,  # Retry transient tool/LLM failures up to 3 times.
)


# Agent that turns the analyst's structured findings into the final
# markdown report; has no tools — it only synthesizes prior task output.
report_writer = Agent(
    role="Competitor Report Writer",
    goal="Generate detailed competitor analysis reports and dashboard insights by synthesizing data from both Web Analyst and Competitor Information Analyst outputs.",
    backstory="You are responsible for creating insightful and comprehensive markdown reports, summarizing key competitor data including their basic information, market presence, product offerings, pricing strategy, website analysis, and customer sentiment. Your role is to provide actionable insights based on the data collected by the Web Analyst and Competitor Information Analyst.",
    llm=llm,  # Uses the larger model for long-form report writing.
    verbose=True,
    max_iter=5,  # Increase iteration limit for more thorough outputs
    max_retry_limit=3,  # Allow more retries to ensure robust output
)


# Define the content analysis task for the Competitor Information Analyst agent
content_analysis_task = Task(
    description=(
        "Perform a thorough analysis of the website at the provided URL: {url}. Extract key competitor details and relevant business information, "
        "and provide insights into their market positioning. The analysis should include:"
        "\n1. **Company Information**: Company name, URL, contact email, phone, and location."
        "\n2. **Product/Service Offerings**: List of products or services offered, including any unique selling points."
        "\n3. **Strengths and Weaknesses**: Identify the key strengths and weaknesses of the competitor based on their website content."
        "\n4. **Target Market**: Analyze the competitor's target audience and market reach."
        "\n5. **Pricing Strategy**: Provide insights into their pricing structure and strategy (if available)."
        "\n6. **Customer Testimonials or Sentiment**: Review and summarize customer feedback from testimonials or reviews."
        "\n7. **User Experience (UX)**: Evaluate the overall user experience of the website, including design and ease of use."
        "\n8. **Call-to-Action (CTA) Effectiveness**: Assess the clarity and effectiveness of any calls-to-action present on the site."
    ),
    expected_output=(
        "A structured JSON report containing:"
        "\n- Company Information: Name:\n, URL:\n, contact details:\n, location:\n."
        "\n- Product/Service Offerings: Detailed list with unique selling points."
        "\n- Strengths and Weaknesses: Key strengths and weaknesses based on website content."
        "\n- Target Market: Identified audience and market positioning."
        "\n- Pricing Strategy: Pricing details and strategy (if available)."
        "\n- Customer Testimonials or Sentiment: Summary of customer reviews."
        "\n- UX Assessment: Evaluation of user experience."
        "\n- CTA Effectiveness: Analysis of call-to-action elements."
    ),
    agent=competitor_analyst,

)

# Create a task for competitor analysis using the defined model
competitor_analysis_task = Task(
    description=(
        """Create a detailed markdown report for {url} that includes the following:
        Competitor's basic information (company name, URL, email, location).
        Operating markets and regions.
        Products/services offered and pricing strategy.
        Strengths and weaknesses in their website and user experience.
        General customer sentiment from reviews or testimonials.
        Detailed analysis of their website based on {ux}:
            UX Assessment
            Loading Speed
            Mobile Responsiveness
            Content Quality
            CTA Effectiveness
            SEO Strategies
        Social Media Analysis
        Suggestions for improvement.
        Use the outputs from both Competitor Information Analyst to generate a comprehensive report and JSON summary"""
    ),
    agent=report_writer,
    expected_output=(
        "A structured markdown report with sections for basic info, operating markets, "
        "products/services, pricing strategy, website strengths/weaknesses, and customer sentiment. "
        "Additionally, a JSON summary containing key extracted insights."
    ),
    # output_pydantic=CompetitorAnalysis,  # Use a custom Pydantic model for structured JSON output
    context=[content_analysis_task],
    output_file=f"competitor_analysis.md",
    
)

# Create a crew with the agent and task
crew = Crew(
    agents=[competitor_analyst, report_writer],
    tasks=[content_analysis_task, competitor_analysis_task],
    verbose=True,
)

# Streamlit application
# UI flow: take a URL, run the local heuristic analysis, feed both the URL
# and the analysis into the CrewAI pipeline, then render and offer the
# resulting markdown report for download.
st.set_page_config(page_title="Spark Competitor Analysis", page_icon=":bar_chart:")
st.title("BrainSpark Competitor Analysis")

# Get the competitor URL from the user
competitor_url = st.text_input("Enter the competitor website URL:")

if st.button("Analyze Competitor"):
    if competitor_url:
        try:
            # Local heuristic analysis; its dict output becomes the {ux}
            # placeholder in the report-writing task description.
            ux_results = WebsiteAnalyzer(url=competitor_url).run_analysis()

            # Run the competitor analysis task
            crew_output = crew.kickoff(inputs={'url': competitor_url, 'ux': ux_results})
            report = crew_output.raw
        except Exception as exc:
            # Surface failures (bad URL, network timeout, missing chromedriver,
            # LLM errors) in the UI instead of crashing the Streamlit script.
            st.error(f"Analysis failed: {exc}")
        else:
            # Display the report
            st.markdown(report)

            # Allow the user to download the report
            st.download_button(
                label="Download Competitor Analysis Report",
                data=report,
                file_name="competitor_analysis.md",
                mime="text/markdown",
            )
    else:
        st.error("Please enter a competitor website URL.")