Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| import socket | |
| import logging | |
| import time | |
| import re | |
| from bs4 import BeautifulSoup | |
| from urllib.parse import urlparse, urljoin | |
| from typing import Dict, Tuple, Optional | |
| from google import genai | |
| # ------------------------------------------------- | |
| # Logging setup | |
| # ------------------------------------------------- | |
| logging.basicConfig( | |
| level=logging.INFO, | |
| format="%(asctime)s | %(levelname)s | %(message)s", | |
| ) | |
| logger = logging.getLogger(__name__) | |
| logger.info("AI Website Review Tool starting up") | |
| # ------------------------------------------------- | |
| # Constants | |
| # ------------------------------------------------- | |
| TIMEOUT = 15 | |
| MAX_RETRIES = 2 | |
| CONTENT_LIMIT = 12000 | |
| # ----------------------------- | |
| # URL Validation & Normalization | |
| # ----------------------------- | |
| def normalize_url(url: str) -> str: | |
| """Normalize and validate URL format.""" | |
| url = url.strip() | |
| parsed = urlparse(url) | |
| if not parsed.scheme: | |
| url = "https://" + url | |
| return url | |
| def validate_url(url: str) -> Tuple[bool, str]: | |
| """Validate URL format and accessibility.""" | |
| try: | |
| parsed = urlparse(url) | |
| if not parsed.netloc: | |
| return False, "Invalid URL format. Please include domain name." | |
| # Check for obviously invalid domains | |
| if len(parsed.netloc) < 4 or '.' not in parsed.netloc: | |
| return False, "Invalid domain name." | |
| return True, "" | |
| except Exception as e: | |
| return False, f"URL validation error: {str(e)}" | |
| # ----------------------------- | |
| # Proxy Option (if AFC blocks direct requests) | |
| # ----------------------------- | |
| USE_PROXY = False # Set to True if you need to use a proxy service | |
| def fetch_via_proxy(url: str) -> str: | |
| """Fetch content via a proxy service (for AFC restrictions).""" | |
| # Option 1: ScraperAPI (free tier available) | |
| # proxy_url = f"http://api.scraperapi.com?api_key=YOUR_KEY&url={url}" | |
| # Option 2: WebScraping.AI (free tier available) | |
| # proxy_url = f"https://api.webscraping.ai/html?api_key=YOUR_KEY&url={url}" | |
| # Option 3: ScrapingBee (free tier available) | |
| proxy_url = f"https://app.scrapingbee.com/api/v1/?api_key=YOUR_KEY&url={url}" | |
| response = requests.get(proxy_url, timeout=30) | |
| response.raise_for_status() | |
| return response.text | |
| # ----------------------------- | |
| # Enhanced Content Extraction | |
| # ----------------------------- | |
| def extract_website_info(soup: BeautifulSoup, url: str) -> Dict[str, str]: | |
| """Extract key website elements for analysis.""" | |
| info = {} | |
| # Title | |
| info['title'] = soup.title.string.strip() if soup.title else "" | |
| # Meta description | |
| meta_desc = soup.find("meta", attrs={"name": "description"}) | |
| info['meta_description'] = meta_desc.get("content", "").strip() if meta_desc else "" | |
| # Headings | |
| info['h1'] = soup.find("h1").get_text(strip=True) if soup.find("h1") else "" | |
| h2_tags = soup.find_all("h2", limit=5) | |
| info['h2s'] = " | ".join([h2.get_text(strip=True) for h2 in h2_tags]) | |
| # CTAs (buttons and prominent links) | |
| cta_patterns = ['button', 'btn', 'cta', 'call-to-action'] | |
| ctas = [] | |
| for pattern in cta_patterns: | |
| elements = soup.find_all(class_=re.compile(pattern, re.I)) | |
| ctas.extend([el.get_text(strip=True) for el in elements[:3]]) | |
| info['ctas'] = " | ".join(ctas[:5]) if ctas else "No clear CTAs found" | |
| # Contact information | |
| contact_indicators = soup.find_all(string=re.compile(r'contact|email|phone|call', re.I)) | |
| info['has_contact'] = len(contact_indicators) > 0 | |
| # Links analysis | |
| links = soup.find_all('a', href=True) | |
| info['total_links'] = len(links) | |
| external_links = [l for l in links if urlparse(l['href']).netloc and urlparse(l['href']).netloc != urlparse(url).netloc] | |
| info['external_links'] = len(external_links) | |
| return info | |
| def fetch_website_text(url: str) -> Tuple[str, bool]: | |
| """ | |
| Fetch and parse website content. | |
| Returns (content_string, success_boolean) | |
| """ | |
| socket.setdefaulttimeout(TIMEOUT) | |
| headers = { | |
| "User-Agent": ( | |
| "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " | |
| "AppleWebKit/537.36 (KHTML, like Gecko) " | |
| "Chrome/121.0 Safari/537.36" | |
| ), | |
| "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", | |
| "Accept-Language": "en-US,en;q=0.9", | |
| "Accept-Encoding": "gzip, deflate", | |
| "DNT": "1", | |
| } | |
| for attempt in range(MAX_RETRIES): | |
| try: | |
| if USE_PROXY: | |
| html_content = fetch_via_proxy(url) | |
| soup = BeautifulSoup(html_content, "html.parser") | |
| break | |
| else: | |
| response = requests.get( | |
| url, | |
| headers=headers, | |
| timeout=TIMEOUT, | |
| allow_redirects=True, | |
| ) | |
| response.raise_for_status() | |
| soup = BeautifulSoup(response.text, "html.parser") | |
| break | |
| except requests.exceptions.RequestException as e: | |
| if attempt == MAX_RETRIES - 1: | |
| raise | |
| time.sleep(1) | |
| # Remove noisy tags | |
| for tag in soup(["script", "style", "noscript", "iframe", "nav", "footer"]): | |
| tag.decompose() | |
| # Extract structured info | |
| info = extract_website_info(soup, url) | |
| # Body content | |
| body_text = " ".join(soup.stripped_strings) | |
| body_text = body_text[:CONTENT_LIMIT] | |
| content = f""" | |
| PAGE TITLE: {info['title']} | |
| META DESCRIPTION: {info['meta_description']} | |
| PRIMARY H1: {info['h1']} | |
| KEY H2 HEADINGS: {info['h2s']} | |
| CALL-TO-ACTION BUTTONS: {info['ctas']} | |
| CONTACT INFO PRESENT: {"Yes" if info['has_contact'] else "No"} | |
| LINK ANALYSIS: {info['total_links']} total links, {info['external_links']} external | |
| VISIBLE CONTENT: | |
| {body_text} | |
| """ | |
| return content, True | |
| def fetch_website_text_safe(url: str) -> Tuple[str, bool]: | |
| """Safe wrapper that never crashes.""" | |
| try: | |
| return fetch_website_text(url) | |
| except requests.exceptions.Timeout: | |
| return """β οΈ Website took too long to respond (timeout). | |
| This might indicate slow server performance. | |
| Analysis will be based on URL structure and general best practices.""", False | |
| except requests.exceptions.SSLError: | |
| return """β οΈ SSL Certificate error detected. | |
| This is a major trust issue that should be fixed immediately. | |
| Analysis will include this critical security concern.""", False | |
| except requests.exceptions.ConnectionError: | |
| return """β οΈ Could not connect to website. | |
| Website may be down or have DNS issues. | |
| Analysis will be based on general best practices.""", False | |
| except Exception as e: | |
| return f"""β οΈ Unable to fully fetch website content. | |
| Error: {str(e)} | |
| Analysis will be based on available information and general best practices.""", False | |
| # ----------------------------- | |
| # Gemini Analysis | |
| # ----------------------------- | |
| def analyze_website(api_key: str, url: str, industry: str, goal: str) -> str: | |
| """Main analysis function.""" | |
| # Validate inputs | |
| if not api_key or len(api_key) < 20: | |
| return "β Please enter a valid Gemini API key. Get one at https://aistudio.google.com/apikey" | |
| if not url: | |
| return "β Please enter a website URL." | |
| # Normalize and validate URL | |
| url = normalize_url(url) | |
| is_valid, error_msg = validate_url(url) | |
| if not is_valid: | |
| return f"β {error_msg}" | |
| try: | |
| # Initialize client | |
| try: | |
| client = genai.Client(api_key=api_key) | |
| except Exception as e: | |
| return f"β Invalid API key. Please check your Gemini API key.\nError: {str(e)}" | |
| # Fetch website content | |
| website_text, fetch_success = fetch_website_text_safe(url) | |
| fetch_status = "β Full content analysis" if fetch_success else "β οΈ Limited analysis" | |
| # Build enhanced prompt | |
| prompt = f"""You are an AI consultant with the company Esquire IT helping small businesses improve their websites. | |
| Business Context: | |
| - Industry: {industry} | |
| - Primary Goal: {goal} | |
| - URL: {url} | |
| - Content Fetch Status: {fetch_status} | |
| Analyze the website content below and provide a comprehensive business-focused review. | |
| Structure your response with clear sections: | |
| ## 1. Messaging Clarity (Score: X/10) | |
| **Main Issue:** [One sentence summary] | |
| **Recommendations:** | |
| - [Specific actionable item] | |
| - [Specific actionable item] | |
| - [Specific actionable item] | |
| ## 2. Conversion Effectiveness (Score: X/10) | |
| **Main Issue:** [One sentence summary] | |
| **Recommendations:** | |
| - [Specific actionable item] | |
| - [Specific actionable item] | |
| - [Specific actionable item] | |
| ## 3. Trust & Credibility (Score: X/10) | |
| **Main Issue:** [One sentence summary] | |
| **Recommendations:** | |
| - [Specific actionable item] | |
| - [Specific actionable item] | |
| - [Specific actionable item] | |
| ## 4. User Experience Issues | |
| - [Issue 1] | |
| - [Issue 2] | |
| - [Issue 3] | |
| ## 5. AI & Automation Opportunities | |
| For a {industry} business with limited tech resources: | |
| - [Practical AI tool/solution #1] | |
| - [Practical AI tool/solution #2] | |
| - [Practical AI tool/solution #3] | |
| ## Summary | |
| **Overall Score:** X/100 | |
| **Top 3 Priority Fixes:** | |
| 1. [Most urgent fix] | |
| 2. [Second priority] | |
| 3. [Third priority] | |
| Use clear, non-technical language that a small business owner would understand. | |
| Website Content: | |
| {website_text} | |
| """ | |
| # Generate analysis | |
| response = client.models.generate_content( | |
| model="gemini-2.5-flash-lite", | |
| contents=prompt, | |
| ) | |
| result = f"# Analysis for {url}\n\n{response.text}" | |
| if not fetch_success: | |
| result += "\n\n---\nβ οΈ **Note:** Analysis was performed with limited content due to website access issues." | |
| return result | |
| except Exception as e: | |
| logger.error(f"Analysis error: {str(e)}") | |
| return f"β Error during analysis: {str(e)}\n\nPlease check your API key and try again." | |
| # ----------------------------- | |
| # Gradio UI | |
| # ----------------------------- | |
| with gr.Blocks( | |
| title="AI Website Review Tool", | |
| theme=gr.themes.Soft(), | |
| css=""" | |
| .gradio-container {max-width: 900px !important} | |
| #output {min-height: 500px} | |
| """ | |
| ) as demo: | |
| gr.Markdown("# π Esquire IT AI Website Review Tool") | |
| gr.Markdown( | |
| "Get actionable insights to improve your small business website using Esquire IT's AI analysis." | |
| ) | |
| with gr.Row(): | |
| with gr.Column(): | |
| api_key = gr.Textbox( | |
| label="π Gemini API Key", | |
| placeholder="Paste your Gemini API key here", | |
| type="password", | |
| info="Get your free API key at https://aistudio.google.com/apikey", | |
| ) | |
| url = gr.Textbox( | |
| label="π Website URL", | |
| placeholder="example.com or https://example.com", | |
| info="Enter the homepage or any page you want analyzed", | |
| ) | |
| # gr.Examples( | |
| # examples=[ | |
| # ["https://www.stripe.com"], | |
| # ["https://www.shopify.com"], | |
| # ], | |
| # inputs=url, | |
| # label="Try example websites", | |
| # ) | |
| with gr.Row(): | |
| industry = gr.Dropdown( | |
| label="π’ Industry", | |
| choices=[ | |
| "General SMB", | |
| "Law Firm", | |
| "Hospitality", | |
| "Healthcare", | |
| "Real Estate", | |
| "E-commerce", | |
| "Consulting", | |
| "Restaurant", | |
| "Fitness", | |
| "Education", | |
| ], | |
| value="General SMB", | |
| ) | |
| goal = gr.Dropdown( | |
| label="π― Primary Goal", | |
| choices=[ | |
| "Generate leads", | |
| "Sell products", | |
| "Sell services", | |
| "Build credibility", | |
| "Educate visitors", | |
| "Book appointments", | |
| ], | |
| value="Generate leads", | |
| ) | |
| analyze_btn = gr.Button("π Analyze Website", variant="primary", size="lg") | |
| with gr.Row(): | |
| output = gr.Markdown(elem_id="output") | |
| analyze_btn.click( | |
| fn=analyze_website, | |
| inputs=[api_key, url, industry, goal], | |
| outputs=output, | |
| ) | |
| demo.launch() |