Sakshi2005 committed on
Commit
27697ee
·
verified ·
1 Parent(s): 710b938

Upload folder using huggingface_hub

Browse files
.env.example ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Google Gemini API Configuration
2
+ GEMINI_API_KEY=your_gemini_api_key_here
3
+
4
+ # Get your API key from: https://aistudio.google.com/app/apikey
.gitignore ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment variables
2
+ .env
3
+
4
+ # Python
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+ *.so
9
+ .Python
10
+ *.egg-info/
11
+ dist/
12
+ build/
13
+ *.egg
14
+
15
+ # Virtual environments
16
+ venv/
17
+ env/
18
+ ENV/
19
+ .venv
20
+
21
+ # IDE
22
+ .vscode/
23
+ .idea/
24
+ *.swp
25
+ *.swo
26
+ *~
27
+
28
+ # OS
29
+ .DS_Store
30
+ Thumbs.db
31
+ desktop.ini
32
+
33
+ # Project specific
34
+ audit_history.json
35
+ audit_report_*.pdf
36
+ *.log
37
+
38
+ # Gradio
39
+ flagged/
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
README.md CHANGED
@@ -1,12 +1,94 @@
1
- ---
2
- title: Audit AI
3
- emoji: 👀
4
- colorFrom: blue
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 6.6.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Audit_AI
3
+ app_file: app_gradio.py
4
+ sdk: gradio
5
+ sdk_version: 5.47.2
6
+ ---
7
+ # 🧠 AuditAI — AI Website Auditor
8
+
9
+ An **Agentic AI-powered web application** built with **Gradio** that audits any website and provides **SEO, performance, accessibility, and security insights**, along with **AI-generated fixes and optimized HTML**.
10
+
11
+
12
+ ## 📌 Features
13
+
14
+ - 🔍 **Website Scanning**
15
+ - Page load time
16
+ - HTTPS detection
17
+ - Page size analysis
18
+ - Internal vs external links
19
+ - Headings structure (H1, H2, H3)
20
+ - Images without ALT attributes
21
+ - Scripts, paragraphs, and links count
22
+
23
+ - 🤖 **Agentic AI Analysis**
24
+ - Automatically detects website issues
25
+ - Provides actionable AI-powered suggestions
26
+ - Generates **HTML & SEO fix snippets**
27
+ - Produces **fully optimized HTML**
28
+ - Extracts top SEO keywords
29
+ - Analyzes heading hierarchy
30
+
31
+ - 📊 **Interactive Dashboard**
32
+ - Overall website score
33
+ - SEO, Performance, Accessibility & Security scores
34
+ - Gauge & radar charts
35
+ - Bar charts & pie charts
36
+ - Keyword word cloud
37
+ - Heading hierarchy treemap
38
+ - Page element heatmap
39
+
40
+ - ⬇️ **Download Optimized HTML**
41
+ - One-click download of AI-improved webpage
42
+
43
+ ---
44
+
45
+ ## 🔍 Usage
46
+
47
+ 1. Run the app locally using Gradio.
48
+ 2. Enter a valid website URL.
49
+ 3. Click **🚀 Start Audit**.
50
+ 4. View:
51
+ - ⚠️ Detected issues
52
+ - ✅ AI-generated suggestions
53
+ - 📊 Visual audit dashboard
54
+ - 🤖 Agentic AI fixes
55
+ - 📄 PDF Reports
56
+ 5. Download the **optimized HTML** or **PDF report** if available.
57
+
58
+ ---
59
+
60
+
61
+ ---
62
+
63
+ ## 📊 How It Works
64
+
65
+ 1. The app scans the website using **BeautifulSoup & Requests**.
66
+ 2. Raw metrics are calculated (SEO, performance, accessibility, mobile, security).
67
+ 3. Scan data is sent to **Google Gemini** for agentic analysis.
68
+ 4. AI returns:
69
+ - Issues
70
+ - Suggestions
71
+ - Fix snippets
72
+ - Optimized HTML
73
+ 5. Results are visualized in a rich Gradio dashboard.
74
+
75
+ ---
76
+
77
+ ## ⚙️ Tech Stack
78
+
79
+ - **Python 3.9+**
80
+ - **Gradio** — Web UI
81
+ - **Google Gemini API** — Agentic AI analysis
82
+ - **BeautifulSoup** — HTML parsing
83
+ - **Requests** — Web scraping
84
+ - **Plotly & Matplotlib** — Interactive charts
85
+ - **WordCloud** — Keyword visualization
86
+ - **FPDF** — PDF report generation
87
+ - **dotenv** — Environment variables
88
+
89
+ ---
90
+
91
+ ---
92
+
93
+ ## 👨‍💻 Author
94
+ **Sakshi Gupta**
README_GRADIO.md ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🧠 AuditAI — Enhanced Agentic AI Website Auditor (Gradio Edition)
2
+
3
+ An **Agentic AI-powered web application** built with **Gradio** that provides comprehensive website audits including **SEO, performance, accessibility, security, mobile responsiveness**, and **broken link detection** with **AI-generated insights and PDF reports**.
4
+
5
+ ---
6
+
7
+ ## 🆕 What's New in Gradio Edition
8
+
9
+ ### **Enhanced Features:**
10
+ - ✅ **Accessibility Checker** - WCAG 2.1 compliance analysis
11
+ - ✅ **Mobile Responsiveness Analyzer** - Viewport, responsive images, touch targets
12
+ - ✅ **Broken Link Detection** - Parallel link checking with detailed reports
13
+ - ✅ **PDF Report Generation** - Professional downloadable audit reports
14
+ - ✅ **Historical Tracking** - Track score improvements over time
15
+ - ✅ **Trend Analysis** - Visualize performance changes across audits
16
+ - ✅ **Enhanced UI** - Modern Gradio tabbed interface with better UX
17
+
18
+ ### **Original Features (Retained):**
19
+ - 🔍 Website scanning (load time, HTTPS, page size, links, headings)
20
+ - 🤖 Agentic AI analysis with Google Gemini 1.5 Flash
21
+ - 📊 Interactive visualizations (gauges, radar charts, bar charts)
22
+ - ⬇️ Downloadable optimized HTML
23
+ - 💡 AI-powered suggestions and fix snippets
24
+
25
+ ---
26
+
27
+ ## 🚀 Quick Start
28
+
29
+ ### 1️⃣ Install Dependencies
30
+
31
+ ```bash
32
+ pip install -r requirements.txt
33
+ ```
34
+
35
+ ### 2️⃣ Set Up Gemini API Key
36
+
37
+ Create a `.env` file in the project root:
38
+
39
+ ```env
40
+ GEMINI_API_KEY=your_gemini_api_key_here
41
+ ```
42
+
43
+ ### 3️⃣ Run the Gradio App
44
+
45
+ ```bash
46
+ python app_gradio.py
47
+ ```
48
+
49
+ The app will launch at `http://localhost:7860` with a shareable link.
50
+
51
+ ### 4️⃣ Run the Original Streamlit App (Optional)
52
+
53
+ ```bash
54
+ streamlit run app.py
55
+ ```
56
+
57
+ ---
58
+
59
+ ## 📋 New Features Details
60
+
61
+ ### **♿ Accessibility Checker** (`accessibility_checker.py`)
62
+ Analyzes WCAG 2.1 compliance:
63
+ - Missing alt text on images
64
+ - Proper heading hierarchy (H1-H6)
65
+ - Form labels and ARIA landmarks
66
+ - Link text quality
67
+ - Language attributes
68
+ - Skip navigation links
69
+ - Video captions
70
+
71
+ ### **📱 Mobile Responsiveness** (`mobile_checker.py`)
72
+ Checks mobile-friendliness:
73
+ - Viewport meta tag validation
74
+ - Responsive images (srcset/sizes)
75
+ - Page size optimization for mobile
76
+ - Flash content detection
77
+ - Fixed-width elements
78
+ - Touch target sizes
79
+ - Media queries analysis
80
+ - Relative font sizing
81
+
82
+ ### **🔗 Broken Link Detector** (`link_checker.py`)
83
+ Identifies broken links:
84
+ - Parallel processing for speed (10 concurrent workers)
85
+ - Checks up to 50 links per audit
86
+ - HTTP status code validation
87
+ - Internal vs external link tracking
88
+ - Detailed error reporting
89
+
90
+ ### **📄 PDF Report Generator** (`report_generator.py`)
91
+ Creates professional reports:
92
+ - Multi-page comprehensive audit summary
93
+ - Color-coded scores and metrics
94
+ - All detected issues organized by category
95
+ - AI recommendations
96
+ - Broken link details
97
+ - Timestamp and metadata
98
+
99
+ ### **📈 Historical Tracking** (`history_tracker.py`)
100
+ Tracks performance over time:
101
+ - JSON-based storage (last 100 audits)
102
+ - Per-site history retrieval
103
+ - Trend data for visualizations
104
+ - Score comparison across audits
105
+
106
+ ---
107
+
108
+ ## 🎨 Gradio UI Structure
109
+
110
+ The new interface uses **5 tabs**:
111
+
112
+ 1. **📊 Overview** - Summary, scores, gauge & radar charts
113
+ 2. **📈 Metrics & Trends** - Technical metrics and historical trends
114
+ 3. **⚠️ Issues** - AI, accessibility, mobile, and broken link issues
115
+ 4. **✅ Recommendations** - AI-powered suggestions
116
+ 5. **📄 PDF Report** - Download comprehensive report
117
+
118
+ ---
119
+
120
+ ## 📊 Scoring System
121
+
122
+ ### **Overall Score Calculation** (0-100)
123
+ Based on:
124
+ - HTTPS (15 points)
125
+ - Load time (5-15 points)
126
+ - Title presence (10 points)
127
+ - Meta description (10 points)
128
+ - H1 tags (5-10 points)
129
+ - Images with alt text (up to 10 points)
130
+ - Links & scripts (up to 10 points)
131
+ - Paragraph content (up to 10 points)
132
+ - HTTP status (10 points)
133
+
134
+ ### **Individual Scores**
135
+ - **SEO Score:** `100 - (images_without_alt × 5)`
136
+ - **Performance Score:** `100 - (load_time × 10)`
137
+ - **Accessibility Score:** WCAG compliance based (0-100)
138
+ - **Security Score:** 100 if HTTPS, else 50
139
+ - **Mobile Score:** Mobile-friendliness based (0-100)
140
+
141
+ ---
142
+
143
+ ## 🔧 Tech Stack
144
+
145
+ ### **Core Technologies**
146
+ - **Python 3.9+**
147
+ - **Gradio 4.x** — Modern web UI framework
148
+ - **Google Gemini API** — Gemini 1.5 Flash for AI analysis
149
+ - **BeautifulSoup4** — HTML parsing
150
+ - **Requests** — HTTP client
151
+
152
+ ### **Visualization & Reports**
153
+ - **Plotly** — Interactive charts (gauges, radar, bar)
154
+ - **Matplotlib** — Word clouds
155
+ - **Pandas** — Data manipulation
156
+ - **FPDF** — PDF generation
157
+
158
+ ### **Other**
159
+ - **python-dotenv** — Environment variables
160
+ - **concurrent.futures** — Parallel link checking
161
+
162
+ ---
163
+
164
+ ## 📁 Project Structure
165
+
166
+ ```
167
+ AuditAI-main/
168
+ ├── app.py # Original Streamlit app
169
+ ├── app_gradio.py # NEW: Gradio app
170
+ ├── scanner.py # Website scanner
171
+ ├── ai_analyzer.py # Google Gemini AI integration
172
+ ├── scoring.py # Score calculation
173
+ ├── dashboard.py # Streamlit dashboard
174
+ ├── utils.py # Utility functions
175
+ ├── accessibility_checker.py # NEW: Accessibility analysis
176
+ ├── mobile_checker.py # NEW: Mobile responsiveness
177
+ ├── link_checker.py # NEW: Broken link detection
178
+ ├── report_generator.py # NEW: PDF generation
179
+ ├── history_tracker.py # NEW: Historical tracking
180
+ ├── requirements.txt # Dependencies
181
+ ├── README.md # Original readme
182
+ ├── README_GRADIO.md # This file
183
+ └── .env # API keys (create this)
184
+ ```
185
+
186
+ ---
187
+
188
+ ## 🎯 Usage Guide
189
+
190
+ 1. **Enter URL:** Input the website URL (e.g., `https://example.com`)
191
+ 2. **Choose Options:** Check/uncheck "Check for Broken Links" (optional, slower)
192
+ 3. **Click Audit:** Start the comprehensive analysis
193
+ 4. **View Results:**
194
+ - Overview tab shows summary and scores
195
+ - Issues tab lists all detected problems
196
+ - Recommendations tab shows AI suggestions
197
+ - PDF tab provides downloadable report
198
+ 5. **Track Progress:** Re-audit the same site to see trend improvements
199
+
200
+ ---
201
+
202
+ ## ⚡ Performance Notes
203
+
204
+ - **Broken Link Checking:** Uses parallel processing (10 workers) but can take 30-60s for 50 links
205
+ - **AI Analysis:** Powered by Google Gemini AI | Enhanced with Advanced Analytics
206
+ - **PDF Generation:** Instant (<1s)
207
+ - **Historical Trends:** Only show after 2+ audits of the same site
208
+
209
+ ---
210
+
211
+ ## 🔒 Environment Variables
212
+
213
+ Required in `.env` file:
214
+
215
+ ```env
216
+ GEMINI_API_KEY=your-gemini-key-here
217
+ ```
218
+
219
+ ---
220
+
221
+ ## 🆚 Gradio vs Streamlit
222
+
223
+ ### **Why Gradio?**
224
+ - ✅ Easier deployment (built-in sharing)
225
+ - ✅ Better tab organization
226
+ - ✅ Cleaner API for complex workflows
227
+ - ✅ Automatic shareable links
228
+ - ✅ Better mobile experience
229
+
230
+ ### **Keeping Streamlit?**
231
+ Both versions are maintained. Use:
232
+ - `app_gradio.py` for the enhanced version
233
+ - `app.py` for the original Streamlit version
234
+
235
+ ---
236
+
237
+ ## 👨‍💻 Author
238
+
239
+ **Mirza Yasir Abdullah Baig**
240
+
241
+ - 🌐 [Kaggle](https://www.kaggle.com/mirzayasirabdullah07)
242
+ - 💼 [LinkedIn](https://www.linkedin.com/in/mirza-yasir-abdullah-baig/)
243
+ - 💻 [GitHub](https://github.com/mirzayasirabdullahbaig07)
244
+
245
+ ---
246
+
247
+ ## 📝 License
248
+
249
+ Educational purposes. Not for commercial use without permission.
250
+
251
+ ---
252
+
253
+ ## 🐛 Troubleshooting
254
+
255
+ **Issue:** Gemini API errors
256
+ **Solution:** Check your API key in `.env` and get it from https://aistudio.google.com/app/apikey
257
+
258
+ **Issue:** Broken link checking takes too long
259
+ **Solution:** Uncheck the "Check for Broken Links" option
260
+
261
+ **Issue:** PDF generation fails
262
+ **Solution:** Ensure `fpdf` is installed: `pip install fpdf`
263
+
264
+ **Issue:** No trend data shown
265
+ **Solution:** Audit the same site multiple times to build history
266
+
267
+ ---
268
+
269
+ ## 🚀 Future Enhancements
270
+
271
+ - [ ] Multi-page website crawling
272
+ - [ ] Competitor comparison
273
+ - [ ] Lighthouse integration
274
+ - [ ] Email report scheduling
275
+ - [ ] Database storage (replace JSON)
276
+ - [ ] Custom scoring weights
277
+ - [ ] Screenshot capture
278
+ - [ ] Security header analysis
279
+
280
+ ---
281
+
282
+ ## 📸 Screenshots
283
+
284
+ Coming soon! Run the app to see the beautiful new Gradio interface.
285
+
286
+ ---
287
+
288
+ **Enjoy auditing! 🎉**
accessibility_checker.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+
3
def check_accessibility(soup, url):
    """
    Run a set of heuristic WCAG 2.1 checks against a parsed page.

    Args:
        soup: BeautifulSoup document for the page.
        url: page URL (currently unused; kept for interface stability).

    Returns:
        dict with:
            accessibility_score (int, clamped to 0-100),
            accessibility_issues (list[str], emoji-prefixed messages),
            wcag_compliance ('Good' | 'Needs Improvement' | 'Poor')
    """
    issues = []
    score = 100

    # Images must carry alt text for screen readers.
    # NOTE(review): an empty alt="" (valid for decorative images) is counted
    # as missing here too — confirm whether decorative images should be exempt.
    images = soup.find_all('img')
    images_without_alt = [img for img in images if not img.get('alt')]
    if images_without_alt:
        issues.append(f"❌ {len(images_without_alt)} images missing alt text")
        score -= min(20, len(images_without_alt) * 2)  # cap this deduction at 20

    # Exactly one H1 is expected for a clear document outline.
    h1_count = len(soup.find_all('h1'))
    if h1_count == 0:
        issues.append("❌ No H1 heading found - important for screen readers")
        score -= 10
    elif h1_count > 1:
        issues.append(f"⚠️ Multiple H1 headings ({h1_count}) - should be unique")
        score -= 5

    # Every non-button form control should have an associated <label for=...>.
    # The inner `break` reports at most once per form, but each offending form
    # still adds its own issue and deduction.
    forms = soup.find_all('form')
    for form in forms:
        inputs = form.find_all(['input', 'select', 'textarea'])
        for input_elem in inputs:
            if input_elem.get('type') not in ['submit', 'button', 'hidden']:
                label_id = input_elem.get('id')
                if not label_id or not form.find('label', {'for': label_id}):
                    issues.append("❌ Form inputs missing associated labels")
                    score -= 5
                    break

    # Inline style attributes often hard-code colors/sizes that override
    # user stylesheets; flagged once regardless of how many are found.
    inline_styles = soup.find_all(style=True)
    if inline_styles:
        issues.append("⚠️ Inline styles detected - may affect accessibility")
        score -= 3

    # HTML5 landmark elements help assistive tech navigate the page.
    main_tag = soup.find('main')
    nav_tag = soup.find('nav')
    if not main_tag:
        issues.append("⚠️ No <main> landmark - helps screen reader navigation")
        score -= 5
    if not nav_tag:
        issues.append("⚠️ No <nav> landmark found")
        score -= 3

    # Generic link text gives no context when read out of line.
    links = soup.find_all('a')
    generic_link_text = ['click here', 'read more', 'here', 'link']
    for link in links:
        text = link.get_text().strip().lower()
        if text in generic_link_text:
            issues.append("❌ Generic link text found (e.g., 'click here') - use descriptive text")
            score -= 5
            break  # one report is enough

    # <html lang="..."> lets screen readers pick the right voice.
    html_tag = soup.find('html')
    if html_tag and not html_tag.get('lang'):
        issues.append("❌ Missing lang attribute on <html> tag")
        score -= 10

    # Skip-navigation links let keyboard users jump past repeated headers.
    skip_link = soup.find('a', href='#main') or soup.find('a', href='#content')
    if not skip_link:
        issues.append("⚠️ No skip navigation link found")
        score -= 5

    # Videos should ship a captions track.
    # NOTE(review): only <track kind="captions"> is accepted; a page using
    # kind="subtitles" would still be flagged — confirm that is intended.
    videos = soup.find_all('video')
    for video in videos:
        if not video.find('track', kind='captions'):
            issues.append("❌ Videos missing captions/subtitles")
            score -= 10
            break

    return {
        'accessibility_score': max(0, score),  # never report below 0
        'accessibility_issues': issues if issues else ["✅ No major accessibility issues detected"],
        'wcag_compliance': 'Good' if score >= 80 else 'Needs Improvement' if score >= 60 else 'Poor'
    }
ai_analyzer.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import google.generativeai as genai
2
+ from dotenv import load_dotenv
3
+ import os
4
+ import json
5
+ import re
6
+
7
# Configure the Gemini client once at import time; the key is read from the
# .env file (see .env.example) via GEMINI_API_KEY.
load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
# Shared model handle reused for every analyze_with_ai() call.
model = genai.GenerativeModel('gemini-1.5-flash')
10
+
11
def analyze_with_ai(scan_data):
    """
    Ask Gemini for an agentic audit of the scanned site.

    Args:
        scan_data: dict of raw scan metrics (title, counts, load_time, ...).

    Returns:
        dict with keys: issues, suggestions, fix_snippets, optimized_html,
        keywords, headings_count. Falls back to a heuristic report when the
        API call or JSON parsing fails.
    """
    # Cheap keyword fallback derived from the page title.
    fallback_keywords = re.findall(r'\b\w+\b', scan_data.get("title", ""))[:10]

    prompt = f"""
    You are a website audit and optimization expert.
    Analyze this website scan data and provide:
    1) issues (list)
    2) suggestions (list)
    3) fix_snippets (list of HTML/SEO fixes)
    4) optimized_html (full HTML content with improvements applied)
    5) keywords (list)
    6) headings_count (dict of H1, H2, H3 counts)

    Respond ONLY in JSON format.

    Scan Data:
    {json.dumps(scan_data, indent=2)}
    """
    try:
        raw = model.generate_content(prompt).text

        # Gemini often wraps its JSON answer in markdown fences; strip them.
        if '```json' in raw:
            raw = raw.split('```json')[1].split('```')[0].strip()
        elif '```' in raw:
            raw = raw.split('```')[1].split('```')[0].strip()

        report = json.loads(raw)

        # Guarantee every expected key exists even if the model omitted some.
        defaults = {
            "keywords": fallback_keywords,
            "headings_count": scan_data.get("headings_count", {}),
            "fix_snippets": [],
            "optimized_html": "",
        }
        for key, value in defaults.items():
            report.setdefault(key, value)
        return report

    except Exception:
        # Best-effort heuristic report when the API or JSON path fails.
        return {
            "issues": [
                f"H1 tags found: {scan_data.get('h1_count',0)}",
                f"Images without ALT: {scan_data.get('images_without_alt',0)}",
                f"Page load time: {scan_data.get('load_time',0)}s"
            ],
            "suggestions": [
                "Add missing meta description",
                "Optimize images and include ALT text",
                "Improve page speed"
            ],
            "fix_snippets": [
                "<meta name='description' content='Your description here'>",
                "<img src='image.jpg' alt='Descriptive text'>"
            ],
            "optimized_html": "<!-- Add optimized HTML here -->",
            "keywords": fallback_keywords,
            "headings_count": scan_data.get("headings_count", {})
        }
app_gradio.py ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from scanner import scan_website
3
+ from ai_analyzer import analyze_with_ai
4
+ from utils import normalize_url, is_valid_url
5
+ from scoring import calculate_score
6
+ from accessibility_checker import check_accessibility
7
+ from mobile_checker import check_mobile_responsiveness
8
+ from link_checker import check_broken_links
9
+ from report_generator import generate_pdf_report
10
+ from history_tracker import save_audit, get_trend_data
11
+ import plotly.graph_objects as go
12
+ import plotly.express as px
13
+ import pandas as pd
14
+ from bs4 import BeautifulSoup
15
+ import requests
16
+
17
def create_gauge_chart(score, title):
    """Render a single 0-100 gauge indicator for one score."""
    # Band the dial: red-ish below 50, yellow to 80, green above.
    gauge_config = {
        'axis': {'range': [0, 100]},
        'bar': {'color': "darkblue"},
        'steps': [
            {'range': [0, 50], 'color': "lightcoral"},
            {'range': [50, 80], 'color': "lightyellow"},
            {'range': [80, 100], 'color': "lightgreen"},
        ],
        'threshold': {
            'line': {'color': "red", 'width': 4},
            'thickness': 0.75,
            'value': 90,
        },
    }
    indicator = go.Indicator(
        mode="gauge+number",
        value=score,
        title={'text': title},
        gauge=gauge_config,
    )
    chart = go.Figure(indicator)
    chart.update_layout(height=300)
    return chart
40
+
41
def create_radar_chart(scores_dict):
    """Plot every audit score on a single filled radar (spider) chart."""
    fig = go.Figure(
        go.Scatterpolar(
            r=list(scores_dict.values()),
            theta=list(scores_dict.keys()),
            fill='toself',
            name='Audit Scores',
        )
    )
    fig.update_layout(
        polar=dict(radialaxis=dict(range=[0, 100])),
        title="Overall Website Health Radar",
        height=400,
    )
    return fig
59
+
60
def create_metrics_bar_chart(scan_data):
    """Bar chart of raw on-page counts (headings, links, scripts, ...)."""
    # Display label -> scan_data key, in the order the bars should appear.
    label_key_pairs = [
        ('H1 Tags', 'h1_count'),
        ('H2 Tags', 'h2_count'),
        ('H3 Tags', 'h3_count'),
        ('Images w/o ALT', 'images_without_alt'),
        ('Links', 'links_count'),
        ('Scripts', 'scripts_count'),
    ]
    frame = pd.DataFrame({
        'Metric': [label for label, _ in label_key_pairs],
        'Value': [scan_data.get(key, 0) for _, key in label_key_pairs],
    })

    chart = px.bar(
        frame,
        x='Metric',
        y='Value',
        title='SEO & Technical Metrics',
        color='Value',
        color_continuous_scale='Viridis',
    )
    chart.update_layout(height=400)
    return chart
80
+
81
def create_trend_chart(url):
    """Line chart of historical scores for *url*; None when no history yet."""
    trend_data = get_trend_data(url)
    if not trend_data:
        return None

    df = pd.DataFrame(trend_data['scores'])
    df['Date'] = trend_data['dates']

    fig = go.Figure()
    # Every column except the Date column we just appended is a score series.
    for series_name in df.columns[:-1]:
        fig.add_trace(
            go.Scatter(
                x=df['Date'],
                y=df[series_name],
                mode='lines+markers',
                name=series_name,
            )
        )

    fig.update_layout(
        title='Score Trends Over Time',
        xaxis_title='Date',
        yaxis_title='Score',
        height=400,
    )
    return fig
102
+
103
def audit_website(url, check_links=True):
    """
    Run the full audit pipeline for one URL.

    Args:
        url: site to audit; normalized via normalize_url() below.
        check_links: when True, also probe links for breakage (up to 50,
            slower — the UI exposes this as a checkbox).

    Returns:
        An 11-tuple matching the Gradio outputs list:
        (summary_md, ai_issues_md, ai_suggestions_md, accessibility_md,
         mobile_md, broken_links_md, gauge_fig, radar_fig, metrics_fig,
         trend_fig_or_None, pdf_path_or_None).
        On failure the first element is an error string, the rest are None.
    """
    # Error returns must stay 11 elements long to match the outputs wiring.
    empty = (None,) * 10

    if not url or not is_valid_url(url):
        return ("❌ Invalid URL",) + empty

    url = normalize_url(url)

    # Step 1: Scan website
    scan_data = scan_website(url)

    if "error" in scan_data:
        return (f"❌ Error: {scan_data['error']}",) + empty

    # Step 2: Fetch the page again for the soup-based checkers.
    try:
        response = requests.get(url, timeout=10, headers={"User-Agent": "AI-Site-Auditor"})
        soup = BeautifulSoup(response.text, 'html.parser')
    except Exception:  # narrowed from bare except: keep SystemExit/KeyboardInterrupt alive
        return ("❌ Failed to fetch page content",) + empty

    # Step 3: Run all checks
    accessibility_data = check_accessibility(soup, url)
    mobile_data = check_mobile_responsiveness(soup, scan_data.get('page_size_mb', 0))

    if check_links:
        link_data = check_broken_links(url, soup, max_links=50)
    else:
        # Same shape as check_broken_links() output so the formatting below works.
        link_data = {'total_links_checked': 0, 'working_links': 0, 'broken_links_count': 0,
                     'broken_links_details': [], 'link_health': 'Skipped'}

    # Step 4: Calculate scores
    overall_score = calculate_score(scan_data)
    scan_data["overall_score"] = overall_score
    scan_data["seo_score"] = max(0, 100 - scan_data.get("images_without_alt", 0) * 5)
    # NOTE(review): a missing load_time defaults to 5 here (i.e. score 50) —
    # confirm that pessimistic default is intended.
    scan_data["performance_score"] = max(0, 100 - scan_data.get("load_time", 5) * 10)
    scan_data["security_score"] = 100 if scan_data.get("https") else 50

    # Step 5: AI Analysis
    ai_report = analyze_with_ai(scan_data)

    # Step 6: Persist this audit so trend charts have data next time.
    save_audit(url, scan_data, ai_report, accessibility_data, mobile_data, link_data)

    # Step 7: Create visualizations
    scores_dict = {
        'SEO': scan_data["seo_score"],
        'Performance': scan_data["performance_score"],
        'Accessibility': accessibility_data['accessibility_score'],
        'Security': scan_data["security_score"],
        'Mobile': mobile_data['mobile_score']
    }

    gauge_overall = create_gauge_chart(overall_score, "Overall Score")
    radar_chart = create_radar_chart(scores_dict)
    metrics_chart = create_metrics_bar_chart(scan_data)
    trend_chart = create_trend_chart(url)

    # Step 8: Format results as markdown for the Gradio tabs.
    summary = f"""
# 🎯 Audit Summary for {url}

## 📊 Scores
- **Overall Score:** {overall_score}/100
- **SEO Score:** {scan_data['seo_score']}/100
- **Performance Score:** {scan_data['performance_score']}/100
- **Accessibility Score:** {accessibility_data['accessibility_score']}/100
- **Security Score:** {scan_data['security_score']}/100
- **Mobile Score:** {mobile_data['mobile_score']}/100

## 🔧 Technical Metrics
- **Load Time:** {scan_data.get('load_time', 0)}s
- **Page Size:** {scan_data.get('page_size_mb', 0):.2f} MB
- **HTTPS:** {'✅ Yes' if scan_data.get('https') else '❌ No'}
- **Status Code:** {scan_data.get('status_code', 'N/A')}

## 🔗 Link Health
- **Total Links Checked:** {link_data['total_links_checked']}
- **Working Links:** {link_data['working_links']}
- **Broken Links:** {link_data['broken_links_count']}
- **Health Status:** {link_data['link_health']}

## 📱 Mobile Friendliness
- **Status:** {mobile_data['mobile_friendly']}

## ♿ Accessibility
- **WCAG Compliance:** {accessibility_data['wcag_compliance']}
"""

    # Format AI Issues (capped at 10 to keep the tab readable)
    ai_issues_text = "## ⚠️ AI Detected Issues\n\n"
    for issue in ai_report.get('issues', [])[:10]:
        ai_issues_text += f"- {issue}\n"

    # Format AI Suggestions (same cap)
    ai_suggestions_text = "## ✅ AI Recommendations\n\n"
    for suggestion in ai_report.get('suggestions', [])[:10]:
        ai_suggestions_text += f"- {suggestion}\n"

    # Format Accessibility Issues
    accessibility_text = "## ♿ Accessibility Issues\n\n"
    for issue in accessibility_data.get('accessibility_issues', []):
        accessibility_text += f"{issue}\n\n"

    # Format Mobile Issues
    mobile_text = "## 📱 Mobile Issues\n\n"
    for issue in mobile_data.get('mobile_issues', []):
        mobile_text += f"{issue}\n\n"

    # Format Broken Links
    broken_links_text = "## 🔗 Broken Links Details\n\n"
    if link_data['broken_links_details']:
        for broken in link_data['broken_links_details']:
            broken_links_text += f"- **URL:** {broken['url']}\n"
            broken_links_text += f"  **Status:** {broken['status']}\n\n"
    else:
        broken_links_text += "✅ No broken links detected!\n"

    # Generate PDF — best-effort: the audit is still useful without it.
    try:
        pdf_path = generate_pdf_report(url, scan_data, ai_report, accessibility_data, mobile_data, link_data)
    except Exception:  # narrowed from bare except
        pdf_path = None

    return (
        summary,
        ai_issues_text,
        ai_suggestions_text,
        accessibility_text,
        mobile_text,
        broken_links_text,
        gauge_overall,
        radar_chart,
        metrics_chart,
        trend_chart if trend_chart else None,
        pdf_path
    )
240
+
241
+ # Create Gradio Interface
242
# --- Gradio UI definition (built at import time; launched under __main__) ---
with gr.Blocks(title="AuditAI - Agentic Website Auditor", theme=gr.themes.Soft()) as demo:

    gr.Markdown("""
    # 🧠 AuditAI - Agentic AI Website Auditor
    **Powered by Google Gemini 1.5 Flash | Enhanced with Advanced Analytics**

    Comprehensive website auditing with SEO, Performance, Accessibility, Security, and Mobile analysis.
    """)

    # Input row: URL box plus the (slower) broken-link toggle.
    with gr.Row():
        with gr.Column(scale=3):
            url_input = gr.Textbox(
                label="Website URL",
                placeholder="https://example.com",
                info="Enter the full URL of the website to audit"
            )
        with gr.Column(scale=1):
            check_links_checkbox = gr.Checkbox(
                label="Check for Broken Links",
                value=True,
                info="May take longer"
            )

    audit_btn = gr.Button("🚀 Start Audit", variant="primary", size="lg")

    # Results are organized into five tabs; the components below are filled
    # by audit_website() in the exact order of the outputs list further down.
    with gr.Tabs():
        with gr.Tab("📊 Overview"):
            summary_output = gr.Markdown(label="Audit Summary")

            with gr.Row():
                gauge_plot = gr.Plot(label="Overall Score")
                radar_plot = gr.Plot(label="Health Radar")

        with gr.Tab("📈 Metrics & Trends"):
            metrics_plot = gr.Plot(label="Technical Metrics")
            trend_plot = gr.Plot(label="Historical Trends")

        with gr.Tab("⚠️ Issues"):
            ai_issues_output = gr.Markdown(label="AI Detected Issues")
            accessibility_output = gr.Markdown(label="Accessibility Issues")
            mobile_output = gr.Markdown(label="Mobile Issues")
            broken_links_output = gr.Markdown(label="Broken Links")

        with gr.Tab("✅ Recommendations"):
            ai_suggestions_output = gr.Markdown(label="AI Recommendations")

        with gr.Tab("📄 PDF Report"):
            gr.Markdown("### Download your comprehensive audit report")
            pdf_output = gr.File(label="Download PDF Report")

    # Event handler: outputs order must match audit_website()'s return tuple.
    audit_btn.click(
        fn=audit_website,
        inputs=[url_input, check_links_checkbox],
        outputs=[
            summary_output,
            ai_issues_output,
            ai_suggestions_output,
            accessibility_output,
            mobile_output,
            broken_links_output,
            gauge_plot,
            radar_plot,
            metrics_plot,
            trend_plot,
            pdf_output
        ]
    )

    gr.Markdown("""
    ---
    ### 👨‍💻 Built by Sakshi Gupta
    **Features:** SEO Analysis • Performance Metrics • Accessibility Check • Broken Link Detection •
    Mobile Responsiveness • AI-Powered Insights • PDF Reports • Historical Tracking
    """)

if __name__ == "__main__":
    # share=True publishes a temporary public gradio.live URL in addition to localhost.
    demo.launch(share=True)
history_tracker.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from datetime import datetime
4
+
5
HISTORY_FILE = "audit_history.json"

def load_history():
    """Load the audit history list from HISTORY_FILE.

    Returns:
        list: previously saved audit entries, or an empty list when the
        file is absent, unreadable, or does not contain valid JSON.
    """
    if not os.path.exists(HISTORY_FILE):
        return []
    try:
        with open(HISTORY_FILE, 'r') as fh:
            return json.load(fh)
    # Narrowed from a bare `except:` which also swallowed SystemExit and
    # KeyboardInterrupt. ValueError covers json.JSONDecodeError (its
    # subclass); OSError covers read failures and races on the file.
    except (OSError, ValueError):
        return []
16
+
17
def save_audit(url, scan_data, ai_report, accessibility_data, mobile_data, link_data):
    """Append the current audit results to the persistent history file.

    Builds a flat summary record from the individual analysis dicts,
    stores it with the existing history (capped at the 100 most recent
    audits) and returns the record that was written.
    """
    record = {
        'timestamp': datetime.now().isoformat(),
        'url': url,
        'overall_score': scan_data.get('overall_score', 0),
        'seo_score': scan_data.get('seo_score', 0),
        'performance_score': scan_data.get('performance_score', 0),
        'accessibility_score': accessibility_data.get('accessibility_score', 0),
        'security_score': scan_data.get('security_score', 0),
        'mobile_score': mobile_data.get('mobile_score', 0),
        'load_time': scan_data.get('load_time', 0),
        'page_size_mb': scan_data.get('page_size_mb', 0),
        'broken_links': link_data.get('broken_links_count', 0),
        'https': scan_data.get('https', False),
    }

    history = load_history()
    history.append(record)
    # Keep only the 100 most recent audits on disk.
    trimmed = history[-100:]

    with open(HISTORY_FILE, 'w') as fh:
        json.dump(trimmed, fh, indent=2)

    return record
45
+
46
def get_site_history(url, limit=10):
    """Return up to *limit* most recent audit entries recorded for *url*."""
    matching = [record for record in load_history() if record['url'] == url]
    return matching[-limit:]
51
+
52
def get_trend_data(url):
    """Assemble per-metric score series for charting trends of *url*.

    Returns None when there is no recorded history; otherwise a dict with
    the audit dates (YYYY-MM-DD prefixes of the timestamps) and one score
    list per tracked metric.
    """
    entries = get_site_history(url, limit=20)
    if not entries:
        return None

    # (chart label, history record key) pairs, in display order.
    metric_keys = [
        ('Overall', 'overall_score'),
        ('SEO', 'seo_score'),
        ('Performance', 'performance_score'),
        ('Accessibility', 'accessibility_score'),
        ('Security', 'security_score'),
        ('Mobile', 'mobile_score'),
    ]

    return {
        'dates': [entry['timestamp'][:10] for entry in entries],
        'scores': {label: [entry[key] for entry in entries]
                   for label, key in metric_keys},
    }
link_checker.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ from urllib.parse import urljoin, urlparse
4
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
+
6
def check_broken_links(url, soup, max_links=50, timeout=5):
    """
    Scan anchor tags on the page and probe each linked URL for availability.

    Args:
        url: base page URL, used to resolve relative hrefs.
        soup: parsed BeautifulSoup document.
        max_links: cap on how many anchors are considered.
        timeout: per-request timeout in seconds.

    Returns a dict with counts of checked/working/broken/skipped links,
    details of the first ten broken ones, and a coarse health label.
    """
    request_headers = {"User-Agent": "AI-Site-Auditor"}
    candidates = []
    skipped = 0

    # Gather at most `max_links` anchors, keeping only real HTTP(S) targets.
    for anchor in soup.find_all('a', href=True)[:max_links]:
        href = anchor.get('href')

        # Anchors, mail/phone links and javascript: pseudo-URLs are not checkable.
        if href.startswith(('#', 'mailto:', 'tel:', 'javascript:')):
            skipped += 1
            continue

        absolute = urljoin(url, href)
        if absolute.startswith(('http://', 'https://')):
            candidates.append((href, absolute))

    def probe(pair):
        """Check one (original href, absolute url) pair; return a result dict."""
        original, absolute = pair
        try:
            resp = requests.head(absolute, timeout=timeout, allow_redirects=True,
                                 headers=request_headers)
            # Some servers reject HEAD; retry with GET before declaring broken.
            if resp.status_code >= 400:
                resp = requests.get(absolute, timeout=timeout,
                                    headers=request_headers)
            if resp.status_code >= 400:
                return {'broken': True, 'url': original, 'status': resp.status_code}
            return {'broken': False}
        except requests.exceptions.RequestException as exc:
            return {'broken': True, 'url': original, 'status': 'Error',
                    'error': str(exc)[:50]}

    # Probe links concurrently to keep the audit fast.
    broken = []
    ok_count = 0
    with ThreadPoolExecutor(max_workers=10) as pool:
        pending = [pool.submit(probe, pair) for pair in candidates]
        for done in as_completed(pending):
            outcome = done.result()
            if outcome['broken']:
                broken.append(outcome)
            else:
                ok_count += 1

    broken_total = len(broken)
    if broken_total == 0:
        health = 'Excellent'
    elif broken_total <= 2:
        health = 'Good'
    else:
        health = 'Needs Attention'

    return {
        'total_links_checked': len(candidates),
        'working_links': ok_count,
        'broken_links_count': broken_total,
        'broken_links_details': broken[:10],  # cap detail payload
        'skipped_links': skipped,
        'link_health': health,
    }
mobile_checker.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+
3
def check_mobile_responsiveness(soup, page_size_mb):
    """
    Checks mobile-friendliness and responsive design via static heuristics.

    Args:
        soup: BeautifulSoup-parsed HTML document.
        page_size_mb: total page weight in megabytes.

    Returns:
        dict with 'mobile_score' (0-100), 'mobile_issues' (list of
        human-readable findings) and a 'mobile_friendly' verdict.
    """
    issues = []
    score = 100

    # Viewport meta tag - the single most important mobile rendering signal.
    viewport = soup.find('meta', attrs={'name': 'viewport'})
    if not viewport:
        issues.append("❌ Missing viewport meta tag - critical for mobile devices")
        score -= 25
    else:
        content = viewport.get('content', '')
        if 'width=device-width' not in content:
            issues.append("⚠️ Viewport should include 'width=device-width'")
            score -= 10
        if 'initial-scale=1' not in content:
            issues.append("⚠️ Viewport should include 'initial-scale=1'")
            score -= 5

    # Responsive images: look for srcset/sizes attributes.
    images = soup.find_all('img')
    responsive_images = [img for img in images if img.get('srcset') or img.get('sizes')]
    if images and len(responsive_images) == 0:
        issues.append("⚠️ No responsive images detected (consider using srcset)")
        score -= 10

    # Page weight against common mobile data budgets.
    if page_size_mb > 3:
        issues.append(f"❌ Page size ({page_size_mb:.2f}MB) too large for mobile - should be <3MB")
        score -= 15
    elif page_size_mb > 1.5:
        issues.append(f"⚠️ Page size ({page_size_mb:.2f}MB) could be optimized for mobile")
        score -= 5

    # Flash is not supported by any mobile browser.
    flash = soup.find_all(['embed', 'object'], type='application/x-shockwave-flash')
    if flash:
        issues.append("❌ Flash content detected - not supported on mobile devices")
        score -= 20

    # Fixed-width tables: one hit is enough to flag (and penalize) once.
    tables = soup.find_all('table')
    for table in tables:
        if table.get('width') and 'px' in str(table.get('width')):
            issues.append("⚠️ Fixed-width tables detected - may not be mobile-friendly")
            score -= 5
            break

    # Touch targets: very small inline font sizes on buttons/links.
    buttons = soup.find_all('button')
    links = soup.find_all('a')
    small_touch_targets = 0
    for elem in buttons + links:
        style = elem.get('style', '')
        if 'font-size' in style and any(size in style for size in ['8px', '9px', '10px']):
            small_touch_targets += 1

    if small_touch_targets > 0:
        issues.append(f"⚠️ {small_touch_targets} elements may have small touch targets")
        score -= 10

    # Media queries: only inline <style> blocks are inspected — linked
    # external stylesheets are not fetched, so this heuristic can miss
    # sites that keep all CSS external. (Removed an unused lookup of
    # <link rel="stylesheet"> elements that was never consulted.)
    styles = soup.find_all('style')
    has_media_queries = False
    for style in styles:
        if '@media' in style.get_text():
            has_media_queries = True
            break

    if not has_media_queries and len(styles) > 0:
        issues.append("⚠️ No media queries detected in inline styles")
        score -= 10

    # Relative font sizes (em/rem/%) scale better on small screens.
    if not soup.find_all(style=lambda x: x and 'font-size' in x and any(unit in x for unit in ['em', 'rem', '%'])):
        issues.append("⚠️ Consider using relative font sizes (em, rem, %) for better mobile scaling")
        score -= 5

    return {
        'mobile_score': max(0, score),
        'mobile_issues': issues if issues else ["✅ Good mobile responsiveness"],
        'mobile_friendly': 'Yes' if score >= 80 else 'Partially' if score >= 60 else 'No'
    }
report_generator.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fpdf import FPDF
2
+ from datetime import datetime
3
+ import json
4
+
5
class PDFReport(FPDF):
    """PDF document with a branded title header and page-number footer."""

    def header(self):
        # Report title line, bold and centered.
        self.set_font('Arial', 'B', 16)
        self.cell(0, 10, 'AuditAI - Website Audit Report', 0, 1, 'C')
        # Generation timestamp beneath it, in small italics.
        stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        self.set_font('Arial', 'I', 10)
        self.cell(0, 5, f'Generated on {stamp}', 0, 1, 'C')
        self.ln(5)

    def footer(self):
        # Centered page number, 15mm above the bottom edge.
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')
17
+
18
def generate_pdf_report(url, scan_data, ai_report, accessibility_data, mobile_data, link_data):
    """
    Generates a comprehensive PDF audit report.

    Args:
        url: audited website URL.
        scan_data: technical metrics and scores from the scanner.
        ai_report: dict with 'issues' and 'suggestions' lists.
        accessibility_data: dict with 'accessibility_score' / 'accessibility_issues'.
        mobile_data: dict with 'mobile_score' / 'mobile_issues'.
        link_data: broken-link check results.

    Returns: path of the timestamped PDF file written to the working directory.
    """

    def _latin1(text):
        # Classic fpdf only renders latin-1 text. Issue strings produced by
        # the checkers contain emoji markers (e.g. warning/cross symbols)
        # that would raise UnicodeEncodeError inside cell()/multi_cell();
        # replace unencodable characters instead of crashing.
        return str(text).encode('latin-1', 'replace').decode('latin-1')

    pdf = PDFReport()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)

    # Website URL
    pdf.set_font('Arial', 'B', 14)
    pdf.cell(0, 10, 'Website Analyzed:', 0, 1)
    pdf.set_font('Arial', '', 12)
    pdf.cell(0, 8, _latin1(url), 0, 1)
    pdf.ln(5)

    # Overall Scores Section
    pdf.set_font('Arial', 'B', 14)
    pdf.set_fill_color(200, 220, 255)
    pdf.cell(0, 10, 'Overall Performance Scores', 0, 1, 'L', True)
    pdf.ln(2)

    pdf.set_font('Arial', '', 11)
    scores = [
        ('Overall Score', scan_data.get('overall_score', 0)),
        ('SEO Score', scan_data.get('seo_score', 0)),
        ('Performance Score', scan_data.get('performance_score', 0)),
        ('Accessibility Score', accessibility_data.get('accessibility_score', 0)),
        ('Security Score', scan_data.get('security_score', 0)),
        ('Mobile Score', mobile_data.get('mobile_score', 0))
    ]

    # Color-code each score: green >= 80, orange >= 60, red below.
    for label, score in scores:
        color = (0, 200, 0) if score >= 80 else (255, 165, 0) if score >= 60 else (255, 0, 0)
        pdf.set_text_color(*color)
        pdf.cell(100, 8, f'{label}:', 0, 0)
        pdf.set_font('Arial', 'B', 11)
        pdf.cell(0, 8, f'{score}/100', 0, 1)
        pdf.set_font('Arial', '', 11)

    pdf.set_text_color(0, 0, 0)
    pdf.ln(5)

    # Technical Metrics
    pdf.set_font('Arial', 'B', 14)
    pdf.set_fill_color(200, 220, 255)
    pdf.cell(0, 10, 'Technical Metrics', 0, 1, 'L', True)
    pdf.ln(2)

    pdf.set_font('Arial', '', 11)
    metrics = [
        ('Load Time', f"{scan_data.get('load_time', 0)} seconds"),
        ('Page Size', f"{scan_data.get('page_size_mb', 0):.2f} MB"),
        ('HTTPS Enabled', 'Yes' if scan_data.get('https') else 'No'),
        ('Status Code', str(scan_data.get('status_code', 'N/A'))),
        ('Total Links', str(scan_data.get('links_count', 0))),
        ('Internal Links', str(scan_data.get('internal_links', 0))),
        ('External Links', str(scan_data.get('external_links', 0))),
        ('Images without ALT', str(scan_data.get('images_without_alt', 0))),
        ('H1 Tags', str(scan_data.get('h1_count', 0))),
        ('Scripts', str(scan_data.get('scripts_count', 0)))
    ]

    for label, value in metrics:
        pdf.cell(95, 7, f'{label}:', 0, 0)
        pdf.cell(0, 7, _latin1(value), 0, 1)

    pdf.ln(5)

    # Link Health
    pdf.set_font('Arial', 'B', 14)
    pdf.set_fill_color(200, 220, 255)
    pdf.cell(0, 10, 'Link Health Check', 0, 1, 'L', True)
    pdf.ln(2)

    pdf.set_font('Arial', '', 11)
    pdf.cell(95, 7, 'Total Links Checked:', 0, 0)
    pdf.cell(0, 7, str(link_data.get('total_links_checked', 0)), 0, 1)
    pdf.cell(95, 7, 'Working Links:', 0, 0)
    pdf.cell(0, 7, str(link_data.get('working_links', 0)), 0, 1)
    pdf.cell(95, 7, 'Broken Links:', 0, 0)
    # Red if any link is broken, green otherwise. (Was a side-effecting
    # conditional expression; rewritten as a plain if/else.)
    if link_data.get('broken_links_count', 0) > 0:
        pdf.set_text_color(255, 0, 0)
    else:
        pdf.set_text_color(0, 200, 0)
    pdf.cell(0, 7, str(link_data.get('broken_links_count', 0)), 0, 1)
    pdf.set_text_color(0, 0, 0)
    pdf.ln(5)

    # Broken Links Details
    if link_data.get('broken_links_details'):
        pdf.set_font('Arial', 'B', 12)
        pdf.cell(0, 8, 'Broken Links Found:', 0, 1)
        pdf.set_font('Arial', '', 9)
        for broken in link_data['broken_links_details'][:10]:
            pdf.multi_cell(0, 5, _latin1(f"- {broken['url']} (Status: {broken['status']})"))
        pdf.ln(3)

    # AI Detected Issues
    pdf.add_page()
    pdf.set_font('Arial', 'B', 14)
    pdf.set_fill_color(255, 200, 200)
    pdf.cell(0, 10, 'AI Detected Issues', 0, 1, 'L', True)
    pdf.ln(2)

    pdf.set_font('Arial', '', 10)
    for issue in ai_report.get('issues', [])[:15]:
        pdf.multi_cell(0, 6, _latin1(f'- {issue}'))
    pdf.ln(5)

    # Accessibility Issues
    pdf.set_font('Arial', 'B', 14)
    pdf.set_fill_color(255, 230, 200)
    pdf.cell(0, 10, 'Accessibility Issues', 0, 1, 'L', True)
    pdf.ln(2)

    pdf.set_font('Arial', '', 10)
    for issue in accessibility_data.get('accessibility_issues', [])[:15]:
        pdf.multi_cell(0, 6, _latin1(issue))
    pdf.ln(5)

    # Mobile Issues
    pdf.set_font('Arial', 'B', 14)
    pdf.set_fill_color(230, 200, 255)
    pdf.cell(0, 10, 'Mobile Responsiveness Issues', 0, 1, 'L', True)
    pdf.ln(2)

    pdf.set_font('Arial', '', 10)
    for issue in mobile_data.get('mobile_issues', [])[:15]:
        pdf.multi_cell(0, 6, _latin1(issue))
    pdf.ln(5)

    # AI Suggestions
    pdf.add_page()
    pdf.set_font('Arial', 'B', 14)
    pdf.set_fill_color(200, 255, 200)
    pdf.cell(0, 10, 'AI Recommendations', 0, 1, 'L', True)
    pdf.ln(2)

    pdf.set_font('Arial', '', 10)
    for suggestion in ai_report.get('suggestions', [])[:20]:
        pdf.multi_cell(0, 6, _latin1(f'- {suggestion}'))

    # Save PDF with a timestamped filename.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"audit_report_{timestamp}.pdf"
    pdf.output(filename)

    return filename
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ gradio
3
+ requests
4
+ beautifulsoup4
5
+ google-generativeai
6
+ python-dotenv
7
+ plotly
8
+ pandas
9
+ wordcloud
10
+ matplotlib
11
+ fpdf
scanner.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ import time
3
+ from utils import safe_request
4
+
5
def scan_website(url):
    """
    Fetch *url* and extract technical / SEO metrics from its HTML.

    Returns a dict of metrics on success, or {"error": ..., "score": 0}
    when the page cannot be fetched.
    """
    # Local import keeps this fix self-contained within the function.
    from urllib.parse import urljoin, urlparse

    data = {}

    # Measure total load time including the HTTP request itself.
    start = time.time()
    response = safe_request(url)
    if not response:
        return {"error": "Unable to fetch URL", "score": 0}

    soup = BeautifulSoup(response.text, "html.parser")
    load_time = round(time.time() - start, 2)

    # Page size in MB
    page_size_mb = len(response.content) / (1024 * 1024)

    # Classify links as internal/external by comparing hostnames. The
    # previous substring test (`url.split("//")[1] in href`) misclassified
    # external URLs whose path merely contained this site's domain, raised
    # IndexError for scheme-less URLs, and ignored relative links entirely
    # (which are always internal).
    base_host = urlparse(url).netloc.lower()
    internal_links = 0
    external_links = 0
    for link in soup.find_all("a", href=True):
        href = link.get("href")
        # Skip in-page anchors and non-navigational schemes.
        if href.startswith(("#", "mailto:", "tel:", "javascript:")):
            continue
        target_host = urlparse(urljoin(url, href)).netloc.lower()
        if not target_host:
            continue
        if target_host == base_host:
            internal_links += 1
        else:
            external_links += 1

    # Heading counts
    headings_count = {
        "H1": len(soup.find_all("h1")),
        "H2": len(soup.find_all("h2")),
        "H3": len(soup.find_all("h3"))
    }

    # Guard against a <title> tag that exists but is empty: .string is None
    # then, and downstream scoring would treat None as a real title.
    title = soup.title.string if soup.title and soup.title.string else "Missing"

    data.update({
        "status_code": response.status_code,
        "load_time": load_time,
        "https": url.startswith("https"),
        "title": title,
        "meta_description": bool(soup.find("meta", attrs={"name": "description"})),
        "h1_count": headings_count["H1"],
        "h2_count": headings_count["H2"],
        "h3_count": headings_count["H3"],
        "headings_count": headings_count,
        "images_without_alt": len([img for img in soup.find_all("img") if not img.get("alt")]),
        "links_count": len(soup.find_all("a")),
        "internal_links": internal_links,
        "external_links": external_links,
        "scripts_count": len(soup.find_all("script")),
        "paragraph_count": len(soup.find_all("p")),
        "page_size_mb": page_size_mb
    })

    return data
scoring.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def calculate_score(scan_data):
2
+ score = 0
3
+ score += 15 if scan_data.get("https") else 0
4
+
5
+ load_time = scan_data.get("load_time", 5)
6
+ if load_time <= 1: score += 15
7
+ elif load_time <= 3: score += 10
8
+ else: score += 5
9
+
10
+ score += 10 if scan_data.get("title") != "Missing" else 0
11
+ score += 10 if scan_data.get("meta_description") else 0
12
+ score += 10 if scan_data.get("h1_count", 0) >= 1 else 5
13
+
14
+ missing_alt = scan_data.get("images_without_alt", 0)
15
+ score += max(0, 10 - missing_alt*2)
16
+
17
+ score += min(5, scan_data.get("links_count", 0)*0.1)
18
+ score += min(5, scan_data.get("scripts_count", 0)*0.1)
19
+
20
+ paragraphs = scan_data.get("paragraph_count", 0)
21
+ score += 10 if paragraphs >= 3 else max(0, paragraphs*3)
22
+
23
+ score += 10 if scan_data.get("status_code") == 200 else 0
24
+
25
+ return round(min(score, 100), 2)
utils.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import requests
3
+
4
def normalize_url(url):
    """Ensure *url* carries a scheme, defaulting to HTTPS when absent."""
    has_scheme = url.startswith(("http://", "https://"))
    return url if has_scheme else "https://" + url
8
+
9
def is_valid_url(url):
    """Loosely validate that *url* looks like a web address (scheme optional)."""
    pattern = re.compile(
        r'^(https?:\/\/)?([\da-z.-]+)\.([a-z.]{2,6})([\/\w .-]*)*\/?$'
    )
    return pattern.match(url) is not None
14
+
15
def safe_request(url, timeout=10):
    """GET *url* with a custom User-Agent; return None on any request error."""
    request_headers = {"User-Agent": "AI-Site-Auditor"}
    try:
        return requests.get(url, timeout=timeout, headers=request_headers)
    except requests.exceptions.RequestException:
        return None
+ return None