Spaces:
Sleeping
Sleeping
Upload 12 files
Browse files- app.py +5 -0
- chat_wrapper.py +32 -0
- pdf_exporter.py +26 -0
- performance_agent.py +63 -0
- pipeline.py +46 -0
- report_generator.py +104 -0
- report_prompt.txt +13 -0
- requirements.txt +16 -0
- scraper_agent.py +46 -0
- summary_prompt.txt +16 -0
- ui.py +120 -0
- ui_ux_prompt.txt +18 -0
app.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py — thin entry point: all UI construction lives in ui.py.

from ui import launch_ui

if __name__ == "__main__":
    launch_ui()
|
chat_wrapper.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# chat_model_wrapper.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import google.generativeai as genai
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
|
| 7 |
+
load_dotenv(override=True)
|
| 8 |
+
|
| 9 |
+
class ChatRefiner:
    """Thin wrapper around a Gemini generative model for prompt -> text generation."""

    def __init__(self, model_name: str = "gemini-1.5-flash"):
        """
        Configure the Gemini client.

        Args:
            model_name: Gemini model identifier to use for generation.

        Raises:
            ValueError: if GEMINI_API_KEY is not set in the environment or .env file.
        """
        # SECURITY FIX: a literal API key was previously hard-coded here,
        # unconditionally overwriting the environment value and leaking the
        # secret in source control. Read the key from the environment only.
        # (The previously committed key must be revoked/rotated.)
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY not set in environment or .env file.")

        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)

    def answer(self, prompt: str) -> str:
        """
        Send `prompt` to Gemini and return the generated text, stripped.

        Raises:
            Exception: re-raises any client/API error after logging it.
        """
        try:
            response = self.model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            # Log or handle as you prefer
            print(f"[ChatRefiner.answer] Error: {e}")
            raise
|
pdf_exporter.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pdfkit
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
load_dotenv(override = True)
|
| 6 |
+
|
| 7 |
+
class PDFExporter:
    """Renders HTML report strings to PDF files via pdfkit/wkhtmltopdf."""

    def __init__(self):
        # Output directory is configurable; create it eagerly so export()
        # never fails on a missing folder.
        self.output_dir = os.getenv('OUTPUT_DIR', './out')
        os.makedirs(self.output_dir, exist_ok=True)

        # Optional explicit wkhtmltopdf binary location for systems where
        # it is not on PATH.
        wkhtml_path = os.getenv("WKHTMLTOPDF_PATH")
        if wkhtml_path:
            if not os.path.isfile(wkhtml_path):
                raise FileNotFoundError(f"wkhtmltopdf not found at: {wkhtml_path}")
            self.config = pdfkit.configuration(wkhtmltopdf=wkhtml_path)
        else:
            self.config = None

    def export(self, html: str, name: str = "report") -> str:
        """Write `html` to `<output_dir>/<name>.pdf` and return the output path."""
        out_path = os.path.join(self.output_dir, f"{name}.pdf")
        try:
            pdfkit.from_string(html, out_path, configuration=self.config)
        except Exception as e:
            print(f"[PDFExporter.export] PDF generation failed: {e}")
            raise
        return out_path
|
performance_agent.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# performance_agent.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import requests
|
| 5 |
+
|
| 6 |
+
class PerformanceAgent:
    """
    Wraps the PageSpeed Insights API to fetch
    Lighthouse scores for a given URL.
    On any network/API failure, returns all-100% defaults.
    """

    EXPECTED_CATEGORIES = ("performance", "accessibility", "best-practices", "seo")

    def __init__(self):
        # SECURITY FIX: a literal API key was previously shipped as the
        # os.getenv() default, leaking the secret and making the not-set
        # check below unreachable. Require the key from the environment.
        # (The previously committed key must be revoked/rotated.)
        self.api_key = os.getenv("PAGESPEED_API_KEY")
        self.endpoint = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
        if not self.api_key:
            raise ValueError("PAGESPEED_API_KEY not set in .env")

    def fetch_performance(self, url: str, strategy: str = "mobile") -> dict:
        """
        Fetch Lighthouse category scores and failing-audit suggestions.

        Args:
            url: fully-qualified URL to analyze.
            strategy: "mobile" or "desktop".

        Returns:
            dict with:
              "scores": {category: percentage (0-100, one decimal)}
              "audit_suggestions": {audit_id: display value} for audits < 100%.
        """
        params = {
            "url": url,
            "strategy": strategy,  # "mobile" or "desktop"
            "key": self.api_key
        }

        try:
            resp = requests.get(self.endpoint, params=params, timeout=30)
            resp.raise_for_status()
            data = resp.json()
            lh = data.get("lighthouseResult", {})
            cats = lh.get("categories", {})
            audits = lh.get("audits", {})

            # Build scores, default missing/unparsable → 100.0
            scores = {}
            for cat in self.EXPECTED_CATEGORIES:
                raw = cats.get(cat, {}).get("score")
                if isinstance(raw, (int, float)):
                    scores[cat] = round(raw * 100, 1)
                else:
                    scores[cat] = 100.0

            # Collect audits < 100% for suggestions.
            audit_suggestions = {}
            for audit_id, info in audits.items():
                sc = info.get("score")
                if isinstance(sc, (int, float)) and sc < 1:
                    # ROBUSTNESS FIX: "displayValue" may be present but None,
                    # which would crash .strip(); coerce to "" first.
                    audit_suggestions[audit_id] = (info.get("displayValue") or "").strip()

            return {
                "scores": scores,
                "audit_suggestions": audit_suggestions
            }

        except requests.exceptions.RequestException as e:
            # Degrade gracefully: log the error and return an all-100%
            # default with no suggestions.
            print(f"[PerformanceAgent] PSI API error: {e}")
            return {
                "scores": {cat: 100.0 for cat in self.EXPECTED_CATEGORIES},
                "audit_suggestions": {}
            }
|
pipeline.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
from scraper_agent import ScraperAgent
|
| 3 |
+
from seo_agent.seo_agent import SEOAgent
|
| 4 |
+
from performance_agent import PerformanceAgent
|
| 5 |
+
from report_generator import ReportGenerator
|
| 6 |
+
# from pdf_exporter import PDFExporter
|
| 7 |
+
|
| 8 |
+
class Pipeline:
    """Orchestrates scraping, performance, SEO/image analysis, and report generation."""

    def __init__(self):
        self.scraper = ScraperAgent()
        self.seo = SEOAgent()
        self.perf = PerformanceAgent()
        self.report = ReportGenerator()
        # self.pdf = PDFExporter()

    async def run(self, url: str):
        """
        Async generator yielding (status_message, payload) tuples as each
        stage completes. The final payload is the rendered report HTML.
        """
        # BUG FIX: scraper.close() was previously the last statement of the
        # body, so it never ran if any stage raised or the consumer stopped
        # iterating early. try/finally guarantees cleanup in all cases.
        try:
            yield "🔍 Scraping URL and Fetching Performance", None

            # Scraping and the PageSpeed call are independent — run in parallel.
            page_task = asyncio.to_thread(self.scraper.fetch, url)
            perf_task = asyncio.to_thread(self.perf.fetch_performance, url)
            page, perf_data = await asyncio.gather(page_task, perf_task)

            yield f"✔️ Title: {page['title']!r}", None
            yield "✅ Performance metrics ready", perf_data

            # SEO + image analysis both depend only on the scraped page —
            # run them in parallel too.
            yield "⚙️ Running SEO & Image analysis…", None
            seo_task = asyncio.to_thread(self.seo.analyze_seo, page["html"], page["text"])
            img_task = asyncio.to_thread(self.seo.analyze_images, page["images"], page["text"])
            seo_report, img_report = await asyncio.gather(seo_task, img_task)

            yield "✅ SEO analysis complete", seo_report
            yield "✅ Image analysis complete", img_report

            # Generate report (sequential).
            yield "📝 Generating report HTML…", None
            html = self.report.build(url, str(perf_data), seo_report, img_report)
            yield "✅ Report HTML ready", html

            # Export PDF
            # yield "📄 Exporting to PDF…", None
            # pdf_path = self.pdf.export(html, name="final_report")
            # yield "🎉 Done!", pdf_path
        finally:
            # Always release scraper resources, even on failure/early exit.
            self.scraper.close()
|
report_generator.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
from jinja2 import Environment, FileSystemLoader, select_autoescape
|
| 4 |
+
import markdown
|
| 5 |
+
from chat_wrapper import ChatRefiner
|
| 6 |
+
|
| 7 |
+
class ReportGenerator:
    """
    Converts raw analysis text (performance / SEO / images) into the final
    HTML report: each section is refined via Gemini into markdown, rendered
    to HTML, summarized, and injected into a Jinja2 template.
    """

    def __init__(self, template_path: str = None, prompt_path: str = None):
        """
        Args:
            template_path: Jinja2 template file; defaults to $TEMPLATE_PATH
                or ./templates/report.html.j2.
            prompt_path: section-refinement prompt file; defaults to
                $PROMPT_PATH or ./report_prompt.txt.

        Raises:
            FileNotFoundError: if the template or any prompt file is missing.
        """
        # Default template path
        self.template_path = template_path or os.getenv("TEMPLATE_PATH", "./templates/report.html.j2")
        if not os.path.isfile(self.template_path):
            raise FileNotFoundError(f"Report template not found: {self.template_path}")

        # Jinja2 environment rooted at the template's directory.
        tpl_dir = os.path.dirname(self.template_path)
        tpl_file = os.path.basename(self.template_path)
        self.env = Environment(
            loader=FileSystemLoader(tpl_dir),
            autoescape=select_autoescape(['html', 'xml'])
        )
        self.template = self.env.get_template(tpl_file)

        # Load refinement prompt
        prompt_path = prompt_path or os.getenv("PROMPT_PATH", "./report_prompt.txt")
        if not os.path.isfile(prompt_path):
            raise FileNotFoundError(f"Refinement prompt template not found: {prompt_path}")
        with open(prompt_path, encoding="utf-8") as f:
            self.refine_prompt_template = f.read()

        # Additional prompts for the overall and UX/UI summaries.
        self.overall_summary_prompt = self._load_prompt("./summary_prompt.txt")
        self.ux_summary_prompt = self._load_prompt("./ui_ux_prompt.txt")

        # Gemini wrapper used for all refinement/summarization calls.
        self.chat = ChatRefiner(model_name="gemini-1.5-flash")

    def _load_prompt(self, path: str) -> str:
        """Read a prompt file as UTF-8 text; raise FileNotFoundError if missing."""
        if not os.path.isfile(path):
            raise FileNotFoundError(f"Prompt file not found: {path}")
        with open(path, encoding="utf-8") as f:
            return f.read()

    def refine_section(self, raw_text: str) -> str:
        """
        Uses Gemini to convert raw dict-like or prose analysis into a
        markdown-style summary. Falls back to the raw text on any error.
        """
        prompt = self.refine_prompt_template.replace("${raw}", raw_text)
        try:
            return self.chat.answer(prompt)
        except Exception as e:
            print(f"[ReportGenerator.refine_section] Error during refinement: {e}")
            return raw_text  # fallback to raw

    def generate_summaries(self, full_html: str, image_html: str) -> tuple[str, str]:
        """
        Uses Gemini to generate:
        - A human-readable overall summary
        - A UI/UX & images analysis summary

        Returns ("", "") on any error so report rendering can continue.
        """
        # FIX: removed leftover debug print() calls that dumped the entire
        # report HTML to stdout on every run.
        try:
            overall_prompt = self.overall_summary_prompt.replace("${html}", full_html)
            ux_prompt = self.ux_summary_prompt.replace("${html}", image_html)

            overall_md = self.chat.answer(overall_prompt)
            ux_md = self.chat.answer(ux_prompt)

            overall_html = markdown.markdown(overall_md)
            ux_html = markdown.markdown(ux_md)
            return overall_html, ux_html
        except Exception as e:
            print(f"[generate_summaries] Error: {e}")
            return "", ""

    def build(self, url: str, perf_text: str, seo_text: str, img_text: str) -> str:
        """
        Main method to convert raw analysis text into the clean HTML report.
        Each section is refined via Gemini and rendered via markdown → Jinja2.
        """
        # Refine via Gemini
        refined_perf = self.refine_section(perf_text)
        refined_seo = self.refine_section(seo_text)
        refined_img = self.refine_section(img_text)

        # Convert refined markdown to HTML.
        perf_html = markdown.markdown(refined_perf, extensions=["tables", "fenced_code"])
        seo_html = markdown.markdown(refined_seo, extensions=["tables", "fenced_code"])
        img_html = markdown.markdown(refined_img, extensions=["tables", "fenced_code"])

        # Generate summaries using refined HTML.
        full_html = perf_html + "\n\n" + seo_html + "\n\n" + img_html
        overall_summary_html, ux_ui_html = self.generate_summaries(full_html, img_html)

        # Inject into template
        return self.template.render(
            url=url,
            timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            summary=overall_summary_html,
            ux_summary=ux_ui_html,
            performance=perf_html,
            seo=seo_html,
            images=img_html
        )
|
report_prompt.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are a frontend engineer and technical writer.
|
| 2 |
+
|
| 3 |
+
Your task is to convert the following raw diagnostic report into structured, clean text using markdown style. Specifically:
|
| 4 |
+
|
| 5 |
+
- If the text contains JSON or dicts, extract the key values and convert them into readable bullet points.
|
| 6 |
+
- Group data logically under subheadings (use `###`).
|
| 7 |
+
- Bold key numbers or metrics using `**bold**`.
|
| 8 |
+
- If the text is prose, clean it up for readability.
|
| 9 |
+
- Do not include raw JSON blocks or code fences.
|
| 10 |
+
|
| 11 |
+
=== BEGIN RAW REPORT ===
|
| 12 |
+
${raw}
|
| 13 |
+
=== END RAW REPORT ===
|
requirements.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
python-dotenv
|
| 3 |
+
google-cloud-aiplatform
|
| 4 |
+
google-generativeai
|
| 5 |
+
gradio
|
| 6 |
+
jinja2
|
| 7 |
+
pdfkit
|
| 8 |
+
requests
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# scraping enhancements:
|
| 12 |
+
beautifulsoup4
|
| 13 |
+
pandas
|
| 14 |
+
langchain-community
|
| 15 |
+
|
| 16 |
+
markdown
|
scraper_agent.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# scraper_agent.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import time
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
import requests
|
| 7 |
+
from bs4 import BeautifulSoup
|
| 8 |
+
|
| 9 |
+
load_dotenv(override=True)
|
| 10 |
+
|
| 11 |
+
class ScraperAgent:
    """
    requests + BeautifulSoup page fetcher. Extracts the title, raw HTML,
    image sources, and visible body text of a page.
    """

    def __init__(self):
        # All politeness settings are env-configurable for deployments.
        self.user_agent = os.getenv(
            "SCRAPER_USER_AGENT",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"
        )
        self.timeout = int(os.getenv("SCRAPER_TIMEOUT", "10"))
        self.delay = float(os.getenv("SCRAPER_DELAY", "0.5"))

    def fetch(self, url: str) -> dict:
        """
        GET `url` and return {"title": str, "html": str, "images": list[str], "text": str}.

        Raises:
            requests.HTTPError: on non-2xx responses (via raise_for_status).
        """
        headers = {"User-Agent": self.user_agent}
        resp = requests.get(url, headers=headers, timeout=self.timeout)
        resp.raise_for_status()

        html = resp.text
        soup = BeautifulSoup(html, "html.parser")

        images = [img["src"] for img in soup.find_all("img", src=True)]
        body = soup.body.get_text("\n", strip=True) if soup.body else ""

        # Politeness delay between fetches.
        time.sleep(self.delay)

        # BUG FIX: an empty <title></title> element has .string == None,
        # which leaked None into "title"; coerce to "" so the value is
        # always a str as callers (e.g. pipeline status messages) expect.
        title = soup.title.string if soup.title and soup.title.string else ""

        return {
            "title": title,
            "html": html,
            "images": images,
            "text": body
        }

    def close(self):
        """
        Clean up any resources.
        No-op for requests-based scraper,
        but lets pipeline always call scraper.close().
        """
        pass
|
summary_prompt.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are a friendly website audit assistant writing for a non-technical business audience.
|
| 2 |
+
|
| 3 |
+
Given the following website report sections (on performance, SEO, and images), write a short and clear **Overall Summary** that highlights:
|
| 4 |
+
|
| 5 |
+
- What’s working well
|
| 6 |
+
- What needs the most improvement
|
| 7 |
+
- Any major concerns to pay attention to
|
| 8 |
+
|
| 9 |
+
Avoid technical terms like "LCP" or "CLS" — instead, say things like “the site loads quickly” or “some images are too large.”
|
| 10 |
+
|
| 11 |
+
Keep it simple, helpful, and friendly. Imagine you’re explaining this to a founder or project manager with no technical background. The output should be a descriptive, report-style summary.
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
### Website Summary HTML:
|
| 15 |
+
|
| 16 |
+
${html}
|
ui.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from pipeline import Pipeline
|
| 3 |
+
import asyncio
|
| 4 |
+
|
| 5 |
+
pipeline = Pipeline()
|
| 6 |
+
|
| 7 |
+
# Define a light, pastel color theme inspired by modern web design
|
| 8 |
+
# Light pastel color theme inspired by modern web design.
theme = gr.themes.Soft(
    primary_hue=gr.themes.colors.blue,
    secondary_hue=gr.themes.colors.sky,
    neutral_hue=gr.themes.colors.slate,
    font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
).set(
    # Body and background colors.
    body_background_fill="#f0f9ff",  # a very light pastel blue
    body_background_fill_dark="#020617",
    # Card/block styles.
    block_background_fill="white",
    block_border_width="1px",
    block_shadow="*shadow_drop_lg",
    block_radius="*radius_xl",
    # Button styles.
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    button_primary_text_color="white",
)

# Custom CSS for finer control over the layout.
custom_css = """
/* Make the container wider for a better desktop experience */
.gradio-container {
    max-width: 90% !important;
}

/* Vertically align the button with the textbox */
.input-row {
    align-items: center;
}

/* Add spacing between status box and report preview */
.gr-block > .gr-row > *:not(:last-child) {
    margin-right: 2rem;
}
"""
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
async def analyze(url: str):
    """
    Drive the pipeline for `url`, streaming (status, report_html) updates
    to the Gradio outputs. The preview is only replaced once a payload
    that looks like a full HTML document arrives.
    """
    if not url:
        yield gr.update(value="❌ Please enter a URL."), gr.update(value="")
        return

    html_output = ""

    async for status, payload in pipeline.run(url):
        looks_like_report = (
            isinstance(payload, str)
            and payload
            and payload.strip().startswith("<!DOCTYPE html>")
        )
        if looks_like_report:
            html_output = payload
        yield gr.update(value=status), gr.update(value=html_output)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def launch_ui():
    """
    Launches the Gradio interface with the pastel theme and improved layout.
    """
    with gr.Blocks(theme=theme, css=custom_css, title="Website Analyzer") as app:
        # Page header: title + subtitle.
        gr.Markdown(
            """
            <div style="text-align: center; padding: 2rem 0;">
                <h1 style="font-size: 2.8rem; font-weight: 700; color: #1e3a8a;">📊 Website Intelligence Report</h1>
                <p style="color: #475569; font-size: 1.1rem;">Enter any URL to generate a comprehensive analysis of its performance, SEO, and layout.</p>
            </div>
            """
        )

        # Input row: URL textbox + analyze button, vertically centered.
        with gr.Row(elem_classes="input-row"):
            url_box = gr.Textbox(
                label="Webpage URL",
                placeholder="e.g., https://www.example.com",
                scale=4,          # textbox takes most of the row
                container=False   # no container for better alignment
            )
            run_btn = gr.Button(
                "Analyze 🚀",
                variant="primary",
                scale=1,          # button takes the remainder
            )

        # Output sections: streaming status line + rendered report preview.
        status_box = gr.Textbox(
            label="Analysis Progress",
            interactive=False,
            lines=1,
            scale=1,
            placeholder="Status updates will appear here...",
        )
        report_view = gr.HTML(
            label="Final Report Preview"
            # scale=3
        )

        gr.Markdown(
            "--- \n"
            "<p style='text-align:center; color: #6b7280;'>💡 After analysis, use the <strong>Print Report</strong> button inside the preview to save as a PDF.</p>"
        )

        # Wire the button to the streaming analyze() generator.
        run_btn.click(
            fn=analyze,
            inputs=[url_box],
            outputs=[status_box, report_view],
            show_progress="full",
            concurrency_limit=1,
        )

    # NOTE(review): share=True publishes a public gradio.live URL — confirm
    # this exposure is intended for the deployment environment.
    app.launch(share=True)
|
| 119 |
+
# Allow running ui.py directly as well as via app.py.
if __name__ == "__main__":
    launch_ui()
|
ui_ux_prompt.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are a website experience reviewer writing for a non-technical client.
|
| 2 |
+
|
| 3 |
+
Using the image and layout section of this report, write a short and friendly **UX & Design Summary**.
|
| 4 |
+
|
| 5 |
+
Please include:
|
| 6 |
+
- Any problems with how the page looks or feels (like clutter, spacing, readability)
|
| 7 |
+
- Notes on how images are used (e.g., missing descriptions, too slow to load, poor quality)
|
| 8 |
+
- Suggestions for making the experience smoother on desktop or mobile
|
| 9 |
+
|
| 10 |
+
Avoid tech jargon. Use plain language like “some images don’t show descriptions” or “the layout could feel cleaner.”
|
| 11 |
+
|
| 12 |
+
Imagine you’re writing for someone who owns the website and just wants to know what to fix for better user experience.
|
| 13 |
+
|
| 14 |
+
---
|
| 15 |
+
|
| 16 |
+
### Image & Layout HTML Section:
|
| 17 |
+
|
| 18 |
+
${html}
|