Tngarg commited on
Commit
db16232
·
verified ·
1 Parent(s): 2ee7f94

Upload 12 files

Browse files
app.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
# app.py
# Thin entry point: all UI construction lives in ui.py.
from ui import launch_ui

if __name__ == "__main__":
    # Launch only when run as a script, never on import.
    launch_ui()
chat_wrapper.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# chat_model_wrapper.py

import os
import google.generativeai as genai
from dotenv import load_dotenv

load_dotenv(override=True)


class ChatRefiner:
    """Thin wrapper around a Gemini model for prompt -> text generation."""

    def __init__(self, model_name: str = "gemini-1.5-flash"):
        """
        Configure the Gemini client and create the model handle.

        Args:
            model_name: Gemini model identifier to use.

        Raises:
            ValueError: if GEMINI_API_KEY is not set in the environment
                (or the .env file loaded above).
        """
        # SECURITY: the key must come from the environment only. The previous
        # revision hard-coded an API key here, which both leaked the secret in
        # version control and made this env lookup dead code.
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY not set in environment or .env file.")

        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)

    def answer(self, prompt: str) -> str:
        """
        Send `prompt` to Gemini and return the generated text, stripped.

        Raises:
            Exception: any API/client error is logged and re-raised so the
                caller can decide how to degrade.
        """
        try:
            response = self.model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            # Log or handle as you prefer
            print(f"[ChatRefiner.answer] Error: {e}")
            raise
pdf_exporter.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import pdfkit
from dotenv import load_dotenv

load_dotenv(override=True)


class PDFExporter:
    """Renders HTML strings to PDF files using wkhtmltopdf via pdfkit."""

    def __init__(self):
        # Destination directory for generated PDFs; created on demand.
        self.output_dir = os.getenv('OUTPUT_DIR', './out')
        os.makedirs(self.output_dir, exist_ok=True)

        # Optional explicit wkhtmltopdf binary for systems where it is
        # not on PATH; fail fast if the configured path is wrong.
        wkhtml_path = os.getenv("WKHTMLTOPDF_PATH")
        if wkhtml_path and not os.path.isfile(wkhtml_path):
            raise FileNotFoundError(f"wkhtmltopdf not found at: {wkhtml_path}")

        self.config = pdfkit.configuration(wkhtmltopdf=wkhtml_path) if wkhtml_path else None

    def export(self, html: str, name: str = "report") -> str:
        """Write `html` as `<output_dir>/<name>.pdf` and return that path.

        Any pdfkit/wkhtmltopdf failure is logged and re-raised.
        """
        out_path = os.path.join(self.output_dir, f"{name}.pdf")
        try:
            pdfkit.from_string(html, out_path, configuration=self.config)
        except Exception as e:
            print(f"[PDFExporter.export] PDF generation failed: {e}")
            raise
        return out_path
performance_agent.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# performance_agent.py

import os
import requests


class PerformanceAgent:
    """
    Wraps the PageSpeed Insights API to fetch
    Lighthouse scores for a given URL.
    On any network/API failure, returns all-100% defaults.
    """

    EXPECTED_CATEGORIES = ("performance", "accessibility", "best-practices", "seo")

    def __init__(self):
        """
        Raises:
            ValueError: if PAGESPEED_API_KEY is not set in the environment.
        """
        # SECURITY: read the key from the environment only. The previous
        # revision committed a live API key as the getenv() default, which
        # leaked the secret and made the guard below unreachable.
        self.api_key = os.getenv("PAGESPEED_API_KEY")
        self.endpoint = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
        if not self.api_key:
            raise ValueError("PAGESPEED_API_KEY not set in .env")

    def fetch_performance(self, url: str, strategy: str = "mobile") -> dict:
        """
        Fetch Lighthouse category scores and failing-audit hints for `url`.

        Args:
            url: page to analyze.
            strategy: "mobile" or "desktop".

        Returns:
            {"scores": {category: percent}, "audit_suggestions": {id: hint}}.
            On any network/API error, all scores default to 100.0 and the
            suggestions dict is empty (best-effort, never raises for
            request failures).
        """
        params = {
            "url": url,
            "strategy": strategy,  # "mobile" or "desktop"
            "key": self.api_key
        }

        try:
            resp = requests.get(self.endpoint, params=params, timeout=30)
            resp.raise_for_status()
            data = resp.json()
            lh = data.get("lighthouseResult", {})
            cats = lh.get("categories", {})
            audits = lh.get("audits", {})

            # Build scores, default missing/unparsable → 100.0
            scores = {}
            for cat in self.EXPECTED_CATEGORIES:
                raw = cats.get(cat, {}).get("score")
                if isinstance(raw, (int, float)):
                    # PSI reports scores in [0, 1]; expose percentages.
                    scores[cat] = round(raw * 100, 1)
                else:
                    scores[cat] = 100.0

            # Collect audits < 100% for suggestions
            audit_suggestions = {}
            for audit_id, info in audits.items():
                sc = info.get("score")
                if isinstance(sc, (int, float)) and sc < 1:
                    audit_suggestions[audit_id] = info.get("displayValue", "").strip()

            return {
                "scores": scores,
                "audit_suggestions": audit_suggestions
            }

        except requests.exceptions.RequestException as e:
            # Log the error and return an all-100% default, with no suggestions
            print(f"[PerformanceAgent] PSI API error: {e}")
            return {
                "scores": {cat: 100.0 for cat in self.EXPECTED_CATEGORIES},
                "audit_suggestions": {}
            }
pipeline.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import asyncio
from scraper_agent import ScraperAgent
from seo_agent.seo_agent import SEOAgent
from performance_agent import PerformanceAgent
from report_generator import ReportGenerator
# from pdf_exporter import PDFExporter


class Pipeline:
    """Orchestrates scraping, performance, SEO/image analysis and report build."""

    def __init__(self):
        self.scraper = ScraperAgent()
        self.seo = SEOAgent()
        self.perf = PerformanceAgent()
        self.report = ReportGenerator()
        # self.pdf = PDFExporter()

    async def run(self, url: str):
        """Async generator yielding (status_message, payload) pairs for the UI."""
        yield "🔍 Scraping URL and Fetching Performance", None

        # Page scrape and PSI fetch are independent — overlap them in threads.
        page, perf_data = await asyncio.gather(
            asyncio.to_thread(self.scraper.fetch, url),
            asyncio.to_thread(self.perf.fetch_performance, url),
        )

        yield f"✔️ Title: {page['title']!r}", None
        yield "✅ Performance metrics ready", perf_data

        # SEO and image analysis both need the scraped page; run them together.
        yield "⚙️ Running SEO & Image analysis…", None
        seo_report, img_report = await asyncio.gather(
            asyncio.to_thread(self.seo.analyze_seo, page["html"], page["text"]),
            asyncio.to_thread(self.seo.analyze_images, page["images"], page["text"]),
        )

        yield "✅ SEO analysis complete", seo_report
        yield "✅ Image analysis complete", img_report

        # Report rendering is sequential: it consumes every prior result.
        yield "📝 Generating report HTML…", None
        html = self.report.build(url, str(perf_data), seo_report, img_report)
        yield "✅ Report HTML ready", html

        # PDF export is intentionally disabled (the HTML preview offers print).
        # yield "📄 Exporting to PDF…", None
        # pdf_path = self.pdf.export(html, name="final_report")
        # yield "🎉 Done!", pdf_path

        self.scraper.close()
report_generator.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
from datetime import datetime
from jinja2 import Environment, FileSystemLoader, select_autoescape
import markdown
from chat_wrapper import ChatRefiner


class ReportGenerator:
    """
    Builds the final HTML report: refines raw analysis text via Gemini,
    converts the markdown results to HTML, and renders a Jinja2 template.
    """

    def __init__(self, template_path: str = None, prompt_path: str = None):
        """
        Args:
            template_path: Jinja2 report template (default: TEMPLATE_PATH env
                or ./templates/report.html.j2).
            prompt_path: section-refinement prompt file (default: PROMPT_PATH
                env or ./report_prompt.txt).

        Raises:
            FileNotFoundError: if the template or any prompt file is missing.
        """
        # Default template path
        self.template_path = template_path or os.getenv("TEMPLATE_PATH", "./templates/report.html.j2")
        if not os.path.isfile(self.template_path):
            raise FileNotFoundError(f"Report template not found: {self.template_path}")

        # Load Jinja2 environment rooted at the template's directory.
        tpl_dir = os.path.dirname(self.template_path)
        tpl_file = os.path.basename(self.template_path)
        self.env = Environment(
            loader=FileSystemLoader(tpl_dir),
            autoescape=select_autoescape(['html', 'xml'])
        )
        self.template = self.env.get_template(tpl_file)

        # Load refinement prompt
        prompt_path = prompt_path or os.getenv("PROMPT_PATH", "./report_prompt.txt")
        if not os.path.isfile(prompt_path):
            raise FileNotFoundError(f"Refinement prompt template not found: {prompt_path}")
        with open(prompt_path, encoding="utf-8") as f:
            self.refine_prompt_template = f.read()

        # Load additional prompts for summary and UX/UI
        self.overall_summary_prompt = self._load_prompt("./summary_prompt.txt")
        self.ux_summary_prompt = self._load_prompt("./ui_ux_prompt.txt")

        # Initialize Gemini wrapper
        self.chat = ChatRefiner(model_name="gemini-1.5-flash")

    def _load_prompt(self, path: str) -> str:
        """Read a prompt file, raising FileNotFoundError if absent."""
        if not os.path.isfile(path):
            raise FileNotFoundError(f"Prompt file not found: {path}")
        with open(path, encoding="utf-8") as f:
            return f.read()

    def refine_section(self, raw_text: str) -> str:
        """
        Uses Gemini to convert raw dict-like or prose analysis into
        markdown-style summary. Falls back to the raw text on any error.
        """
        prompt = self.refine_prompt_template.replace("${raw}", raw_text)
        try:
            return self.chat.answer(prompt)
        except Exception as e:
            print(f"[ReportGenerator.refine_section] Error during refinement: {e}")
            return raw_text  # fallback to raw

    def generate_summaries(self, full_html: str, image_html: str) -> tuple[str, str]:
        """
        Uses Gemini to generate:
        - A human-readable overall summary
        - A UI/UX & images analysis summary

        Returns ("", "") on any failure so report rendering can continue.
        """
        try:
            overall_prompt = self.overall_summary_prompt.replace("${html}", full_html)
            ux_prompt = self.ux_summary_prompt.replace("${html}", image_html)
            # NOTE: previous revision print()ed both full HTML payloads here
            # (debug leftovers); removed to keep stdout usable for real logs.

            overall_md = self.chat.answer(overall_prompt)
            ux_md = self.chat.answer(ux_prompt)

            overall_html = markdown.markdown(overall_md)
            ux_html = markdown.markdown(ux_md)
            return overall_html, ux_html
        except Exception as e:
            print(f"[generate_summaries] Error: {e}")
            return "", ""

    def build(self, url: str, perf_text: str, seo_text: str, img_text: str) -> str:
        """
        Main method to convert raw analysis text into a clean HTML report.
        Each section is refined via Gemini and rendered via markdown → Jinja2.
        """
        # Refine via Gemini
        refined_perf = self.refine_section(perf_text)
        refined_seo = self.refine_section(seo_text)
        refined_img = self.refine_section(img_text)

        # Convert to HTML using markdown
        perf_html = markdown.markdown(refined_perf, extensions=["tables", "fenced_code"])
        seo_html = markdown.markdown(refined_seo, extensions=["tables", "fenced_code"])
        img_html = markdown.markdown(refined_img, extensions=["tables", "fenced_code"])

        # Generate summaries using refined HTML
        full_html = perf_html + "\n\n" + seo_html + "\n\n" + img_html
        overall_summary_html, ux_ui_html = self.generate_summaries(full_html, img_html)

        # Inject into template
        return self.template.render(
            url=url,
            timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            summary=overall_summary_html,
            ux_summary=ux_ui_html,
            performance=perf_html,
            seo=seo_html,
            images=img_html
        )
report_prompt.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are a frontend engineer and technical writer.
2
+
3
+ Your task is to convert the following raw diagnostic report into structured, clean text using markdown style. Specifically:
4
+
5
+ - If the text contains JSON or dicts, extract the key values and convert them into readable bullet points.
6
+ - Group data logically under subheadings (use `###`).
7
+ - Bold key numbers or metrics using `**bold**`.
8
+ - If the text is prose, clean it up for readability.
9
+ - Do not include raw JSON blocks or code fences.
10
+
11
+ === BEGIN RAW REPORT ===
12
+ ${raw}
13
+ === END RAW REPORT ===
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ python-dotenv
3
+ google-cloud-aiplatform
4
+ google-generativeai
5
+ gradio
6
+ jinja2
7
+ pdfkit
8
+ requests
9
+
10
+
11
+ # scraping enhancements:
12
+ beautifulsoup4
13
+ pandas
14
+ langchain-community
15
+
16
+ markdown
scraper_agent.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# scraper_agent.py

import os
import time
from dotenv import load_dotenv
import requests
from bs4 import BeautifulSoup

load_dotenv(override=True)


class ScraperAgent:
    """Fetches a page over HTTP and extracts title, image URLs and body text."""

    def __init__(self):
        # A browser-like User-Agent avoids trivial bot blocking; overridable.
        self.user_agent = os.getenv(
            "SCRAPER_USER_AGENT",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"
        )
        self.timeout = int(os.getenv("SCRAPER_TIMEOUT", "10"))
        # Politeness delay applied after each fetch (seconds).
        self.delay = float(os.getenv("SCRAPER_DELAY", "0.5"))

    def fetch(self, url: str) -> dict:
        """
        Download `url` and return {"title": str, "html": str,
        "images": list[str], "text": str}.

        Raises:
            requests.HTTPError: on non-2xx responses (raise_for_status).
        """
        headers = {"User-Agent": self.user_agent}
        resp = requests.get(url, headers=headers, timeout=self.timeout)
        resp.raise_for_status()

        html = resp.text
        soup = BeautifulSoup(html, "html.parser")

        images = [img["src"] for img in soup.find_all("img", src=True)]
        body = soup.body.get_text("\n", strip=True) if soup.body else ""

        # FIX: soup.title.string is None for an empty <title>; coerce to ""
        # so the "title" field is always a str as documented.
        title = (soup.title.string or "") if soup.title else ""

        time.sleep(self.delay)

        return {
            "title": title,
            "html": html,
            "images": images,
            "text": body
        }

    def close(self):
        """
        Clean up any resources.
        No-op for requests-based scraper,
        but lets pipeline always call scraper.close().
        """
        pass
summary_prompt.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are a friendly website audit assistant writing for a non-technical business audience.
2
+
3
+ Given the following website report sections (on performance, SEO, and images), write a short and clear **Overall Summary** that highlights:
4
+
5
+ - What’s working well
6
+ - What needs the most improvement
7
+ - Any major concerns to pay attention to
8
+
9
+ Avoid technical terms like "LCP" or "CLS" — instead, say things like “the site loads quickly” or “some images are too large.”
10
+
11
+ Keep it simple, helpful, and friendly. Imagine you’re explaining this to a founder or project manager with no technical background. The output should be in a descriptive report style.
12
+ ---
13
+
14
+ ### Website Summary HTML:
15
+
16
+ ${html}
ui.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from pipeline import Pipeline
import asyncio

# Single shared pipeline instance reused across requests.
pipeline = Pipeline()

# Soft pastel theme: blue/sky hues on slate neutrals with Poppins.
theme = gr.themes.Soft(
    primary_hue=gr.themes.colors.blue,
    secondary_hue=gr.themes.colors.sky,
    neutral_hue=gr.themes.colors.slate,
    font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
).set(
    # Page background: very light pastel blue (dark mode near-black).
    body_background_fill="#f0f9ff",
    body_background_fill_dark="#020617",

    # Cards/blocks: white, bordered, large shadow + radius.
    block_background_fill="white",
    block_border_width="1px",
    block_shadow="*shadow_drop_lg",
    block_radius="*radius_xl",

    # Primary buttons: solid blue with darker hover, white label.
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    button_primary_text_color="white",
)

# Extra CSS for layout tweaks the theme API does not cover.
custom_css = """
/* Make the container wider for a better desktop experience */
.gradio-container {
    max-width: 90% !important;
}

/* Vertically align the button with the textbox */
.input-row {
    align-items: center;
}

/* Add spacing between status box and report preview */
.gr-block > .gr-row > *:not(:last-child) {
    margin-right: 2rem;
}
"""
+
48
+
49
+ async def analyze(url: str):
50
+ if not url:
51
+ yield gr.update(value="❌ Please enter a URL."), gr.update(value="")
52
+ return
53
+
54
+ html_output = ""
55
+
56
+ async for status, payload in pipeline.run(url):
57
+ if payload and isinstance(payload, str) and payload.strip().startswith("<!DOCTYPE html>"):
58
+ html_output = payload
59
+ yield gr.update(value=status), gr.update(value=html_output)
60
+
61
+
62
+ def launch_ui():
63
+ """
64
+ Launches the Gradio interface with the new pastel theme and improved layout.
65
+ """
66
+ with gr.Blocks(theme=theme, css=custom_css, title="Website Analyzer") as demo:
67
+ # Main Title and Subtitle
68
+ gr.Markdown(
69
+ """
70
+ <div style="text-align: center; padding: 2rem 0;">
71
+ <h1 style="font-size: 2.8rem; font-weight: 700; color: #1e3a8a;">📊 Website Intelligence Report</h1>
72
+ <p style="color: #475569; font-size: 1.1rem;">Enter any URL to generate a comprehensive analysis of its performance, SEO, and layout.</p>
73
+ </div>
74
+ """
75
+ )
76
+
77
+ # Input Row: URL Textbox and Analyze Button
78
+ with gr.Row(elem_classes="input-row"):
79
+ url_in = gr.Textbox(
80
+ label="Webpage URL",
81
+ placeholder="e.g., https://www.example.com",
82
+ scale=4, # Give more width to the textbox
83
+ container=False # Remove the container for better alignment
84
+ )
85
+ analyze_btn = gr.Button(
86
+ "Analyze 🚀",
87
+ variant="primary",
88
+ scale=1, # Give less width to the button
89
+ )
90
+
91
+ # Output Sections
92
+ status_out = gr.Textbox(
93
+ label="Analysis Progress",
94
+ interactive=False,
95
+ lines=1,
96
+ scale=1,
97
+ placeholder="Status updates will appear here...",
98
+ )
99
+ html_preview = gr.HTML(
100
+ label="Final Report Preview"
101
+ # scale=3
102
+ )
103
+
104
+ gr.Markdown(
105
+ "--- \n"
106
+ "<p style='text-align:center; color: #6b7280;'>💡 After analysis, use the <strong>Print Report</strong> button inside the preview to save as a PDF.</p>"
107
+ )
108
+
109
+ # Button Click Action
110
+ analyze_btn.click(
111
+ fn=analyze,
112
+ inputs=[url_in],
113
+ outputs=[status_out, html_preview],
114
+ show_progress="full",
115
+ concurrency_limit=1,
116
+ )
117
+
118
+ demo.launch(share=True)
119
+ if __name__ == "__main__":
120
+ launch_ui()
ui_ux_prompt.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are a website experience reviewer writing for a non-technical client.
2
+
3
+ Using the image and layout section of this report, write a short and friendly **UX & Design Summary**.
4
+
5
+ Please include:
6
+ - Any problems with how the page looks or feels (like clutter, spacing, readability)
7
+ - Notes on how images are used (e.g., missing descriptions, too slow to load, poor quality)
8
+ - Suggestions for making the experience smoother on desktop or mobile
9
+
10
+ Avoid tech jargon. Use plain language like “some images don’t show descriptions” or “the layout could feel cleaner.”
11
+
12
+ Imagine you’re writing for someone who owns the website and just wants to know what to fix for better user experience.
13
+
14
+ ---
15
+
16
+ ### Image & Layout HTML Section:
17
+
18
+ ${html}