Spaces:

Hammad712
/

website-audit

Sleeping

App Files Files Community

website-audit / main.py

Hammad712

Update main.py

809d658 verified 9 months ago

raw

history blame contribute delete

7.52 kB

	import re
	import os
	import json
	import requests
	import google.generativeai as genai
	from reportlab.lib.pagesizes import letter
	from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, ListFlowable, ListItem
	from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
	from fastapi import FastAPI, HTTPException, Query
	from fastapi.responses import FileResponse

	# ----------------------------
	# Helper functions (provided code)
	# ----------------------------

	def convert_markdown(text):
	    """
	    Convert markdown bold markers (``**text**``) to HTML ``<b>`` tags.

	    Every complete ``**...**`` pair is rewritten independently (the match is
	    non-greedy), so ``"**a** and **b**"`` becomes ``"<b>a</b> and <b>b</b>"``.
	    Text without a complete pair of markers is returned unchanged.
	    """
	    # The asterisks must be escaped: an unescaped ``*`` is a regex quantifier.
	    # ``.+?`` requires at least one character, so a bare "****" is left alone.
	    return re.sub(r"\*\*(.+?)\*\*", r"<b>\1</b>", text)

	def parse_report_text(report_text):
	    """
	    Parse the generated report text and return a list of ReportLab flowables.

	    Conventions used (each line is stripped of surrounding whitespace first):
	    - A line wrapped in one pair of '**' markers (e.g. ``**Summary**``) is
	      treated as a heading.
	    - A line starting with a single '*' is treated as a bullet item;
	      consecutive bullet lines are grouped into one bullet list.
	    - An empty line ends the current bullet list and adds a spacer.
	    - All other nonempty lines are treated as normal paragraphs.

	    Bullet, heading and paragraph text is passed through convert_markdown().
	    """
	    styles = getSampleStyleSheet()
	    heading_style = styles["Heading1"]
	    normal_style = styles["BodyText"]
	    bullet_style = styles["Bullet"]

	    flowables = []
	    bullet_items = []  # bullets collected for the list currently being built

	    def flush_bullets():
	        """Append the pending bullet list, if any, and start a fresh one."""
	        nonlocal bullet_items
	        if bullet_items:
	            flowables.append(ListFlowable(bullet_items, bulletType='bullet', leftIndent=20))
	            bullet_items = []

	    for raw_line in report_text.splitlines():
	        line = raw_line.strip()

	        # An empty line closes any open bullet list and adds vertical space.
	        if not line:
	            flush_bullets()
	            flowables.append(Spacer(1, 12))
	            continue

	        # Headings are tested before bullets because "**Title**" also starts
	        # with '*'. A further "**" inside the line means inline bold text
	        # (e.g. "**Key:** value **x**"), not a heading.
	        inner = line[2:-2].strip()
	        is_heading = (
	            line.startswith("**")
	            and line.endswith("**")
	            and bool(inner)
	            and "**" not in inner
	        )

	        if is_heading:
	            flush_bullets()
	            flowables.append(Paragraph(convert_markdown(inner), heading_style))
	        elif line.startswith("*") and not line.startswith("**"):
	            # Only a single leading '*' marks a bullet; a leading "**" opens
	            # inline bold text and is handled as a normal paragraph below.
	            bullet_text = convert_markdown(line[1:].strip())
	            bullet_items.append(Paragraph(bullet_text, bullet_style))
	        else:
	            flush_bullets()
	            flowables.append(Paragraph(convert_markdown(line), normal_style))

	    # Flush any bullet list still open at the end of the text.
	    flush_bullets()
	    return flowables

	def get_pagespeed_data(target_url, pagespeed_api_key, timeout=120):
	    """
	    Fetch data from the PageSpeed Insights API for the given URL.

	    Args:
	        target_url: URL of the page to analyse, sent as the ``url`` parameter.
	        pagespeed_api_key: API key, sent as the ``key`` parameter.
	        timeout: Seconds to wait for the API before giving up. Without a
	            timeout, requests waits indefinitely, so a stalled connection
	            would block the caller forever.

	    Returns:
	        The decoded JSON body of the API response.

	    Raises:
	        Exception: If the API answers with a status code other than 200.
	        requests.RequestException: On connection failure or timeout.
	    """
	    endpoint = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
	    params = {
	        "url": target_url,
	        "key": pagespeed_api_key,
	    }
	    response = requests.get(endpoint, params=params, timeout=timeout)
	    if response.status_code != 200:
	        raise Exception(f"Error fetching PageSpeed data: {response.status_code} - {response.text}")
	    return response.json()

	def generate_report_with_gemini(pagespeed_data, gemini_api_key):
	    """
	    Use the Gemini model to generate a report from PageSpeed Insights data.

	    Args:
	        pagespeed_data: JSON-serialisable PageSpeed Insights result; it is
	            pretty-printed and appended to the prompt.
	        gemini_api_key: API key used to configure the Gemini client.

	    Returns:
	        The generated report text, or "No report could be generated." when
	        the response carries no readable text.
	    """
	    fallback = "No report could be generated."

	    # Configure the Gemini API with the provided key.
	    genai.configure(api_key=gemini_api_key)

	    # Select a Gemini model. For this example, we use 'gemini-2.0-flash'.
	    model = genai.GenerativeModel("gemini-2.0-flash")

	    # Prepare the prompt including the pretty-printed JSON. The "no tables"
	    # instruction is its own sentence so it does not interrupt the list of
	    # requested structural elements.
	    prompt = (
	        "Please generate a detailed and well-structured report on the website performance based "
	        "on the following PageSpeed Insights data. Organize the report with clear headings, "
	        "subheadings, and bullet points for key metrics and recommendations. "
	        "Do not generate tables in the report:\n\n"
	        + json.dumps(pagespeed_data, indent=2)
	    )

	    # Generate the content using Gemini.
	    response = model.generate_content(prompt)
	    if not response:
	        return fallback

	    # hasattr() only swallows AttributeError, but the SDK's `text` accessor
	    # raises ValueError when the response holds no usable candidate (for
	    # example when it was blocked), so both are handled explicitly here.
	    try:
	        return response.text
	    except (AttributeError, ValueError):
	        return fallback

	def save_report_to_pdf(report_text, pdf_filename="pagespeed_report.pdf"):
	    """
	    Render the report text as a letter-size PDF written to ``pdf_filename``.

	    The text is turned into flowables by parse_report_text() and laid out
	    with a margin of 72 on every side. A confirmation line is printed once
	    the document has been built.
	    """
	    # The same margin applies to all four sides of the page.
	    margins = dict.fromkeys(("rightMargin", "leftMargin", "topMargin", "bottomMargin"), 72)
	    pdf_document = SimpleDocTemplate(pdf_filename, pagesize=letter, **margins)

	    # Convert the text into platypus flowables and build the document.
	    story = parse_report_text(report_text)
	    pdf_document.build(story)
	    print(f"Report saved as {pdf_filename}")

	# ----------------------------
	# FastAPI App and Endpoints
	# ----------------------------
	app = FastAPI()

	# Both API keys are read from the environment; the PDF is always written to
	# the same file name.
	PAGESPEED_API_KEY = os.getenv("PAGESPEED_API_KEY")
	GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
	PDF_FILENAME = "pagespeed_report.pdf"

	# Refuse to start when either key is missing or empty.
	if not (PAGESPEED_API_KEY and GEMINI_API_KEY):
	    raise Exception("Please set the PAGESPEED_API_KEY and GEMINI_API_KEY environment variables.")

	@app.get("/")
	def read_root():
	    """
	    Return a welcome message that names the available endpoints.
	    """
	    welcome_text = (
	        "Welcome to the PageSpeed Insights Report Generator API. "
	        "Use /generate_report?url=<target_url> (POST) to generate a report and /download_pdf (GET) to download the PDF."
	    )
	    return {"message": welcome_text}

	@app.post("/generate_report")
	def generate_report(url: str = Query(..., description="The target URL for which to generate the report")):
	    """
	    Generates the report by:
	    1. Fetching PageSpeed Insights data for the given URL.
	    2. Generating a report via the Gemini API.
	    3. Saving the report as a PDF (to PDF_FILENAME).
	    Returns the generated report text under the "report" key.

	    Raises:
	        HTTPException: Status 500 with a generic message if any step fails.
	            The underlying error is logged on the server, not sent to the
	            client.
	    """
	    import logging  # local import: logging is not among the file's imports

	    try:
	        # Step 1: Fetch PageSpeed data for the provided URL.
	        pagespeed_data = get_pagespeed_data(url, PAGESPEED_API_KEY)
	        # Step 2: Generate report using the Gemini API.
	        report_text = generate_report_with_gemini(pagespeed_data, GEMINI_API_KEY)
	        # Step 3: Save the report as a PDF.
	        save_report_to_pdf(report_text, pdf_filename=PDF_FILENAME)
	    except Exception as e:
	        # Do not echo str(e) to the client: connection errors raised by
	        # requests include the full request URL, and that URL carries the
	        # PageSpeed API key as its "key" query parameter.
	        logging.getLogger(__name__).exception("Report generation failed for %s", url)
	        raise HTTPException(status_code=500, detail="Failed to generate the report.") from e
	    return {"report": report_text}

	@app.get("/download_pdf")
	def download_pdf():
	    """
	    Serve the generated PDF report as a file download.

	    Responds with 404 when nothing exists at PDF_FILENAME; call
	    /generate_report first so that the file is written.
	    """
	    report_available = os.path.exists(PDF_FILENAME)
	    if report_available:
	        return FileResponse(PDF_FILENAME, media_type="application/pdf", filename=PDF_FILENAME)
	    raise HTTPException(status_code=404, detail="PDF report not found. Please generate the report first.")

	# To run the app, use the command:
	# uvicorn main:app --reload