File size: 1,894 Bytes
27697ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import time
from urllib.parse import urljoin, urlparse

from bs4 import BeautifulSoup

from utils import safe_request

def _site_host(url):
    """Return the lowercase hostname of *url* without a leading ``www.``.

    An empty string is returned when *url* has no hostname or cannot be
    parsed, so callers never have to handle an exception.
    """
    try:
        host = urlparse(url).hostname or ""
    except ValueError:
        # urlparse rejects some malformed authorities (e.g. a bad IPv6 literal).
        return ""
    return host[4:] if host.startswith("www.") else host


def _classify_link(href, page_url, site_host):
    """Classify an anchor ``href`` relative to the scanned page.

    Returns ``"internal"`` when the link resolves to the same host as the
    scanned page, ``"external"`` when it resolves to a different HTTP(S)
    host, and ``None`` for anything that is not an HTTP(S) link
    (``mailto:``, ``tel:``, ``javascript:``, unparseable values, ...).
    """
    try:
        # Resolve relative links ("/about", "page.html") against the page URL
        # so they are compared by host like absolute links.
        resolved = urljoin(page_url, href.strip())
    except ValueError:
        return None
    if not resolved.lower().startswith(("http://", "https://")):
        return None
    return "internal" if _site_host(resolved) == site_host else "external"


def scan_website(url):
    """Fetch *url* and return a dictionary of basic on-page metrics.

    The page is fetched with ``safe_request`` and parsed with BeautifulSoup's
    ``html.parser`` backend.

    Args:
        url: Address of the page to scan.

    Returns:
        ``{"error": "Unable to fetch URL", "score": 0}`` when ``safe_request``
        returns a falsy value. Otherwise a dict with the keys:

        * ``status_code`` - ``response.status_code``.
        * ``load_time`` - seconds spent fetching and parsing, rounded to 2
          decimals.
        * ``https`` - whether *url* uses the ``https://`` scheme.
        * ``title`` - text of ``<title>``, or ``"Missing"`` when the tag is
          absent or empty.
        * ``meta_description`` - whether a ``<meta name="description">`` tag
          exists.
        * ``h1_count`` / ``h2_count`` / ``h3_count`` - heading counts, also
          grouped under ``headings_count`` (keys ``"H1"``, ``"H2"``, ``"H3"``).
        * ``images_without_alt`` - ``<img>`` tags with a missing or empty
          ``alt`` attribute.
        * ``links_count`` - number of ``<a>`` tags, with or without ``href``.
        * ``internal_links`` / ``external_links`` - HTTP(S) links pointing to
          the scanned host (a leading ``www.`` is ignored) or to another host.
          Relative links count as internal; non-HTTP schemes are not counted.
        * ``scripts_count`` - number of ``<script>`` tags.
        * ``paragraph_count`` - number of ``<p>`` tags.
        * ``page_size_mb`` - size of ``response.content`` in MiB (unrounded).
    """
    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments during the request.
    started = time.perf_counter()
    response = safe_request(url)
    if not response:
        return {"error": "Unable to fetch URL", "score": 0}

    soup = BeautifulSoup(response.text, "html.parser")
    # Load time covers both the HTTP request and the HTML parsing above.
    load_time = round(time.perf_counter() - started, 2)

    # Page size in MB
    page_size_mb = len(response.content) / (1024 * 1024)

    # Count internal vs external links by comparing hosts, not substrings.
    anchors = soup.find_all("a")
    site_host = _site_host(url)
    link_totals = {"internal": 0, "external": 0}
    for anchor in anchors:
        href = anchor.get("href")
        if not isinstance(href, str):
            continue  # anchor without an href attribute
        link_kind = _classify_link(href, url, site_host)
        if link_kind is not None:
            link_totals[link_kind] += 1

    # Heading counts
    headings_count = {
        "H1": len(soup.find_all("h1")),
        "H2": len(soup.find_all("h2")),
        "H3": len(soup.find_all("h3")),
    }

    # ``Tag.string`` is None for an empty <title> or one containing nested
    # markup, so read the text content and fall back to "Missing" explicitly.
    title_tag = soup.title
    title_text = title_tag.get_text().strip() if title_tag is not None else ""

    return {
        "status_code": response.status_code,
        "load_time": load_time,
        "https": url.lower().startswith("https://"),
        "title": title_text or "Missing",
        "meta_description": bool(soup.find("meta", attrs={"name": "description"})),
        "h1_count": headings_count["H1"],
        "h2_count": headings_count["H2"],
        "h3_count": headings_count["H3"],
        "headings_count": headings_count,
        "images_without_alt": sum(
            1 for img in soup.find_all("img") if not img.get("alt")
        ),
        "links_count": len(anchors),
        "internal_links": link_totals["internal"],
        "external_links": link_totals["external"],
        "scripts_count": len(soup.find_all("script")),
        "paragraph_count": len(soup.find_all("p")),
        "page_size_mb": page_size_mb,
    }