File size: 2,557 Bytes
2b079e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f579252
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import os
from urllib.parse import urlparse

import pandas as pd
import requests
from bs4 import BeautifulSoup
from flask import Flask, render_template, request, flash

app = Flask(__name__)
# The secret key signs the session cookie that backs flash() messages.
# Prefer a value supplied through the SECRET_KEY environment variable so the
# real key never lives in source control; the literal is only a fallback that
# keeps local runs working when the variable is unset.
app.secret_key = os.environ.get("SECRET_KEY") or "super_duper_secret_key"

# Request headers sent with every outbound fetch: a desktop Chrome
# User-Agent string.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/125.0.0.0 Safari/537.36"
    )
}


def is_valid_url(url: str) -> bool:
    """Return True when ``url`` is an absolute http(s) URL with a host part.

    Args:
        url: The candidate URL string.

    Returns:
        True if the URL parses, its scheme is ``http`` or ``https`` and its
        network location is non-empty; False otherwise (including for
        non-string input and for strings ``urlparse`` rejects).
    """
    if not isinstance(url, str):
        return False

    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises on malformed authorities such as "http://[::1".
        return False

    # bool() so the caller gets a real boolean rather than the netloc string.
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the link-extractor page.

    GET renders ``index.html`` with no table. POST reads the ``url`` form
    field, fetches that page, collects the ``href`` of every ``<a>`` tag,
    removes duplicates and renders the result as an HTML table passed to the
    template as ``table``.

    Validation failures, an empty result and request errors are reported to
    the user through ``flash`` messages instead of raising.

    NOTE(review): the URL comes straight from the user and is fetched
    server-side, so this endpoint can be pointed at internal addresses
    (SSRF). Consider restricting target hosts if this is deployed publicly.

    Returns:
        The rendered ``index.html`` response.
    """
    table_html = None

    if request.method == "POST":
        url = request.form.get("url", "").strip()

        if not is_valid_url(url):
            flash("Invalid URL. Please enter a valid URL.", "danger")
            return render_template("index.html")

        try:
            response = requests.get(
                url,
                headers=HEADERS,
                timeout=10
            )

            # Turn 4xx/5xx responses into HTTPError so they are reported below.
            response.raise_for_status()

            soup = BeautifulSoup(response.text, "html.parser")

            # Collect every non-empty href, one row per link.
            links = []
            for anchor in soup.find_all("a", href=True):
                href = anchor["href"].strip()
                if href:
                    links.append({"Link": href})

            if not links:
                flash("No links found on the page.", "warning")
                return render_template("index.html")

            # Build the table and drop repeated links (first occurrence kept).
            df = pd.DataFrame(links).drop_duplicates()

            # The hrefs come from an arbitrary remote page, so they must be
            # HTML-escaped; otherwise a crafted href could inject markup or
            # script into the rendered page.
            table_html = df.to_html(
                classes="table table-bordered table-striped",
                index=False,
                escape=True
            )

        except requests.exceptions.Timeout:
            flash("Request timed out.", "danger")

        except requests.exceptions.ConnectionError:
            flash("Failed to connect to the website.", "danger")

        except requests.exceptions.HTTPError as e:
            flash(f"HTTP Error: {e}", "danger")

        except Exception as e:
            # Last-resort handler so parsing or other failures surface as a
            # message on the page rather than a server error.
            flash(f"Unexpected Error: {str(e)}", "danger")

    return render_template("index.html", table=table_html)


if __name__ == "__main__":
    # The server listens on all interfaces, so the Werkzeug debugger (which
    # allows arbitrary code execution from the browser) must not be on by
    # default. Set FLASK_DEBUG=1 to opt in during local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in (
        "1", "true", "yes", "on"
    )
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)