import os
from urllib.parse import urlparse

import pandas as pd
import requests
from bs4 import BeautifulSoup
from flask import Flask, render_template, request, flash
|
|
|
# The Flask application object that the route decorators register on.
app = Flask(__name__)

# Flask uses the secret key to sign the session cookie, which is where
# flash() messages are stored. Prefer a value supplied through the
# environment so the real key never lives in source control; the literal
# fallback keeps local development working when the variable is unset.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "super_duper_secret_key")
|
|
|
# Request headers for outbound page fetches. The User-Agent mimics a desktop
# Chrome browser (presumably so sites that reject the default HTTP-client
# User-Agent still respond -- confirm with the author).
HEADERS = {
    "User-Agent": " ".join(
        (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/125.0.0.0 Safari/537.36",
        )
    ),
}
|
|
|
|
|
def is_valid_url(url: str) -> bool:
    """Return True if *url* is an absolute http(s) URL with a host part.

    Only the shape of the URL is checked: the scheme must be ``http`` or
    ``https`` and the network location must be non-empty. No DNS lookup or
    network request is made.

    Args:
        url: The candidate URL string.

    Returns:
        True when the URL has an accepted scheme and a non-empty netloc,
        False otherwise, including when the string cannot be parsed at all.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises on malformed authorities such as an unclosed
        # IPv6 bracket ("http://[::1"); treat those as simply invalid.
        return False
    # bool() so callers get a real boolean rather than the netloc string.
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)
|
|
|
|
|
def _extract_links(html):
    """Return the distinct, non-empty ``href`` values of every ``<a>`` in *html*.

    Values are whitespace-stripped and kept in order of first appearance.
    """
    soup = BeautifulSoup(html, "html.parser")
    hrefs = (anchor["href"].strip() for anchor in soup.find_all("a", href=True))
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(href for href in hrefs if href))


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the link-extractor page and, on POST, the links found at a URL.

    GET renders ``index.html`` with ``table=None``. POST reads the ``url``
    form field, validates it with ``is_valid_url``, downloads the page, and
    collects the ``href`` of every anchor into an HTML table passed to the
    template as ``table``. Any failure is reported to the user through
    ``flash`` and the page is rendered without a table.

    Returns:
        The rendered ``index.html`` response.
    """
    table_html = None

    if request.method == "POST":
        url = request.form.get("url", "").strip()

        if not is_valid_url(url):
            flash("Invalid URL. Please enter a valid URL.", "danger")
            return render_template("index.html")

        # NOTE(security): the URL is user-supplied and fetched server-side,
        # so this endpoint can be pointed at internal hosts (SSRF). Restrict
        # the allowed targets if this is deployed on an untrusted network.
        #
        # Parsing and table building stay inside the try so that any failure
        # there is still reported via flash() instead of becoming a 500.
        try:
            response = requests.get(url, headers=HEADERS, timeout=10)
            response.raise_for_status()

            links = _extract_links(response.text)
            if not links:
                flash("No links found on the page.", "warning")
                return render_template("index.html")

            frame = pd.DataFrame({"Link": links})
            # The hrefs come from an arbitrary remote page, so they must be
            # HTML-escaped before being embedded in the generated table.
            table_html = frame.to_html(
                classes="table table-bordered table-striped",
                index=False,
                escape=True,
            )
        except requests.exceptions.Timeout:
            flash("Request timed out.", "danger")
        except requests.exceptions.ConnectionError:
            flash("Failed to connect to the website.", "danger")
        except requests.exceptions.HTTPError as e:
            flash(f"HTTP Error: {e}", "danger")
        except Exception as e:
            # Last-resort boundary for the request: record the traceback for
            # operators and show the user a short message.
            app.logger.exception("Unexpected error while scraping %s", url)
            flash(f"Unexpected Error: {e}", "danger")

    return render_template("index.html", table=table_html)
|
|
|
|
|
if __name__ == "__main__":
    # The server listens on all interfaces, and Flask's debug mode enables
    # the Werkzeug interactive debugger, which lets a browser client execute
    # arbitrary Python. Keep it off unless explicitly requested for local
    # development via FLASK_DEBUG=1 (also accepts true/yes/on).
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in (
        "1",
        "true",
        "yes",
        "on",
    )
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)