"""Small Flask app: fetch a user-supplied URL and list the links found on it."""

import os
import secrets
from urllib.parse import urlparse

import pandas as pd
import requests
from bs4 import BeautifulSoup
from flask import Flask, render_template, request, flash

app = Flask(__name__)
# The session-signing key must not live in source control. Read it from the
# environment; fall back to a random per-process key so the app still starts
# (flashed messages are produced and consumed within the same request here).
app.secret_key = os.environ.get("SECRET_KEY") or secrets.token_hex(32)

# Browser-like User-Agent sent with every outgoing request.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/125.0.0.0 Safari/537.36"
    )
}


def is_valid_url(url: str) -> bool:
    """Return True if *url* is an http(s) URL with a non-empty network location.

    Malformed input that ``urlparse`` rejects (e.g. an unbalanced IPv6
    bracket) is reported as invalid instead of raising.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        return False
    # bool(...) so the function really returns a bool, not the netloc string.
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)


def _extract_links(html: str) -> list[str]:
    """Return the distinct, non-empty ``href`` values of all ``<a>`` tags in *html*.

    First-occurrence order is preserved.
    """
    soup = BeautifulSoup(html, "html.parser")
    hrefs = (a["href"].strip() for a in soup.find_all("a", href=True))
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(href for href in hrefs if href))


def _links_to_table(links: list[str]) -> str:
    """Render *links* as a single-column ("Link") Bootstrap-styled HTML table."""
    df = pd.DataFrame({"Link": links})
    # escape=True: the hrefs come from an untrusted third-party page, so they
    # must be HTML-escaped before being embedded in our own response.
    return df.to_html(
        classes="table table-bordered table-striped",
        index=False,
        escape=True,
    )


@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the form on GET; on POST, fetch the submitted URL and list its links.

    Failures (invalid URL, network errors, no links) are reported through
    ``flash`` and the page is re-rendered. On success the template receives
    the rendered table as ``table``.
    """
    table_html = None

    if request.method == "POST":
        url = request.form.get("url", "").strip()

        if not is_valid_url(url):
            flash("Invalid URL. Please enter a valid URL.", "danger")
            return render_template("index.html")

        try:
            # NOTE(security): this fetches an arbitrary user-supplied URL, so
            # the server can be pointed at internal/loopback addresses (SSRF).
            # Restrict allowed hosts or networks if this is deployed publicly.
            response = requests.get(url, headers=HEADERS, timeout=10)
            response.raise_for_status()

            links = _extract_links(response.text)
            if not links:
                flash("No links found on the page.", "warning")
                return render_template("index.html")

            table_html = _links_to_table(links)

        except requests.exceptions.Timeout:
            flash("Request timed out.", "danger")
        except requests.exceptions.ConnectionError:
            flash("Failed to connect to the website.", "danger")
        except requests.exceptions.HTTPError as e:
            flash(f"HTTP Error: {e}", "danger")
        except Exception as e:
            # Top-level boundary: keep the page usable, but record the
            # traceback so the failure is not silently lost.
            app.logger.exception("Unexpected error while processing %s", url)
            flash(f"Unexpected Error: {str(e)}", "danger")

    return render_template("index.html", table=table_html)


if __name__ == "__main__":
    # The Werkzeug debugger allows arbitrary code execution, so it must never
    # be enabled unconditionally on a server bound to all interfaces.
    # Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)