"""Resolve a URL to its final destination and collect basic metadata."""

from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup

from .config_loader import load_app_config
from .models import ResolvedLink

# Request settings are read once, at import time, from the application config.
APP_CONFIG = load_app_config()
REQUESTS_CONFIG = APP_CONFIG["requests"]
HEADERS = {"User-Agent": REQUESTS_CONFIG["user_agent"]}
MAX_REDIRECTS = REQUESTS_CONFIG["max_redirects"]
TIMEOUT = REQUESTS_CONFIG["timeout_seconds"]

_WWW_PREFIX = "www."


def _normalize_domain(netloc: str) -> str:
    """Lower-case *netloc* and drop a single leading ``www.`` label."""
    domain = netloc.lower()
    # Only strip the prefix: a blanket ``replace`` would also mangle hosts
    # such as ``awww.example.com`` or ``cdn.www.example.com``.
    if domain.startswith(_WWW_PREFIX):
        domain = domain[len(_WWW_PREFIX):]
    return domain


def resolve_link(url: str) -> ResolvedLink:
    """Fetch *url*, following redirects, and describe where it ends up.

    The URL must use the ``http`` or ``https`` scheme and have a network
    location. The request is sent with the configured ``HEADERS`` and
    ``TIMEOUT``, and the session is capped at ``MAX_REDIRECTS`` redirects.

    Args:
        url: The link to resolve.

    Returns:
        A ``ResolvedLink``. On success it carries the final URL, the
        normalized final domain, the HTTP status code, the URLs of the
        intermediate responses in ``redirect_chain``, and the page title
        when the response is HTML and has a ``<title>`` tag (otherwise
        ``None``); ``error`` is ``None``. The status code is reported as-is;
        non-2xx responses are not turned into errors here.

        On any ``Exception`` (invalid URL, network failure, parsing
        failure, ...) the result carries only ``original_url`` and
        ``error=str(exc)``; every other field is ``None`` or empty.
    """
    try:
        parsed = urlparse(url)
        if parsed.scheme not in {"http", "https"} or not parsed.netloc:
            # Raised inside the ``try`` so it is reported through ``error``
            # like every other failure.
            raise ValueError("Invalid URL format")

        with requests.Session() as session:
            session.max_redirects = MAX_REDIRECTS
            r = session.get(
                url,
                headers=HEADERS,
                timeout=TIMEOUT,
                allow_redirects=True,
            )

        chain = [h.url for h in r.history]
        final_url = r.url
        # NOTE(review): ``netloc`` keeps any port or userinfo present in the
        # final URL -- confirm whether callers expect a bare hostname.
        domain = _normalize_domain(urlparse(final_url).netloc)

        title = None
        # Media types are case-insensitive, so compare in lower case.
        content_type = r.headers.get("Content-Type", "").lower()
        if "text/html" in content_type:
            soup = BeautifulSoup(r.text, "html.parser")
            title_tag = soup.title
            if title_tag is not None:
                title = title_tag.text.strip()

        return ResolvedLink(
            original_url=url,
            final_url=final_url,
            final_domain=domain,
            status_code=r.status_code,
            redirect_chain=chain,
            title=title,
            error=None,
        )
    except Exception as e:
        # Deliberate best-effort boundary: callers always get a ResolvedLink
        # back and inspect ``error`` instead of handling exceptions.
        return ResolvedLink(
            original_url=url,
            final_url=None,
            final_domain=None,
            status_code=None,
            redirect_chain=[],
            title=None,
            error=str(e),
        )