"""Fake-Fast Lite — sniffs edge-cache "lipstick" hiding a slow origin.

Heuristic only: approximates TTFB, counts DOM weight via regex, and reads
CDN / cache headers to produce a 0-100 "fake-fast smell" score.
"""
import re
import socket  # noqa: F401 -- kept from the original import block
import time
from contextlib import closing  # noqa: F401 -- kept from the original import block

import httpx
import gradio as gr
import tldextract  # noqa: F401 -- kept from the original import block
from bs4 import BeautifulSoup

TIMEOUT = 8.0  # seconds, applied to every outbound request
UA = "VAX-FakeFastLite/1.0 (+https://vibeaxis.com)"

# Response headers that hint a CDN / edge cache sat in front of the origin.
# NOTE(review): the original listed "x-ser", which matches no real header --
# assumed to be a truncated "x-served-by" (Fastly); confirm against intent.
EDGE_HINT_HEADERS = [
    "cf-cache-status", "x-cache", "x-cache-hits", "x-served-by", "server-timing",
    "x-worker-cache", "x-akamai-transformed", "x-cdn", "x-amz-cf-pop", "via",
]

# Standard caching headers surfaced verbatim in the report.
CACHE_HEADERS = [
    "cache-control", "age", "etag", "last-modified", "vary", "expires",
    "pragma", "surrogate-control",
]


def first_byte_timing(url: str) -> float | None:
    """Approximate TTFB (in ms) by timing a streamed GET until the first chunk.

    Not a lab-grade metric, but consistent enough for "fake-fast" sniffing.
    Returns None when the request fails or the body never yields a byte.
    """
    try:
        with httpx.Client(
            http2=False,
            timeout=TIMEOUT,
            headers={"User-Agent": UA},
            follow_redirects=True,
        ) as c:
            start = time.perf_counter()
            with c.stream("GET", url) as r:
                for chunk in r.iter_bytes():
                    if chunk:
                        return (time.perf_counter() - start) * 1000.0
    except Exception:
        # Best-effort probe: any network failure simply means "no timing".
        return None
    return None  # empty body: no first byte ever arrived


def _pick_headers(headers, names: list[str]) -> dict:
    """Return {name: value} for each of *names* present in *headers*."""
    return {h: headers.get(h) for h in names if h in headers}


def fetch(url: str) -> dict:
    """Analyze *url* and return a report dict, or {"error": ...} on failure.

    The report carries the final (post-redirect) URL, page title, approximate
    TTFB, edge/cache header subsets, DOM weight counts, heuristic flags, a
    0-100 score, and a one-line verdict.
    """
    if not re.match(r"^https?://", url, flags=re.I):
        url = "https://" + url

    try:
        with httpx.Client(
            http2=False,
            timeout=TIMEOUT,
            headers={"User-Agent": UA},
            follow_redirects=True,
        ) as c:
            head = c.head(url)
            content_length = head.headers.get("content-length", "0")
            if head.status_code >= 400 or content_length in ("", "0"):
                # HEAD blocked or empty: reuse one GET for headers AND body
                # (the original issued a second, redundant GET here).
                head = c.get(url)
                html = head.text
            else:
                html = c.get(url).text
            # Post-redirect URL -- the UI labels this field "Final URL".
            final_url = str(head.url)
    except Exception as e:
        return {"error": f"Request failed: {e}"}

    # --- DOM weight (regex-level; BeautifulSoup only for the title) -------
    # NOTE(review): the original regexes were garbled in transit and `inlines`
    # was never defined (NameError). Reconstructed intent: all <script> tags,
    # inline (src-less) scripts, and <link rel="stylesheet"> tags -- confirm.
    scripts = re.findall(r"<script\b[^>]*>", html, flags=re.I)
    inlines = [s for s in scripts if not re.search(r"\bsrc\s*=", s, flags=re.I)]
    css_links = re.findall(r"<link\b[^>]*rel=[\"']stylesheet[\"']", html, flags=re.I)

    soup = BeautifulSoup(html, "html.parser")
    title = soup.title.string.strip() if soup.title and soup.title.string else ""

    # --- Edge vs origin hints ---------------------------------------------
    edge = _pick_headers(head.headers, EDGE_HINT_HEADERS)
    cache = _pick_headers(head.headers, CACHE_HEADERS)

    # CDN-ish tell via "via" or known server banners.
    via = head.headers.get("via", "") + " " + head.headers.get("server", "")
    cdnish = any(
        k in via.lower()
        for k in ["cloudflare", "akamai", "fastly", "cache", "cdn", "cloudfront", "varnish"]
    )

    ttfb_ms = first_byte_timing(url)

    # --- Heuristics --------------------------------------------------------
    script_count = len(scripts)
    inline_count = len(inlines)
    css_count = len(css_links)

    cache_control = head.headers.get("cache-control", "").lower()
    max_age = None
    if "max-age=" in cache_control:
        m = re.search(r"max-age=(\d+)", cache_control)
        if m:
            max_age = int(m.group(1))

    age_hdr = head.headers.get("age")
    try:
        age_val = int(age_hdr) if age_hdr is not None else None
    except (TypeError, ValueError):  # was a bare `except:` in the original
        age_val = None

    cf_status = head.headers.get("cf-cache-status", "").lower()

    # Verdict: "fake-fast-ish" when the CDN says HIT but TTFB is still
    # chunky and the DOM is heavy.
    flags = []
    if ttfb_ms is not None and ttfb_ms > 800:
        flags.append(f"High TTFB ({ttfb_ms:.0f} ms)")
    if cdnish and ("hit" in cf_status or (age_val and age_val > 0)):
        flags.append("Edge cache present")
    if script_count > 20:
        flags.append(f"Script buffet ({script_count})")
    if inline_count > 3:
        flags.append(f"Inline scripts ({inline_count})")
    if css_count > 6:
        flags.append(f"Many stylesheets ({css_count})")

    # --- "Fake-fast smell" score (0-100) ----------------------------------
    score = 0
    if ttfb_ms is None:
        score += 15  # could not even measure: mildly suspicious
    elif ttfb_ms > 1500:
        score += 45
    elif ttfb_ms > 800:
        score += 30
    elif ttfb_ms > 400:
        score += 15
    if cdnish:
        score += 10
    if "hit" in cf_status:
        score += 10
    if script_count > 20:
        score += 15
    if inline_count > 3:
        score += 10
    if css_count > 6:
        score += 10
    if max_age is not None and max_age < 60:
        score += 10
    # Raw sum can reach 110; clamp to the advertised 0-100 range.
    score = min(score, 100)

    verdict = (
        "Likely Fake-Fast (edge cache masking slow origin)" if score >= 60
        else "Borderline (mixed signals)" if score >= 35
        else "Looks Clean (at a glance)"
    )

    return {
        "url": final_url,
        "title": title,
        "ttfb_ms": None if ttfb_ms is None else round(ttfb_ms, 1),
        "headers_edge": edge,
        "headers_cache": cache,
        "counts": {
            "scripts_total": script_count,
            "scripts_inline": inline_count,
            "stylesheets": css_count,
        },
        "flags": flags,
        "score_0_100": score,
        "verdict": verdict,
    }
import urllib.parse as urlparse  # used to build the shareable ?url= link


def ui_analyze(url: str, request: gr.Request):
    """Gradio handler: analyze *url* and fan results out to the UI.

    Returns (summary_md, edge_md, cache_md, final_url, title,
    copy_source, share_link) -- matching the `outputs=` wiring below.
    On fetch failure the error string goes into the summary slot.
    """
    # Prefill from ?url= when the textbox is empty (shareable deep links).
    if (not url) and request and hasattr(request, "query_params"):
        url = request.query_params.get("url", "") or ""

    data = fetch(url)
    if "error" in data:
        return data["error"], None, None, url, "", "", ""

    summary = f"**{data['verdict']}** — score {data['score_0_100']}/100"
    counts = data["counts"]
    meta = (
        f"**TTFB:** {data['ttfb_ms']} ms • "
        f"**Scripts:** {counts['scripts_total']} (inline {counts['scripts_inline']}) • "
        f"**CSS:** {counts['stylesheets']}"
    )
    edge = "\n".join(f"- **{k}:** {v}" for k, v in (data["headers_edge"] or {}).items()) or "—"
    cache = "\n".join(f"- **{k}:** {v}" for k, v in (data["headers_cache"] or {}).items()) or "—"
    flags = ", ".join(data["flags"]) if data["flags"] else "—"
    md_summary = f"{summary}\n\n{meta}\n\n**Signals:** {flags}"

    # Shareable link back to this Space with ?url= prefilled.
    base = "https://huggingface.co/spaces/vibeaxis/fake-fast-lite"
    share = f"{base}?url={urlparse.quote(data['url'] or '')}"

    # out_summary, out_edge, out_cache, out_url, out_title, copy_src, share_link
    return md_summary, edge, cache, data["url"], data.get("title", ""), md_summary, share


# NOTE(review): the original file contained TWO pasted copies of the handler
# and the Blocks UI, with the __main__ launch guard between them -- so the
# patched UI (copy button, share link) was defined but never launched when
# run as a script. Consolidated to the final version, guard moved to the end.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="sky")) as demo:
    gr.Markdown("# Fake-Fast Lite\n**Sniffs edge-cache lipstick on a slow origin.** Not a lab test, just fast receipts.")
    with gr.Row():
        url = gr.Textbox(label="URL", placeholder="https://example.com")
        go = gr.Button("Analyze", variant="primary")
    out_summary = gr.Markdown()
    with gr.Row():
        out_edge = gr.Markdown(label="Edge / CDN Hints")
        out_cache = gr.Markdown(label="Cache Headers")
    with gr.Row():
        out_url = gr.Textbox(label="Final URL", interactive=False)
        out_title = gr.Textbox(label="Page Title", interactive=False)

    # Hidden textbox mirrors the summary so front-end JS can copy it.
    copy_src = gr.Textbox(visible=False)
    share_link = gr.Textbox(label="Share this result", interactive=False)

    # Copy button runs entirely in the browser (fn=None, js only).
    copy_btn = gr.Button("Copy summary")
    copy_btn.click(
        fn=None,
        inputs=copy_src,
        outputs=None,
        # NOTE(review): was `_js=` (Gradio 3.x spelling); renamed `js=` in
        # Gradio 4+ -- revert if this Space is pinned to Gradio 3.
        js="(txt) => { navigator.clipboard.writeText(txt); return; }",
    )

    go.click(
        fn=ui_analyze,
        inputs=url,
        outputs=[out_summary, out_edge, out_cache, out_url, out_title, copy_src, share_link],
    )


if __name__ == "__main__":
    demo.launch()