"""Fake-Fast Lite — sniffs edge-cache "lipstick" hiding a slow origin.

Heuristic only: approximates TTFB, counts DOM weight via regex, and reads
CDN / cache headers to produce a 0-100 "fake-fast smell" score.
"""
import re
import socket  # noqa: F401 -- kept from the original import block
import time
from contextlib import closing  # noqa: F401 -- kept from the original import block

import httpx
import gradio as gr
import tldextract  # noqa: F401 -- kept from the original import block
from bs4 import BeautifulSoup

TIMEOUT = 8.0  # seconds, applied to every outbound request
UA = "VAX-FakeFastLite/1.0 (+https://vibeaxis.com)"

# Response headers that hint a CDN / edge cache sat in front of the origin.
# NOTE(review): the original listed "x-ser", which matches no real header --
# assumed to be a truncated "x-served-by" (Fastly); confirm against intent.
EDGE_HINT_HEADERS = [
    "cf-cache-status", "x-cache", "x-cache-hits", "x-served-by", "server-timing",
    "x-worker-cache", "x-akamai-transformed", "x-cdn", "x-amz-cf-pop", "via",
]

# Standard caching headers surfaced verbatim in the report.
CACHE_HEADERS = [
    "cache-control", "age", "etag", "last-modified", "vary", "expires",
    "pragma", "surrogate-control",
]


def first_byte_timing(url: str) -> float | None:
    """Approximate TTFB (in ms) by timing a streamed GET until the first chunk.

    Not a lab-grade metric, but consistent enough for "fake-fast" sniffing.
    Returns None when the request fails or the body never yields a byte.
    """
    try:
        with httpx.Client(
            http2=False,
            timeout=TIMEOUT,
            headers={"User-Agent": UA},
            follow_redirects=True,
        ) as c:
            start = time.perf_counter()
            with c.stream("GET", url) as r:
                for chunk in r.iter_bytes():
                    if chunk:
                        return (time.perf_counter() - start) * 1000.0
    except Exception:
        # Best-effort probe: any network failure simply means "no timing".
        return None
    return None  # empty body: no first byte ever arrived


def _pick_headers(headers, names: list[str]) -> dict:
    """Return {name: value} for each of *names* present in *headers*."""
    return {h: headers.get(h) for h in names if h in headers}


def fetch(url: str) -> dict:
    """Analyze *url* and return a report dict, or {"error": ...} on failure.

    The report carries the final (post-redirect) URL, page title, approximate
    TTFB, edge/cache header subsets, DOM weight counts, heuristic flags, a
    0-100 score, and a one-line verdict.
    """
    if not re.match(r"^https?://", url, flags=re.I):
        url = "https://" + url

    try:
        with httpx.Client(
            http2=False,
            timeout=TIMEOUT,
            headers={"User-Agent": UA},
            follow_redirects=True,
        ) as c:
            head = c.head(url)
            content_length = head.headers.get("content-length", "0")
            if head.status_code >= 400 or content_length in ("", "0"):
                # HEAD blocked or empty: reuse one GET for headers AND body
                # (the original issued a second, redundant GET here).
                head = c.get(url)
                html = head.text
            else:
                html = c.get(url).text
            # Post-redirect URL -- the UI labels this field "Final URL".
            final_url = str(head.url)
    except Exception as e:
        return {"error": f"Request failed: {e}"}

    # --- DOM weight (regex-level; BeautifulSoup only for the title) -------
    # NOTE(review): the original regexes were garbled in transit and `inlines`
    # was never defined (NameError). Reconstructed intent: all <script> tags,
    # inline (src-less) scripts, and <link rel="stylesheet"> tags -- confirm.
    scripts = re.findall(r"<script\b[^>]*>", html, flags=re.I)
    inlines = [s for s in scripts if not re.search(r"\bsrc\s*=", s, flags=re.I)]
    css_links = re.findall(r"<link\b[^>]*rel=[\"']stylesheet[\"']", html, flags=re.I)

    soup = BeautifulSoup(html, "html.parser")
    title = soup.title.string.strip() if soup.title and soup.title.string else ""

    # --- Edge vs origin hints ---------------------------------------------
    edge = _pick_headers(head.headers, EDGE_HINT_HEADERS)
    cache = _pick_headers(head.headers, CACHE_HEADERS)

    # CDN-ish tell via "via" or known server banners.
    via = head.headers.get("via", "") + " " + head.headers.get("server", "")
    cdnish = any(
        k in via.lower()
        for k in ["cloudflare", "akamai", "fastly", "cache", "cdn", "cloudfront", "varnish"]
    )

    ttfb_ms = first_byte_timing(url)

    # --- Heuristics --------------------------------------------------------
    script_count = len(scripts)
    inline_count = len(inlines)
    css_count = len(css_links)

    cache_control = head.headers.get("cache-control", "").lower()
    max_age = None
    if "max-age=" in cache_control:
        m = re.search(r"max-age=(\d+)", cache_control)
        if m:
            max_age = int(m.group(1))

    age_hdr = head.headers.get("age")
    try:
        age_val = int(age_hdr) if age_hdr is not None else None
    except (TypeError, ValueError):  # was a bare `except:` in the original
        age_val = None

    cf_status = head.headers.get("cf-cache-status", "").lower()

    # Verdict: "fake-fast-ish" when the CDN says HIT but TTFB is still
    # chunky and the DOM is heavy.
    flags = []
    if ttfb_ms is not None and ttfb_ms > 800:
        flags.append(f"High TTFB ({ttfb_ms:.0f} ms)")
    if cdnish and ("hit" in cf_status or (age_val and age_val > 0)):
        flags.append("Edge cache present")
    if script_count > 20:
        flags.append(f"Script buffet ({script_count})")
    if inline_count > 3:
        flags.append(f"Inline scripts ({inline_count})")
    if css_count > 6:
        flags.append(f"Many stylesheets ({css_count})")

    # --- "Fake-fast smell" score (0-100) ----------------------------------
    score = 0
    if ttfb_ms is None:
        score += 15  # could not even measure: mildly suspicious
    elif ttfb_ms > 1500:
        score += 45
    elif ttfb_ms > 800:
        score += 30
    elif ttfb_ms > 400:
        score += 15
    if cdnish:
        score += 10
    if "hit" in cf_status:
        score += 10
    if script_count > 20:
        score += 15
    if inline_count > 3:
        score += 10
    if css_count > 6:
        score += 10
    if max_age is not None and max_age < 60:
        score += 10
    # Raw sum can reach 110; clamp to the advertised 0-100 range.
    score = min(score, 100)

    verdict = (
        "Likely Fake-Fast (edge cache masking slow origin)" if score >= 60
        else "Borderline (mixed signals)" if score >= 35
        else "Looks Clean (at a glance)"
    )

    return {
        "url": final_url,
        "title": title,
        "ttfb_ms": None if ttfb_ms is None else round(ttfb_ms, 1),
        "headers_edge": edge,
        "headers_cache": cache,
        "counts": {
            "scripts_total": script_count,
            "scripts_inline": inline_count,
            "stylesheets": css_count,
        },
        "flags": flags,
        "score_0_100": score,
        "verdict": verdict,
    }
import urllib.parse as urlparse  # used to build the shareable ?url= link


def ui_analyze(url: str, request: gr.Request):
    """Gradio handler: analyze *url* and fan results out to the UI.

    Returns (summary_md, edge_md, cache_md, final_url, title,
    copy_source, share_link) -- matching the `outputs=` wiring below.
    On fetch failure the error string goes into the summary slot.
    """
    # Prefill from ?url= when the textbox is empty (shareable deep links).
    if (not url) and request and hasattr(request, "query_params"):
        url = request.query_params.get("url", "") or ""

    data = fetch(url)
    if "error" in data:
        return data["error"], None, None, url, "", "", ""

    summary = f"**{data['verdict']}** — score {data['score_0_100']}/100"
    counts = data["counts"]
    meta = (
        f"**TTFB:** {data['ttfb_ms']} ms • "
        f"**Scripts:** {counts['scripts_total']} (inline {counts['scripts_inline']}) • "
        f"**CSS:** {counts['stylesheets']}"
    )
    edge = "\n".join(f"- **{k}:** {v}" for k, v in (data["headers_edge"] or {}).items()) or "—"
    cache = "\n".join(f"- **{k}:** {v}" for k, v in (data["headers_cache"] or {}).items()) or "—"
    flags = ", ".join(data["flags"]) if data["flags"] else "—"
    md_summary = f"{summary}\n\n{meta}\n\n**Signals:** {flags}"

    # Shareable link back to this Space with ?url= prefilled.
    base = "https://huggingface.co/spaces/vibeaxis/fake-fast-lite"
    share = f"{base}?url={urlparse.quote(data['url'] or '')}"

    # out_summary, out_edge, out_cache, out_url, out_title, copy_src, share_link
    return md_summary, edge, cache, data["url"], data.get("title", ""), md_summary, share


# NOTE(review): the original file contained TWO pasted copies of the handler
# and the Blocks UI, with the __main__ launch guard between them -- so the
# patched UI (copy button, share link) was defined but never launched when
# run as a script. Consolidated to the final version, guard moved to the end.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="sky")) as demo:
    gr.Markdown("# Fake-Fast Lite\n**Sniffs edge-cache lipstick on a slow origin.** Not a lab test, just fast receipts.")
    with gr.Row():
        url = gr.Textbox(label="URL", placeholder="https://example.com")
        go = gr.Button("Analyze", variant="primary")
    out_summary = gr.Markdown()
    with gr.Row():
        out_edge = gr.Markdown(label="Edge / CDN Hints")
        out_cache = gr.Markdown(label="Cache Headers")
    with gr.Row():
        out_url = gr.Textbox(label="Final URL", interactive=False)
        out_title = gr.Textbox(label="Page Title", interactive=False)

    # Hidden textbox mirrors the summary so front-end JS can copy it.
    copy_src = gr.Textbox(visible=False)
    share_link = gr.Textbox(label="Share this result", interactive=False)

    # Copy button runs entirely in the browser (fn=None, js only).
    copy_btn = gr.Button("Copy summary")
    copy_btn.click(
        fn=None,
        inputs=copy_src,
        outputs=None,
        # NOTE(review): was `_js=` (Gradio 3.x spelling); renamed `js=` in
        # Gradio 4+ -- revert if this Space is pinned to Gradio 3.
        js="(txt) => { navigator.clipboard.writeText(txt); return; }",
    )

    go.click(
        fn=ui_analyze,
        inputs=url,
        outputs=[out_summary, out_edge, out_cache, out_url, out_title, copy_src, share_link],
    )


if __name__ == "__main__":
    demo.launch()