# Fake-Fast-Lite / app.py
# (pasted hosting-page chrome preserved as comments so the file parses)
# Vibeaxis's picture
# Update app.py
# 9df2916 verified
import time, re, socket
from contextlib import closing
import httpx
import gradio as gr
from bs4 import BeautifulSoup
import tldextract
# Per-request timeout (seconds) for every HTTP call below.
TIMEOUT = 8.0
# Identifying User-Agent sent with every request.
UA = "VAX-FakeFastLite/1.0 (+https://vibeaxis.com)"
# Response headers that hint a CDN / edge layer handled the request
# (reported back to the UI when present).
EDGE_HINT_HEADERS = [
    "cf-cache-status","x-cache","x-cache-hits","x-ser","server-timing",
    "x-worker-cache","x-akamai-transformed","x-cdn","x-amz-cf-pop","via"
]
# Standard caching-related response headers reported back to the UI.
CACHE_HEADERS = [
    "cache-control","age","etag","last-modified","vary","expires","pragma","surrogate-control"
]
def first_byte_timing(url: str) -> float | None:
"""
Approximate TTFB by timing a streamed GET until first chunk.
This is not a lab-grade metric but it’s consistent enough for “fake-fast” sniffing.
"""
try:
with httpx.Client(http2=False, timeout=TIMEOUT, headers={"User-Agent": UA}, follow_redirects=True) as c:
start = time.perf_counter()
with c.stream("GET", url) as r:
for chunk in r.iter_bytes():
if chunk:
return (time.perf_counter() - start) * 1000.0
except Exception:
return None
def fetch(url: str):
    """Fetch *url* and score it for "fake-fast" signals.

    Performs one HEAD (for header inspection, falling back to the GET
    response's headers when HEAD is blocked or advertises an empty body)
    and a single GET for the body, then computes edge/cache/DOM heuristics.

    Returns a result dict on success, or ``{"error": ...}`` on any
    request failure.
    """
    # Default to https:// when the user typed a bare hostname.
    if not re.match(r'^https?://', url, flags=re.I):
        url = "https://" + url
    try:
        with httpx.Client(http2=False, timeout=TIMEOUT,
                          headers={"User-Agent": UA},
                          follow_redirects=True) as c:
            head = c.head(url)
            # Single GET for the body (previously issued twice).
            resp = c.get(url)
            try:
                head_len = int(head.headers.get("content-length", "0"))
            except ValueError:
                head_len = 0  # malformed Content-Length: treat as empty
            # Fall back to the GET response's headers if HEAD was blocked
            # or claimed an empty body.
            if head.status_code >= 400 or head_len == 0:
                head = resp
            html = resp.text
            # Post-redirect URL — the UI labels this "Final URL".
            final_url = str(resp.url)
    except Exception as e:
        return {"error": f"Request failed: {e}"}

    # --- DOM counts (regex over raw HTML; crude but fast) ---
    scripts = re.findall(r"<script\b", html, flags=re.I)
    # NOTE(review): the lookahead only skips immediately-empty
    # <script></script> pairs, so external scripts with bodies/attributes
    # also count as "inline" — kept as-is to preserve scoring; confirm intent.
    inlines = re.findall(r"<script\b[^>]*>(?!\s*</script>)", html, flags=re.I)
    css_links = re.findall(r"<link\b[^>]*rel=[\"']stylesheet[\"']", html, flags=re.I)
    soup = BeautifulSoup(html, "html.parser")
    title = (soup.title.string.strip() if soup.title and soup.title.string else "")

    # --- Edge vs. origin header hints ---
    edge = {h: head.headers.get(h) for h in EDGE_HINT_HEADERS if h in head.headers}
    cache = {h: head.headers.get(h) for h in CACHE_HEADERS if h in head.headers}

    # CDN-ish tell via "via" or known server banners.
    via = head.headers.get("via", "") + " " + head.headers.get("server", "")
    cdnish = any(k in via.lower() for k in
                 ["cloudflare","akamai","fastly","cache","cdn","cloudfront","varnish"])

    # Approximate TTFB (extra streamed request; None if it fails).
    ttfb_ms = first_byte_timing(url)

    # --- Heuristics ---
    script_count = len(scripts)
    inline_count = len(inlines)
    css_count = len(css_links)
    cache_control = head.headers.get("cache-control", "").lower()
    m = re.search(r"max-age=(\d+)", cache_control)
    max_age = int(m.group(1)) if m else None
    age_hdr = head.headers.get("age")
    try:
        age_val = int(age_hdr) if age_hdr is not None else None
    except ValueError:  # narrowed from a bare except
        age_val = None
    cf_status = head.headers.get("cf-cache-status", "").lower()

    # Verdict: "fake-fast-ish" if CDN says HIT but TTFB still chunky and DOM heavy.
    flags = []
    if ttfb_ms is not None and ttfb_ms > 800:
        flags.append(f"High TTFB ({ttfb_ms:.0f} ms)")
    if cdnish and ("hit" in cf_status or (age_val and age_val > 0)):
        flags.append("Edge cache present")
    if script_count > 20:
        flags.append(f"Script buffet ({script_count})")
    if inline_count > 3:
        flags.append(f"Inline scripts ({inline_count})")
    if css_count > 6:
        flags.append(f"Many stylesheets ({css_count})")

    # "Fake-fast smell" score (0-100).
    score = 0
    if ttfb_ms is None:
        score += 15
    else:
        if ttfb_ms > 1500:
            score += 45
        elif ttfb_ms > 800:
            score += 30
        elif ttfb_ms > 400:
            score += 15
    if cdnish:
        score += 10
    if "hit" in cf_status:
        score += 10
    if script_count > 20:
        score += 15
    if inline_count > 3:
        score += 10
    if css_count > 6:
        score += 10
    if max_age is not None and max_age < 60:
        score += 10

    verdict = (
        "Likely Fake-Fast (edge cache masking slow origin)"
        if score >= 60 else
        "Borderline (mixed signals)"
        if score >= 35 else
        "Looks Clean (at a glance)"
    )
    return {
        "url": final_url,
        "title": title,
        "ttfb_ms": None if ttfb_ms is None else round(ttfb_ms, 1),
        "headers_edge": edge,
        "headers_cache": cache,
        "counts": {
            "scripts_total": script_count,
            "scripts_inline": inline_count,
            "stylesheets": css_count
        },
        "flags": flags,
        "score_0_100": score,
        "verdict": verdict,
    }
def ui_analyze(url):
    """Gradio handler: analyze *url* and format the report as Markdown.

    Returns (summary_md, edge_md, cache_md, final_url, title); on a fetch
    failure the error text goes in the first slot and the rest are None.

    NOTE(review): a second ui_analyze later in this file shadows this one
    at import time — looks like a pasted-in update; confirm which is intended.
    """
    data = fetch(url)
    if "error" in data:
        return data["error"], None, None, None, None

    counts = data["counts"]
    header = f"**{data['verdict']}** — score {data['score_0_100']}/100"
    stats = (
        f"**TTFB:** {data['ttfb_ms']} ms • "
        f"**Scripts:** {counts['scripts_total']} (inline {counts['scripts_inline']}) • "
        f"**CSS:** {counts['stylesheets']}"
    )
    signal_text = ", ".join(data["flags"]) if data["flags"] else "—"
    md_summary = f"{header}\n\n{stats}\n\n**Signals:** {signal_text}"

    edge_lines = [f"- **{k}:** {v}" for k, v in (data["headers_edge"] or {}).items()]
    cache_lines = [f"- **{k}:** {v}" for k, v in (data["headers_cache"] or {}).items()]

    return (
        md_summary,
        "\n".join(edge_lines) or "—",
        "\n".join(cache_lines) or "—",
        data["url"],
        data.get("title", ""),
    )
# --- UI (first build) ----------------------------------------------------
# NOTE(review): everything from here down is duplicated below by a pasted-in
# update (second ui_analyze + second gr.Blocks). Because demo.launch()
# blocks the main thread, THIS first UI is the one actually served; the
# later definitions never take effect while the app runs. Merge artifact —
# confirm which version is intended and delete the other.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")) as demo:
    gr.Markdown("# Fake-Fast Lite\n**Sniffs edge-cache lipstick on a slow origin.** Not a lab test, just fast receipts.")
    with gr.Row():
        url = gr.Textbox(label="URL", placeholder="https://example.com")
        go = gr.Button("Analyze", variant="primary")
    out_summary = gr.Markdown()
    with gr.Row():
        out_edge = gr.Markdown(label="Edge / CDN Hints")
        out_cache = gr.Markdown(label="Cache Headers")
    with gr.Row():
        out_url = gr.Textbox(label="Final URL", interactive=False)
        out_title = gr.Textbox(label="Page Title", interactive=False)
    # Wire the button to the analyzer: 5 outputs matching this layout.
    go.click(fn=ui_analyze, inputs=url, outputs=[out_summary, out_edge, out_cache, out_url, out_title])

if __name__ == "__main__":
    # launch() blocks, so the module code after this point only executes
    # once the server shuts down.
    demo.launch()
# Add near imports
import urllib.parse as urlparse
import gradio as gr
# Modify your ui_analyze signature to accept the request object
def ui_analyze(url, request: gr.Request):
    """Gradio handler (query-param aware): analyze *url*, format Markdown.

    If the textbox is empty, falls back to a ?url= query parameter on the
    incoming request. Returns 7 values:
    (summary_md, edge_md, cache_md, final_url, title, copy_source, share_link).
    On a fetch failure, the error text fills the first slot.
    """
    # Prefill from ?url= if the textbox is empty.
    if not url and request and hasattr(request, "query_params"):
        url = request.query_params.get("url", "") or ""

    data = fetch(url)
    if "error" in data:
        return data["error"], None, None, url, "", "", ""

    counts = data["counts"]
    header = f"**{data['verdict']}** — score {data['score_0_100']}/100"
    stats = (
        f"**TTFB:** {data['ttfb_ms']} ms • "
        f"**Scripts:** {counts['scripts_total']} (inline {counts['scripts_inline']}) • "
        f"**CSS:** {counts['stylesheets']}"
    )
    edge_md = "\n".join(f"- **{k}:** {v}" for k, v in (data["headers_edge"] or {}).items()) or "—"
    cache_md = "\n".join(f"- **{k}:** {v}" for k, v in (data["headers_cache"] or {}).items()) or "—"
    signal_text = ", ".join(data["flags"]) if data["flags"] else "—"
    md_summary = f"{header}\n\n{stats}\n\n**Signals:** {signal_text}"

    # Shareable deep link back to this Space with ?url= prefilled.
    base = "https://huggingface.co/spaces/vibeaxis/fake-fast-lite"
    share = f"{base}?url={urlparse.quote(data['url'] or '')}"

    # (summary, edge, cache, final url, title, hidden copy source, share link)
    return md_summary, edge_md, cache_md, data["url"], data.get("title", ""), md_summary, share
# --- UI (second build, from the pasted update) ---------------------------
# NOTE(review): this rebinds `demo` AFTER the demo.launch() call above,
# which blocks the main thread — so this UI is never served. Merge
# artifact; confirm and keep exactly one version. Also note there is no
# launch guard for this demo.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="sky")) as demo:
    gr.Markdown("# Fake-Fast Lite\n**Sniffs edge-cache lipstick on a slow origin.** Not a lab test, just fast receipts.")
    with gr.Row():
        url = gr.Textbox(label="URL", placeholder="https://example.com")
        go = gr.Button("Analyze", variant="primary")
    out_summary = gr.Markdown()
    with gr.Row():
        out_edge = gr.Markdown(label="Edge / CDN Hints")
        out_cache = gr.Markdown(label="Cache Headers")
    with gr.Row():
        out_url = gr.Textbox(label="Final URL", interactive=False)
        out_title = gr.Textbox(label="Page Title", interactive=False)
    # Hidden textbox mirrors the summary so front-end JS can copy it.
    copy_src = gr.Textbox(visible=False)
    share_link = gr.Textbox(label="Share this result", interactive=False)
    # Copy button using front-end JS only (fn=None: no Python callback).
    copy_btn = gr.Button("Copy summary")
    # NOTE(review): `_js` is the Gradio 3.x keyword; Gradio 4 renamed it
    # to `js` — confirm the pinned Gradio version before deploying.
    copy_btn.click(
        fn=None,
        inputs=copy_src,
        outputs=None,
        _js="(txt) => { navigator.clipboard.writeText(txt); return; }"
    )
    # Analyzer wiring: 7 outputs matching the 7 return values of ui_analyze.
    go.click(
        fn=ui_analyze,
        inputs=url,
        outputs=[out_summary, out_edge, out_cache, out_url, out_title, copy_src, share_link]
    )