Vibeaxis commited on
Commit
6544d63
·
verified ·
1 Parent(s): 2de638d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -0
app.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time, re, socket
2
+ from contextlib import closing
3
+ import httpx
4
+ import gradio as gr
5
+ from bs4 import BeautifulSoup
6
+ import tldextract
7
+
8
# Per-request timeout (seconds) applied to every httpx call below.
TIMEOUT = 8.0
# Identifying User-Agent sent with every request.
UA = "VAX-FakeFastLite/1.0 (+https://vibeaxis.com)"

# Response headers that hint the page passed through a CDN / edge cache.
EDGE_HINT_HEADERS = [
    "cf-cache-status","x-cache","x-cache-hits","x-ser","server-timing",
    "x-worker-cache","x-akamai-transformed","x-cdn","x-amz-cf-pop","via"
]

# Standard HTTP caching headers surfaced verbatim in the report.
CACHE_HEADERS = [
    "cache-control","age","etag","last-modified","vary","expires","pragma","surrogate-control"
]
19
+
20
def first_byte_timing(url: str) -> float | None:
    """Rough time-to-first-byte for *url* in milliseconds, or None on failure.

    Streams a GET request and stops the clock as soon as the first
    non-empty chunk arrives. Not a lab-grade metric, but consistent
    enough for "fake-fast" sniffing.
    """
    client_opts = dict(
        http2=False,
        timeout=TIMEOUT,
        headers={"User-Agent": UA},
        follow_redirects=True,
    )
    try:
        with httpx.Client(**client_opts) as client:
            started = time.perf_counter()
            with client.stream("GET", url) as response:
                for piece in response.iter_bytes():
                    if not piece:
                        continue
                    return (time.perf_counter() - started) * 1000.0
    except Exception:
        return None
    # Body was completely empty: no first byte to time.
    return None
34
+
35
def fetch(url: str):
    """Fetch *url* once and derive "fake-fast" signals from headers and HTML.

    Returns a dict with keys: url, title, ttfb_ms, headers_edge,
    headers_cache, counts, flags, score_0_100, verdict — or
    {"error": "..."} if the request fails.
    """
    # Default to https:// when the caller omits the scheme.
    if not re.match(r'^https?://', url, flags=re.I):
        url = "https://" + url

    try:
        with httpx.Client(http2=False, timeout=TIMEOUT, headers={"User-Agent": UA}, follow_redirects=True) as c:
            head = c.head(url)
            # Single GET, reused both as the HEAD fallback and for the body
            # (the original issued this GET twice).
            body = c.get(url)
            # Fall back to the GET response if HEAD is blocked or carries no
            # useful metadata ("or 0" guards against an empty header value).
            if head.status_code >= 400 or int(head.headers.get("content-length", "0") or 0) == 0:
                head = body
            html = body.text
    except Exception as e:
        return {"error": f"Request failed: {e}"}

    # Rough DOM tallies via regex — cheap, no full parse needed for counting.
    scripts = re.findall(r"<script\b", html, flags=re.I)
    # "Inline" here = script tag NOT immediately followed by its closing tag.
    inlines = re.findall(r"<script\b[^>]*>(?!\s*</script>)", html, flags=re.I)
    css_links = re.findall(r"<link\b[^>]*rel=[\"']stylesheet[\"']", html, flags=re.I)

    # Title extraction; fall back to the stdlib parser if lxml isn't installed
    # (BeautifulSoup raises FeatureNotFound in that case).
    try:
        soup = BeautifulSoup(html, "lxml")
    except Exception:
        soup = BeautifulSoup(html, "html.parser")
    title = (soup.title.string.strip() if soup.title and soup.title.string else "")

    # Header subsets: edge/CDN hints vs. standard caching policy.
    edge = {h: head.headers.get(h) for h in EDGE_HINT_HEADERS if h in head.headers}
    cache = {h: head.headers.get(h) for h in CACHE_HEADERS if h in head.headers}

    # CDN-ish tell via "via" or known server banners.
    via = head.headers.get("via", "") + " " + head.headers.get("server", "")
    cdnish = any(k in via.lower() for k in ["cloudflare","akamai","fastly","cache","cdn","cloudfront","varnish"])

    # Approximate TTFB (separate streamed request; see first_byte_timing).
    ttfb_ms = first_byte_timing(url)

    script_count = len(scripts)
    inline_count = len(inlines)
    css_count = len(css_links)

    # max-age: only set when the directive is present with digits; int() on a
    # \d+ match cannot fail, so no try/except needed here.
    cache_control = head.headers.get("cache-control", "").lower()
    ma_match = re.search(r"max-age=(\d+)", cache_control)
    max_age = int(ma_match.group(1)) if ma_match else None

    # Age > 0 means the response was served from a cache.
    age_hdr = head.headers.get("age")
    try:
        age_val = int(age_hdr) if age_hdr is not None else None
    except (TypeError, ValueError):  # non-numeric Age header
        age_val = None

    cf_status = head.headers.get("cf-cache-status", "").lower()

    # Human-readable signals: "fake-fast-ish" if the CDN says HIT but TTFB is
    # still chunky and the DOM is heavy.
    flags = []
    if ttfb_ms is not None and ttfb_ms > 800:
        flags.append(f"High TTFB ({ttfb_ms:.0f} ms)")
    if cdnish and ("hit" in cf_status or (age_val and age_val > 0)):
        flags.append("Edge cache present")
    if script_count > 20:
        flags.append(f"Script buffet ({script_count})")
    if inline_count > 3:
        flags.append(f"Inline scripts ({inline_count})")
    if css_count > 6:
        flags.append(f"Many stylesheets ({css_count})")

    # "Fake-fast smell" score (0–100).
    score = 0
    if ttfb_ms is None:
        score += 15  # couldn't measure — mildly suspicious on its own
    elif ttfb_ms > 1500:
        score += 45
    elif ttfb_ms > 800:
        score += 30
    elif ttfb_ms > 400:
        score += 15
    if cdnish:
        score += 10
    if "hit" in cf_status:
        score += 10
    if script_count > 20:
        score += 15
    if inline_count > 3:
        score += 10
    if css_count > 6:
        score += 10
    if max_age is not None and max_age < 60:
        score += 10

    if score >= 60:
        verdict = "Likely Fake-Fast (edge cache masking slow origin)"
    elif score >= 35:
        verdict = "Borderline (mixed signals)"
    else:
        verdict = "Looks Clean (at a glance)"

    return {
        "url": url,
        "title": title,
        "ttfb_ms": None if ttfb_ms is None else round(ttfb_ms, 1),
        "headers_edge": edge,
        "headers_cache": cache,
        "counts": {
            "scripts_total": script_count,
            "scripts_inline": inline_count,
            "stylesheets": css_count
        },
        "flags": flags,
        "score_0_100": score,
        "verdict": verdict,
    }
148
+
149
def ui_analyze(url):
    """Gradio callback: run the analyzer and shape its result for the widgets.

    Returns a 5-tuple (summary markdown, edge-header markdown, cache-header
    markdown, final URL, page title). On failure the first element is the
    error string and the remaining four are None.
    """
    data = fetch(url)
    if "error" in data:
        return data["error"], None, None, None, None

    def bullets(headers):
        # Render a header dict as a markdown bullet list ("" when empty).
        return "\n".join(f"- **{k}:** {v}" for k, v in (headers or {}).items())

    counts = data["counts"]
    verdict_line = f"**{data['verdict']}** — score {data['score_0_100']}/100"
    meta_line = (
        f"**TTFB:** {data['ttfb_ms']} ms • **Scripts:** {counts['scripts_total']}"
        f" (inline {counts['scripts_inline']}) • **CSS:** {counts['stylesheets']}"
    )
    signal_text = ", ".join(data["flags"]) if data["flags"] else "—"

    summary_md = f"{verdict_line}\n\n{meta_line}\n\n**Signals:** {signal_text}"
    edge_md = bullets(data["headers_edge"]) or "—"
    cache_md = bullets(data["headers_cache"]) or "—"

    return summary_md, edge_md, cache_md, data["url"], data.get("title", "")
165
+
166
# --- Gradio UI -------------------------------------------------------------
# Declarative layout: one URL input plus an Analyze button, wired to
# ui_analyze; its 5-tuple return maps 1:1 onto the five output widgets.
# NOTE(review): "mint" is not one of Gradio's stock hue names — confirm
# gr.themes.Soft accepts it and doesn't raise at startup.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="mint", secondary_hue="sky")) as demo:
    gr.Markdown("# Fake-Fast Lite\n**Sniffs edge-cache lipstick on a slow origin.** Not a lab test, just fast receipts.")
    with gr.Row():
        url = gr.Textbox(label="URL", placeholder="https://example.com")
        go = gr.Button("Analyze", variant="primary")
    # Verdict/score summary rendered as markdown.
    out_summary = gr.Markdown()
    with gr.Row():
        out_edge = gr.Markdown(label="Edge / CDN Hints")
        out_cache = gr.Markdown(label="Cache Headers")
    with gr.Row():
        # Final URL may differ from the input (scheme added, redirects followed).
        out_url = gr.Textbox(label="Final URL", interactive=False)
        out_title = gr.Textbox(label="Page Title", interactive=False)

    go.click(fn=ui_analyze, inputs=url, outputs=[out_summary, out_edge, out_cache, out_url, out_title])

if __name__ == "__main__":
    demo.launch()