# jsbrowser / app.py
# wuhp's picture
# Update app.py
# d3389d5 verified
import asyncio
import json
import os
import base64
import sys
from pathlib import Path
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, UploadFile, Form
from fastapi.responses import HTMLResponse, JSONResponse, PlainTextResponse
from playwright.async_api import async_playwright, Page
# ASGI application object; routes and lifecycle hooks below attach to it.
app = FastAPI()

# Listening port, overridable through the PORT environment variable.
PORT = int(os.getenv("PORT", "7860"))

# Working directories live under the process's current directory and are
# created on import if they do not exist yet.
BASE_DIR = Path.cwd()
SCRIPTS_DIR = BASE_DIR / "scripts"
SCRIPTS_DIR.mkdir(exist_ok=True)
RESULTS_DIR = BASE_DIR / "results"
RESULTS_DIR.mkdir(exist_ok=True)

# Shared browser state (one browser for the whole process).
pw = browser = context = None  # Playwright driver, browser and context handles
pages = []                     # open pages, indexed by tab position
active_tab = 0                 # index into ``pages`` of the tab being streamed
console_logs, network_logs = {}, {}  # per-page log buffers keyed by id(page)
# =========================
# HELPER FUNCTIONS
# =========================
def resolve_url(q: str) -> str:
    """Turn address-bar input into a URL the browser can navigate to.

    Rules, applied in order:
      * empty input            -> ``https://example.com``
      * already http(s)        -> returned unchanged (after stripping)
      * looks like a hostname  -> prefixed with ``https://`` (contains a dot,
        no spaces, does not start with ``localhost``)
      * anything else          -> Google search for the text

    The search text is percent-encoded so characters such as ``&``, ``#``,
    ``+`` or ``%`` stay part of the query instead of corrupting the URL.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote_plus

    q = q.strip()
    if not q:
        return "https://example.com"
    if q.startswith(("http://", "https://")):
        return q
    if "." in q and " " not in q and not q.startswith("localhost"):
        return "https://" + q
    return f"https://www.google.com/search?q={quote_plus(q)}"
async def setup_page(page: Page, page_id: int):
    """Attach console and network listeners to a page.

    Log entries are collected in the module-level ``console_logs`` and
    ``network_logs`` dicts under the key ``id(page)``; both buffers are reset
    to empty lists when this function is called.

    Args:
        page: The Playwright page to instrument.
        page_id: Tab index supplied by callers; currently not used here.
    """
    # Buffers are only drained when a client message is processed, so cap
    # them to keep a busy page from growing memory without bound.
    max_entries = 1000

    pid = id(page)
    console_logs[pid] = []
    network_logs[pid] = []

    def remember(store, entry):
        # Look the bucket up on every event: other code rebinds
        # ``store[pid]`` to a fresh list when it drains the buffer.
        bucket = store.setdefault(pid, [])
        bucket.append(entry)
        if len(bucket) > max_entries:
            del bucket[:-max_entries]

    def handle_console(msg):
        remember(console_logs, {
            "type": msg.type,
            "text": msg.text,
            "args": [str(a) for a in msg.args[:5]],
        })

    def handle_request(req):
        remember(network_logs, {
            "type": "request",
            "url": req.url,
            "method": req.method,
            "resourceType": req.resource_type,
        })

    def handle_response(res):
        remember(network_logs, {
            "type": "response",
            "url": res.url,
            "status": res.status,
            "ok": res.ok,
        })

    page.on("console", handle_console)
    page.on("request", handle_request)
    page.on("response", handle_response)
    # Pass-through route: every request is intercepted and continued as-is.
    await page.route("**/*", lambda route: route.continue_())
async def send_tabs(ws: WebSocket):
    """Send the current tab list to the client.

    Emits one JSON message ``{"type": "tabs", "tabs": [...], "active": n}``
    where each tab is ``{"url", "title"}``. Closed pages are reported with
    the title ``"Closed"`` and pages whose title cannot be read with
    ``"Error"``, so list positions always match indices in ``pages``.
    """
    tabs = []
    # Iterate over a snapshot: ``pages`` may change while a title is awaited.
    for p in list(pages):
        try:
            if not p.is_closed():
                tabs.append({"url": p.url, "title": await p.title()})
            else:
                tabs.append({"url": "about:blank", "title": "Closed"})
        except Exception:
            # Not a bare ``except``: task cancellation must propagate so the
            # screenshot stream can actually be stopped.
            tabs.append({"url": "about:blank", "title": "Error"})
    await ws.send_json({"type": "tabs", "tabs": tabs, "active": active_tab})
def list_scripts():
    """Return the file names of the ``*.js`` files stored in SCRIPTS_DIR."""
    names = []
    for entry in SCRIPTS_DIR.glob("*.js"):
        # Skip directories that merely happen to end in ".js".
        if entry.is_file():
            names.append(entry.name)
    return names
# =========================
# BROWSER LIFECYCLE
# =========================
async def start_browser():
    """Start Playwright, launch headless Chromium and open the first tab.

    Populates the module-level ``pw``, ``browser`` and ``context`` handles
    and appends the first page to ``pages``. The page is registered before
    the initial navigation, and a failed navigation is only logged, so an
    unreachable start URL cannot stop the application from booting.
    """
    global pw, browser, context
    print("🚀 Starting Playwright browser...")
    pw = await async_playwright().start()
    browser = await pw.chromium.launch(
        headless=True,
        args=["--no-sandbox", "--disable-dev-shm-usage", "--disable-gpu"],
    )
    context = await browser.new_context(
        viewport={"width": 1280, "height": 800},
        user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    )
    page = await context.new_page()
    await setup_page(page, 0)
    pages.append(page)
    try:
        await page.goto("https://example.com")
    except Exception as e:
        # The tab still exists (blank); the user can navigate elsewhere.
        print(f"⚠️ Initial navigation failed: {e}")
    print("✅ Browser ready with 1 page")
@app.on_event("startup")
async def startup():
    """Application startup hook: bring up the shared browser."""
    await start_browser()
@app.on_event("shutdown")
async def shutdown():
    """Application shutdown hook: close the browser and stop Playwright.

    Cleanup is best-effort: a failure while closing the browser is logged
    and the Playwright driver is still stopped afterwards.
    """
    print("🛑 Shutting down browser...")
    try:
        if browser:
            await browser.close()
    except Exception as e:
        print(f"⚠️ Browser close failed: {e}")
    finally:
        # Drop page handles so nothing touches a dead browser afterwards.
        pages.clear()
        if pw:
            await pw.stop()
# =========================
# FRONTEND HTML
# =========================
# Single-page client served at "/". It opens a WebSocket to /ws, draws the
# binary JPEG frames it receives onto a canvas and sends user actions back as
# JSON messages of the form {"t": <action>, ...}.
#
# Notes for maintainers:
#   * Text coming from the remote page or the server (tab titles, file names)
#     is inserted with textContent / DOM nodes, never as markup.
#   * Each frame's blob URL is revoked once the image is drawn or fails.
#   * Clicks are scaled from the canvas's on-screen size to the size of the
#     last frame (defaults to the 1280x800 viewport used by the server).
#   * The server answers "load-file" with a "js-result" message; the client
#     remembers the pending load so the text goes into the editor.
HTML = """
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>🕷️ HF Scraping Browser</title>
<style>
*{box-sizing:border-box;margin:0;padding:0}
body{font-family:system-ui,sans-serif;background:#0f0f13;color:#e0e0e0;overflow:hidden;height:100vh;display:flex;flex-direction:column}
#toolbar{display:flex;gap:8px;padding:8px;background:#1a1a24;border-bottom:1px solid #333;align-items:center;flex-wrap:wrap}
#toolbar input{flex:1;min-width:200px;padding:8px 12px;background:#252535;border:1px solid #444;border-radius:4px;color:white;font-size:14px}
#toolbar button{padding:8px 14px;background:#3a3a5a;border:none;border-radius:4px;color:white;cursor:pointer;font-size:13px}
#toolbar button:hover{background:#4a4a6a}
#toolbar button.primary{background:#5d5dff}
#toolbar button.primary:hover{background:#7070ff}
#main{display:flex;flex:1;overflow:hidden}
#browser{flex:1;display:flex;flex-direction:column}
#canvas{flex:1;background:#000;cursor:crosshair}
#sidebar{width:380px;background:#15151e;border-left:1px solid #333;display:flex;flex-direction:column;overflow:hidden}
.sidebar-tab{padding:10px 14px;background:#1e1e2a;border-bottom:1px solid #333;cursor:pointer;font-size:13px}
.sidebar-tab.active{background:#2a2a3a;border-bottom-color:#5d5dff;color:#5d5dff}
.sidebar-content{flex:1;overflow-y:auto;padding:12px;display:none}
.sidebar-content.active{display:block}
.log-entry{padding:6px 8px;margin:4px 0;background:#1e1e2a;border-radius:3px;font-size:12px;font-family:monospace;white-space:pre-wrap;word-break:break-all}
.log-entry.console{border-left:3px solid #5d5dff}
.log-entry.network{border-left:3px solid #4caf50}
#js-editor{width:100%;height:180px;background:#1e1e2a;border:1px solid #444;border-radius:4px;color:#e0e0e0;padding:10px;font-family:monospace;font-size:13px;resize:vertical}
#js-output{background:#0d0d12;padding:10px;border-radius:4px;margin-top:8px;font-family:monospace;font-size:12px;max-height:180px;overflow-y:auto;white-space:pre-wrap}
#file-list{max-height:120px;overflow-y:auto}
.file-item{padding:6px 8px;margin:3px 0;background:#1e1e2a;border-radius:3px;font-size:12px;display:flex;justify-content:space-between}
.file-item button{padding:2px 6px;font-size:11px}
#tabs-bar{display:flex;gap:2px;padding:4px 8px;background:#1a1a24;border-top:1px solid #333;overflow-x:auto}
.tab{padding:6px 12px;background:#252535;border-radius:4px 4px 0 0;cursor:pointer;font-size:12px;white-space:nowrap;display:flex;align-items:center;gap:6px}
.tab.active{background:#3a3a5a;color:#5d5dff}
.tab-close{cursor:pointer;opacity:0.7}
.tab-close:hover{opacity:1;color:#f44}
.status{font-size:11px;color:#888;padding:4px 8px}
.hidden{display:none!important}
::-webkit-scrollbar{width:6px}
::-webkit-scrollbar-track{background:#1a1a24}
::-webkit-scrollbar-thumb{background:#444;border-radius:3px}
</style>
</head>
<body>
<div id="toolbar">
<button onclick="nav('back')">←</button>
<button onclick="nav('forward')">→</button>
<button onclick="nav('reload')">⟳</button>
<input id="url" placeholder="Enter URL or search..." onkeydown="if(event.key==='Enter')goto()">
<button class="primary" onclick="goto()">Go</button>
<button onclick="toggleSidebar()">☰ Panels</button>
<button onclick="executeJS()">▶ Run JS</button>
<button onclick="downloadPage()">💾 Save HTML</button>
<div class="status" id="status">Connecting...</div>
</div>
<div id="main">
<div id="browser">
<canvas id="canvas"></canvas>
<div id="tabs-bar"></div>
</div>
<div id="sidebar">
<div style="display:flex">
<div class="sidebar-tab active" onclick="showPanel('console',this)">Console</div>
<div class="sidebar-tab" onclick="showPanel('network',this)">Network</div>
<div class="sidebar-tab" onclick="showPanel('scripts',this)">Scripts</div>
</div>
<div id="panel-console" class="sidebar-content active"></div>
<div id="panel-network" class="sidebar-content"><div id="network-list"></div></div>
<div id="panel-scripts" class="sidebar-content">
<textarea id="js-editor" placeholder="// Write JS here&#10;return document.title;">return document.title;</textarea>
<div style="display:flex;gap:8px;margin:8px 0">
<button onclick="executeJS()" style="flex:1" class="primary">Execute</button>
<button onclick="loadSample('scraper')">Sample</button>
</div>
<div id="js-output">Results here...</div>
<hr style="border-color:#333;margin:12px 0">
<div style="display:flex;gap:4px;margin-bottom:8px">
<input type="file" id="js-file" accept=".js" style="display:none" onchange="uploadJS(this)">
<button onclick="document.getElementById('js-file').click()">📁 Upload</button>
<button onclick="saveJS()">💾 Save</button>
</div>
<div id="file-list"></div>
</div>
</div>
</div>
<script>
const protocol = location.protocol === 'https:' ? 'wss:' : 'ws:';
const ws = new WebSocket(`${protocol}//${location.host}/ws`);
const canvas = document.getElementById('canvas');
const ctx = canvas.getContext('2d');
// Size of the most recent frame in remote-page pixels; used to map clicks.
let frameW = 1280, frameH = 800;
// Name of a script whose content was requested and not yet received.
let pendingScript = null;
function resize(){
  const sidebarHidden = document.getElementById('sidebar').classList.contains('hidden');
  canvas.width = Math.max(300, window.innerWidth - (sidebarHidden ? 0 : 380));
  canvas.height = window.innerHeight - 80;
}
window.onresize = resize;
resize();
ws.onopen = () => {
  console.log('✅ WS connected');
  setStatus('Connected');
  send({t:'init'});
};
ws.onclose = () => { setStatus('Disconnected'); setTimeout(()=>location.reload(),2000); };
ws.onerror = (e) => { console.error('WS error',e); setStatus('Connection error'); };
ws.onmessage = (e) => {
  if(e.data instanceof Blob){
    const blobUrl = URL.createObjectURL(e.data);
    const img = new Image();
    img.onload = () => {
      frameW = img.naturalWidth || frameW;
      frameH = img.naturalHeight || frameH;
      ctx.drawImage(img,0,0,canvas.width,canvas.height);
      URL.revokeObjectURL(blobUrl); // one URL per frame: release it
    };
    img.onerror = () => URL.revokeObjectURL(blobUrl);
    img.src = blobUrl;
  } else {
    try{ handleWSMessage(JSON.parse(e.data)); }
    catch(err){ console.error('Parse error',err); }
  }
};
function send(data){
  if(ws.readyState===WebSocket.OPEN) ws.send(JSON.stringify(data));
  // Retry only while the socket is still connecting; a closed socket never reopens.
  else if(ws.readyState===WebSocket.CONNECTING) setTimeout(()=>send(data),100);
}
function handleWSMessage(msg){
  if(msg.type==='console') appendLog('panel-console',msg.data,'console');
  else if(msg.type==='network') appendLog('network-list',msg.data,'network');
  else if(msg.type==='js-result'){
    if(pendingScript!==null && typeof msg.data==='string'){
      // Reply to a load-file request: put the script into the editor.
      document.getElementById('js-editor').value=msg.data;
      setStatus('📄 Loaded '+pendingScript);
      pendingScript=null;
      return;
    }
    const out=document.getElementById('js-output');
    out.textContent = typeof msg.data==='object'?JSON.stringify(msg.data,null,2):String(msg.data);
    setStatus('✅ JS done');
  }
  else if(msg.type==='html') downloadBlob(msg.data,'page.html','text/html');
  else if(msg.type==='file-list') renderFileList(msg.files);
  else if(msg.type==='tabs') renderTabs(msg.tabs,msg.active);
  else if(msg.type==='status') setStatus(msg.text);
  else if(msg.type==='error'){ pendingScript=null; setStatus('❌ '+msg.message); }
}
function appendLog(panelId,entry,cls){
  const panel=document.getElementById(panelId);
  const div=document.createElement('div');
  div.className=`log-entry ${cls}`;
  div.textContent=`[${entry.type?.toUpperCase()||'LOG'}] ${entry.text||entry.url||JSON.stringify(entry)}`;
  panel.insertBefore(div,panel.firstChild);
  if(panel.children.length>100) panel.removeChild(panel.lastChild);
}
function renderFileList(files){
  const list=document.getElementById('file-list');
  list.textContent='';
  if(!files||!files.length){
    const empty=document.createElement('div');
    empty.style.cssText='color:#666;font-size:12px';
    empty.textContent='No scripts';
    list.appendChild(empty);
    return;
  }
  for(const f of files){
    const row=document.createElement('div');
    row.className='file-item';
    const label=document.createElement('span');
    label.textContent='📄 '+f;
    const btn=document.createElement('button');
    btn.textContent='Load';
    btn.onclick=()=>loadScript(f);
    row.append(label,btn);
    list.appendChild(row);
  }
}
function renderTabs(tabs,active){
  const bar=document.getElementById('tabs-bar');
  bar.textContent='';
  (tabs||[]).forEach((t,i)=>{
    const tab=document.createElement('div');
    tab.className='tab'+(i===active?' active':'');
    tab.onclick=()=>switchTab(i);
    const title=t.title||'Tab';
    const label=document.createElement('span');
    label.textContent=title.length>15?title.slice(0,15)+'...':title;
    const close=document.createElement('span');
    close.className='tab-close';
    close.textContent='×';
    close.onclick=(ev)=>{ ev.stopPropagation(); closeTab(i); };
    tab.append(label,close);
    bar.appendChild(tab);
  });
  const add=document.createElement('div');
  add.className='tab';
  add.title='New tab';
  add.textContent='+';
  add.onclick=()=>send({t:'new-tab'});
  bar.appendChild(add);
}
function showPanel(name,el){
  document.querySelectorAll('.sidebar-tab').forEach(t=>t.classList.remove('active'));
  document.querySelectorAll('.sidebar-content').forEach(c=>c.classList.remove('active'));
  if(el) el.classList.add('active');
  document.getElementById(`panel-${name}`).classList.add('active');
  if(name==='scripts') send({t:'list-files'});
}
function setStatus(t){ document.getElementById('status').textContent=t; console.log('[Status]',t); }
function goto(){
  const q=document.getElementById('url').value.trim();
  if(!q){ setStatus('⚠️ Enter URL'); return; }
  setStatus(`🔍 Loading: ${q.slice(0,40)}...`);
  send({t:'goto',q});
}
function nav(a){ setStatus(`🔄 ${a}...`); send({t:a}); }
function toggleSidebar(){
  const sb=document.getElementById('sidebar');
  sb.classList.toggle('hidden');
  resize();
}
canvas.onclick=(e)=>{
  const r=canvas.getBoundingClientRect();
  if(!r.width||!r.height) return;
  // The frame is stretched over the canvas, so map screen pixels back to page pixels.
  send({t:'click',x:(e.clientX-r.left)*frameW/r.width,y:(e.clientY-r.top)*frameH/r.height});
};
document.onkeydown=(e)=>{
  if(['INPUT','TEXTAREA'].includes(e.target.tagName)) return;
  send({t:'key',k:e.key});
};
function executeJS(){
  const code=document.getElementById('js-editor').value;
  send({t:'execute-js',code});
  setStatus('⏳ Running JS...');
}
function loadSample(type){
  const s={scraper:`// Extract links\nreturn Array.from(document.querySelectorAll('a')).map(a=>({text:a.innerText?.trim().slice(0,30),href:a.href})).filter(l=>l.href).slice(0,20);`};
  document.getElementById('js-editor').value=s[type]||'';
}
function uploadJS(input){
  const f=input.files[0]; if(!f) return;
  const r=new FileReader();
  r.onload=(e)=>{ document.getElementById('js-editor').value=e.target.result; setStatus('📄 Loaded'); };
  r.readAsText(f);
}
function saveJS(){
  const name=prompt('Script name:','myscript'); if(!name) return;
  send({t:'save-file',name:name+'.js',content:document.getElementById('js-editor').value});
  setStatus('💾 Saved');
}
function loadScript(name){ pendingScript=name; send({t:'load-file',name}); setStatus(`📥 Loading ${name}...`); }
function downloadPage(){ send({t:'get-html'}); setStatus('⏳ Preparing HTML...'); }
function downloadBlob(b64,filename,mime){
  const bytes=Uint8Array.from(atob(b64),c=>c.charCodeAt(0));
  const a=document.createElement('a');
  const href=URL.createObjectURL(new Blob([bytes],{type:mime}));
  a.href=href;
  a.download=filename; a.click();
  setTimeout(()=>URL.revokeObjectURL(href),1000);
}
function switchTab(i){ send({t:'switch-tab',index:i}); }
function closeTab(i){ send({t:'close-tab',index:i}); }
window.onload=()=>document.getElementById('url').focus();
</script>
</body>
</html>
"""
# =========================
# WEBSOCKET HANDLER
# =========================
def _safe_script_name(raw):
    """Reduce a client-supplied name to a safe ``<stem>.js`` file name, or None."""
    if not isinstance(raw, str):
        return None
    cleaned = "".join(c for c in raw if c.isalnum() or c in "._-")
    stem = cleaned[:-3] if cleaned.endswith(".js") else cleaned
    stem = stem.strip(".")  # rules out "", "." and ".."
    return stem + ".js" if stem else None


def _resolve_script(name):
    """Return the path of an existing script directly inside SCRIPTS_DIR, or None."""
    if not isinstance(name, str) or not name:
        return None
    if Path(name).name != name:  # any directory component is rejected
        return None
    path = SCRIPTS_DIR / name
    if path.suffix != ".js" or not path.is_file():
        return None
    return path


@app.websocket("/ws")
async def websocket_endpoint(ws: WebSocket):
    """Serve one browser-control client.

    A background task streams JPEG screenshots of the active tab as binary
    frames while this coroutine processes JSON commands ``{"t": <action>}``:
    ``init``, ``goto``, ``back``, ``forward``, ``reload``, ``click``, ``key``,
    ``new-tab``, ``switch-tab``, ``close-tab``, ``execute-js``, ``save-file``,
    ``load-file``, ``list-files``, ``get-html`` and ``clear-logs``.

    After each handled command the newest buffered console (up to 3) and
    network (up to 5) entries of the current page are forwarded and the
    buffers are emptied. Browser state is module-global and therefore shared
    by all connected clients.
    """
    global active_tab
    await ws.accept()
    print(f"✅ WS accepted from {ws.client}")
    await send_tabs(ws)

    async def stream():
        # ``active_tab`` is assigned below; without this declaration it would
        # be local to ``stream`` and every read would raise UnboundLocalError.
        global active_tab
        while True:
            try:
                if not pages or active_tab >= len(pages):
                    await asyncio.sleep(0.1)
                    continue
                page = pages[active_tab]
                if page.is_closed():
                    active_tab = max(0, active_tab - 1)
                    await send_tabs(ws)
                    # Avoid a busy loop when no open page is left to show.
                    await asyncio.sleep(0.1)
                    continue
                img = await asyncio.wait_for(
                    page.screenshot(type="jpeg", quality=65, scale="css"),
                    timeout=5.0,
                )
                await ws.send_bytes(img)
                await asyncio.sleep(0.033)
            except asyncio.CancelledError:
                raise
            except asyncio.TimeoutError:
                await asyncio.sleep(0.1)
            except WebSocketDisconnect:
                return
            except Exception as e:
                print(f"❌ Stream error: {e}")
                await asyncio.sleep(0.2)

    stream_task = asyncio.create_task(stream())
    try:
        while True:
            raw = await ws.receive_text()
            try:
                msg = json.loads(raw)
            except json.JSONDecodeError:
                msg = None
            if not isinstance(msg, dict):
                # A malformed message should not end the session.
                await ws.send_json({"type": "error", "message": "Invalid message"})
                continue
            t = msg.get("t")
            if not pages:
                await ws.send_json({"type": "error", "message": "No pages"})
                continue
            page = pages[active_tab] if 0 <= active_tab < len(pages) else pages[0]
            print(f"📨 {t} | tab={active_tab}")
            if t == "init":
                await send_tabs(ws)
            elif t == "goto":
                q = str(msg.get("q", "")).strip()
                if not q:
                    await ws.send_json({"type": "status", "text": "⚠️ Empty URL"})
                    continue
                url = resolve_url(q)
                print(f"🌐 Going to: {url}")
                try:
                    await page.goto(url, wait_until="domcontentloaded", timeout=30000)
                    title = await page.title()
                    await ws.send_json({"type": "status", "text": f"✅ {title[:40]}"})
                    await send_tabs(ws)
                except Exception as e:
                    await ws.send_json({"type": "error", "message": f"Nav failed: {str(e)[:80]}"})
            elif t == "back":
                try:
                    await page.go_back(timeout=15000)
                except Exception as e:
                    await ws.send_json({"type": "error", "message": f"Back: {e}"})
            elif t == "forward":
                try:
                    await page.go_forward(timeout=15000)
                except Exception as e:
                    await ws.send_json({"type": "error", "message": f"Forward: {e}"})
            elif t == "reload":
                try:
                    await page.reload(wait_until="domcontentloaded", timeout=30000)
                except Exception as e:
                    await ws.send_json({"type": "error", "message": f"Reload: {e}"})
            elif t == "click":
                # Input events are best-effort: a bad payload is only logged.
                try:
                    await page.mouse.click(msg["x"], msg["y"], delay=10)
                except Exception as e:
                    print(f"⚠️ Click ignored: {e}")
            elif t == "key":
                try:
                    await page.keyboard.press(msg["k"])
                except Exception as e:
                    print(f"⚠️ Key ignored: {e}")
            elif t == "new-tab":
                try:
                    np = await context.new_page()
                    await setup_page(np, len(pages))
                    pages.append(np)
                    active_tab = len(pages) - 1
                    await send_tabs(ws)
                except Exception as e:
                    await ws.send_json({"type": "error", "message": f"Tab: {e}"})
            elif t == "switch-tab":
                i = msg.get("index", 0)
                if isinstance(i, int) and 0 <= i < len(pages):
                    active_tab = i
                    try:
                        await pages[active_tab].bring_to_front()
                    except Exception as e:
                        await ws.send_json({"type": "error", "message": f"Tab: {e}"})
                    await send_tabs(ws)
            elif t == "close-tab":
                i = msg.get("index", 0)
                if isinstance(i, int) and len(pages) > 1 and 0 <= i < len(pages):
                    closing = pages[i]
                    try:
                        await closing.close()
                    except Exception as e:
                        print(f"⚠️ Close failed: {e}")
                    if closing in pages:
                        pages.remove(closing)
                    # Release buffered logs of the closed page.
                    console_logs[id(closing)] = []
                    network_logs[id(closing)] = []
                    if i < active_tab:
                        # Pages after the closed one moved left by one slot.
                        active_tab -= 1
                    if active_tab >= len(pages):
                        active_tab = len(pages) - 1
                    await send_tabs(ws)
            elif t == "execute-js":
                code = msg.get("code", "")
                try:
                    # The user's code becomes the body of an async function.
                    # The newline keeps a trailing "//" comment from
                    # swallowing the closing brace of the try block.
                    wrapped = (
                        "(async()=>{try{" + code
                        + "\n}catch(e){return{__err__:String((e&&e.message)||e)}}})()"
                    )
                    result = await page.evaluate(wrapped)
                    if isinstance(result, dict) and result.get("__err__"):
                        await ws.send_json({"type": "js-result", "data": {"error": result["__err__"]}})
                    else:
                        await ws.send_json({"type": "js-result", "data": result})
                except Exception as e:
                    await ws.send_json({"type": "js-result", "data": {"error": str(e)}})
            elif t == "save-file":
                name = _safe_script_name(msg.get("name", "script.js"))
                content = msg.get("content", "")
                if name is None or not isinstance(content, str):
                    await ws.send_json({"type": "error", "message": "Invalid script"})
                else:
                    try:
                        (SCRIPTS_DIR / name).write_text(content, encoding="utf-8")
                    except OSError as e:
                        await ws.send_json({"type": "error", "message": f"Save: {e}"})
                    else:
                        await ws.send_json({"type": "file-list", "files": list_scripts()})
            elif t == "load-file":
                path = _resolve_script(msg.get("name", ""))
                if path is not None:
                    text = path.read_text(encoding="utf-8", errors="replace")
                    await ws.send_json({"type": "js-result", "data": text})
                else:
                    await ws.send_json({"type": "error", "message": "Not found"})
            elif t == "list-files":
                await ws.send_json({"type": "file-list", "files": list_scripts()})
            elif t == "get-html":
                try:
                    html = await page.content()
                    b64 = base64.b64encode(html.encode("utf-8", errors="ignore")).decode()
                    await ws.send_json({"type": "html", "data": b64})
                except Exception as e:
                    await ws.send_json({"type": "error", "message": f"HTML: {e}"})
            elif t == "clear-logs":
                pid = id(page)
                console_logs[pid] = []
                network_logs[pid] = []
            # Forward logs
            pid = id(page)
            if console_logs.get(pid):
                for log in console_logs[pid][-3:]:
                    await ws.send_json({"type": "console", "data": log})
                console_logs[pid] = []
            if network_logs.get(pid):
                for log in network_logs[pid][-5:]:
                    await ws.send_json({"type": "network", "data": log})
                network_logs[pid] = []
    except WebSocketDisconnect:
        print("🔌 Client disconnected")
    except Exception as e:
        print(f"💥 WS error: {e}")
        import traceback
        traceback.print_exc()
    finally:
        stream_task.cancel()
        try:
            await stream_task
        except (asyncio.CancelledError, Exception):
            # The cancellation is expected; anything else is already logged.
            pass
# =========================
# HTTP ENDPOINTS
# =========================
@app.get("/", response_class=HTMLResponse)
async def home():
    """Serve the single-page browser UI."""
    return HTMLResponse(content=HTML)
@app.get("/health")
async def health():
    """Report liveness plus tab counters: total, active index and open pages."""
    open_pages = [p for p in pages if not p.is_closed()]
    report = {"status": "ok"}
    report["tabs"] = len(pages)
    report["active"] = active_tab
    report["alive"] = len(open_pages)
    return report
@app.get("/scripts/{filename}")
async def get_script(filename: str):
    """Return a saved script as ``application/javascript``, or a JSON 404.

    Only plain ``*.js`` file names that refer to a regular file directly
    inside SCRIPTS_DIR are served.
    """
    path = SCRIPTS_DIR / filename
    # A name with any directory component could point outside SCRIPTS_DIR.
    is_plain_name = Path(filename).name == filename
    if is_plain_name and path.suffix == ".js" and path.is_file():
        # Uploaded scripts are stored as raw bytes, so decode leniently
        # instead of failing the request on invalid UTF-8.
        body = path.read_text(encoding="utf-8", errors="replace")
        return PlainTextResponse(body, media_type="application/javascript")
    return JSONResponse({"error": "Not found"}, status_code=404)
@app.post("/upload-script")
async def upload_script(file: UploadFile = None, name: str = Form(None)):
    """Store an uploaded script in SCRIPTS_DIR.

    The target name comes from the ``name`` form field, falling back to the
    upload's own file name. It is reduced to letters, digits and ``._-`` and
    always ends in exactly one ``.js``.

    Returns ``{"status": "saved", "file": <name>}`` on success, or a JSON
    error with status 400 (missing file / unusable name) or 500 (write
    failure).
    """
    if not file:
        return JSONResponse({"error": "No file"}, status_code=400)
    raw_name = name or file.filename or ""
    cleaned = "".join(c for c in raw_name if c.isalnum() or c in "._-")
    # Drop only a trailing ".js"; occurrences inside the name are kept.
    stem = cleaned[:-3] if cleaned.endswith(".js") else cleaned
    stem = stem.strip(".")  # rules out "", "." and ".."
    if not stem:
        return JSONResponse({"error": "Invalid file name"}, status_code=400)
    safe = stem + ".js"
    content = await file.read()
    try:
        (SCRIPTS_DIR / safe).write_bytes(content)
    except OSError as e:
        return JSONResponse({"error": f"Write failed: {e}"}, status_code=500)
    return {"status": "saved", "file": safe}
# =========================
# RUN
# =========================
if __name__ == "__main__":
    # uvicorn is only needed when the module is run as a script.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": PORT, "log_level": "info"}
    uvicorn.run(app, **server_options)