Yuvan666 committed on
Commit
be3cb04
·
0 Parent(s):

feat: VNC Mirror with live browser streaming - Xvfb, x11vnc, noVNC stack

Browse files
Files changed (6) hide show
  1. Dockerfile +65 -0
  2. README.md +40 -0
  3. agent.py +163 -0
  4. app.py +101 -0
  5. requirements.txt +12 -0
  6. supervisord.conf +48 -0
Dockerfile ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # ALTYZEN Agent Mirror - VNC Live Streaming
3
+ # Separate Space for Visual Debugging
4
+ # =============================================================================
5
+
6
+ FROM python:3.11-slim
7
+
8
+ ENV DEBIAN_FRONTEND=noninteractive
9
+ ENV DISPLAY=:99
10
+
11
+ # Install all dependencies
12
+ RUN apt-get update && apt-get install -y --no-install-recommends \
13
+ xvfb \
14
+ fluxbox \
15
+ x11vnc \
16
+ websockify \
17
+ supervisor \
18
+ net-tools \
19
+ curl \
20
+ wget \
21
+ git \
22
+ libnss3 \
23
+ libnspr4 \
24
+ libatk1.0-0 \
25
+ libatk-bridge2.0-0 \
26
+ libcups2 \
27
+ libdrm2 \
28
+ libxkbcommon0 \
29
+ libxcomposite1 \
30
+ libxdamage1 \
31
+ libxfixes3 \
32
+ libxrandr2 \
33
+ libgbm1 \
34
+ libasound2 \
35
+ libpango-1.0-0 \
36
+ libcairo2 \
37
+ libatspi2.0-0 \
38
+ libgtk-3-0 \
39
+ fonts-liberation \
40
+ fonts-noto-color-emoji \
41
+ && rm -rf /var/lib/apt/lists/*
42
+
43
+ # Install noVNC from GitHub
44
+ RUN git clone --depth 1 https://github.com/novnc/noVNC.git /opt/novnc && \
45
+ git clone --depth 1 https://github.com/novnc/websockify.git /opt/novnc/utils/websockify && \
46
+ ln -sf /opt/novnc/vnc.html /opt/novnc/index.html
47
+
48
+ WORKDIR /app
49
+
50
+ COPY requirements.txt .
51
+ RUN pip install --no-cache-dir --upgrade pip && \
52
+ pip install --no-cache-dir -r requirements.txt
53
+
54
+ RUN playwright install chromium --with-deps
55
+
56
+ COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
57
+ COPY app.py .
58
+ COPY agent.py .
59
+
60
+ RUN mkdir -p /var/log/supervisor /var/run /root/.fluxbox
61
+ RUN echo "session.screen0.toolbar.visible: false" > /root/.fluxbox/init
62
+
63
+ EXPOSE 7860
64
+
65
+ CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
README.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: ALTYZEN Agent Mirror - VNC
3
+ emoji: 📺
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ app_port: 7860
10
+ ---
11
+
12
+ # ALTYZEN Agent Mirror - VNC Live Streaming
13
+
14
+ **Watch the AI agent browse the web in real-time!**
15
+
16
+ This Space runs a VISIBLE browser on a virtual display and streams it via noVNC.
17
+
18
+ ## 🎥 How to Watch
19
+
20
+ 1. Open this Space URL
21
+ 2. Click "Connect" in the noVNC interface
22
+ 3. Send a validation request and watch the browser work!
23
+
24
+ ## 📡 API
25
+
26
+ ```bash
27
+ curl -X POST https://YOUR-SPACE.hf.space/run-task \
28
+ -H "Content-Type: application/json" \
29
+ -d '{"task_id": "test", "task_type": "validate_order", "data": {"email": "test@example.com", "phone": "+1234567890", "zip": "10001", "city": "New York", "state": "NY"}}'
30
+ ```
31
+
32
+ ## 🔧 Environment Variables
33
+
34
+ - `OPENROUTER_API_KEY` - Required for LLM
35
+
36
+ ## 📺 Embed in Dashboard
37
+
38
+ ```html
39
+ <iframe src="https://YOUR-SPACE.hf.space/vnc.html?autoconnect=true" width="100%" height="600"></iframe>
40
+ ```
agent.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ VNC Mirror Agent - VISIBLE Browser Mode
3
+ =======================================
4
+ headless=False so you can SEE the browser in the VNC stream!
5
+ Includes fallback model support.
6
+ """
7
+
8
+ import os
9
+ import logging
10
+ from typing import Dict, Any
11
+ from dotenv import load_dotenv
12
+
13
# Pull variables from a local .env file (if present) into the process
# environment before any configuration is read.
load_dotenv()

# Mirror the OpenRouter key into OPENAI_API_KEY unless one is already set.
# NOTE(review): presumably so the OpenAI-compatible client imported below
# finds a key in its default environment variable - confirm.
if not os.getenv("OPENAI_API_KEY") and os.getenv("OPENROUTER_API_KEY"):
    os.environ["OPENAI_API_KEY"] = os.environ["OPENROUTER_API_KEY"]
17
+
18
+ from langchain_openai import ChatOpenAI
19
+ from browser_use import Agent, Controller
20
+ from browser_use.browser.browser import Browser, BrowserConfig
21
+
22
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
23
+ logger = logging.getLogger(__name__)
24
+
25
+ controller = Controller()
26
+
27
+
28
async def validate_order(order_data: Dict[str, Any]) -> Dict[str, Any]:
    """Validate an order's email, phone and geo data with a VISIBLE browser.

    A browser-use agent is run in a non-headless browser so the session can
    be watched in the VNC stream. The primary model is tried first; if it
    raises, the fallback model is tried with the same browser instance.

    Args:
        order_data: Order fields. Reads 'email', 'phone', 'zip', 'city',
            'state' and 'task_id' (all optional); the whole mapping is also
            handed to ``_parse_result``.

    Returns:
        The dict built by ``_parse_result`` with 'logs' and 'task_id' added,
        or a short dict with decision 'UNKNOWN' and error 'No API key' when
        OPENROUTER_API_KEY is not set.
    """
    logger.info("🚀 Starting validation - WATCH THE VNC STREAM!")

    logs = []

    def log(msg):
        # Every progress message goes both to the logger and to the list
        # that is returned to the caller.
        logger.info(msg)
        logs.append(msg)

    email = order_data.get('email', '')
    phone = order_data.get('phone', '')
    zip_code = order_data.get('zip', '')
    city = order_data.get('city', '')
    state = order_data.get('state', '')
    task_id = order_data.get('task_id', 'unknown')

    log(f"📧 Email: {email}")
    log(f"📞 Phone: {phone}")
    log(f"📍 Geo: {zip_code}, {city}, {state}")
    log(f"📺 Display: {os.environ.get('DISPLAY', 'NOT SET')} - VISIBLE MODE!")

    task = f"""
    You are a Validation Expert. Perform these 3 steps:

    STEP 1: EMAIL VALIDATION (Browser)
    - Go to 'https://email-checker.net/'
    - Input '{email}' and check result.
    - Extract: 'Valid', 'Invalid', or 'Risky'.

    STEP 2: PHONE VALIDATION (Browser)
    - Use a phone validator tool.
    - Input '{phone}' and check status.

    STEP 3: GEO VALIDATION (Internal Knowledge ONLY)
    - Does Zip '{zip_code}' belong to City '{city}' in State '{state}'?
    - Return 'Match' or 'Mismatch'.

    OUTPUT JSON:
    {{
    "email_status": "Valid/Invalid/Risky",
    "phone_status": "Valid/Invalid",
    "geo_match": true/false,
    "summary": "explanation"
    }}
    """

    api_key = os.getenv("OPENROUTER_API_KEY")
    base_url = "https://openrouter.ai/api/v1"

    if not api_key:
        return {"task_id": task_id, "decision": "UNKNOWN", "error": "No API key", "logs": logs}

    def make_llm(model):
        # Both models share every setting except the model identifier.
        return ChatOpenAI(
            model=model,
            api_key=api_key,
            base_url=base_url,
            temperature=0.1,
            default_headers={"HTTP-Referer": "https://altyzen.com", "X-Title": "Altyzen VNC Mirror"},
        )

    llm_primary = make_llm("nvidia/nemotron-nano-12b-v2-vl:free")
    llm_fallback = make_llm("google/gemini-2.0-flash-exp:free")

    async def run_agent(llm):
        # One full agent run against the shared browser; returns its final result.
        agent = Agent(
            task=task,
            llm=llm,
            browser=browser,
            controller=controller,
            use_vision=True,
            validate_output=False,
        )
        history = await agent.run()
        return history.final_result()

    # VISIBLE BROWSER for VNC streaming!
    log("🖥️ Launching VISIBLE browser on display :99")
    browser = Browser(config=BrowserConfig(headless=False, disable_security=True))
    result = None

    try:
        try:
            log("🤖 Attempt 1: Using Nvidia Nemotron...")
            result = await run_agent(llm_primary)
            log("✅ Nvidia Nemotron completed!")
        except Exception as e:
            log(f"⚠️ Primary failed: {str(e)[:100]}")
            log("🔄 Switching to Gemini fallback...")
            try:
                result = await run_agent(llm_fallback)
                log("✅ Gemini completed!")
            except Exception as fallback_err:
                log(f"❌ Fallback also failed: {str(fallback_err)[:100]}")
                result = None
    finally:
        # Close in ``finally`` so the browser is released even when the
        # request is cancelled (CancelledError is not an ``Exception``).
        # Closing stays best-effort, but a failure is now recorded instead
        # of being silently swallowed.
        try:
            await browser.close()
            log("🔌 Browser closed")
        except Exception as close_err:
            log(f"⚠️ Browser close failed: {str(close_err)[:100]}")

    parsed = _parse_result(result, order_data)
    parsed["logs"] = logs
    parsed["task_id"] = task_id
    return parsed
130
+
131
+
132
+ def _parse_result(result, order_data):
133
+ import json
134
+ if result is None:
135
+ return {"decision": "UNKNOWN", "email_valid": False, "phone_valid": False, "geo_valid": False, "reasoning": "All models failed"}
136
+
137
+ parsed = {}
138
+ if isinstance(result, str):
139
+ try:
140
+ if "{" in result:
141
+ json_start = result.find("{")
142
+ json_end = result.rfind("}") + 1
143
+ parsed = json.loads(result[json_start:json_end])
144
+ except:
145
+ parsed = {"raw": result}
146
+ elif isinstance(result, dict):
147
+ parsed = result
148
+
149
+ email_valid = "valid" in str(parsed.get("email_status", "")).lower() and "invalid" not in str(parsed.get("email_status", "")).lower()
150
+ phone_valid = "valid" in str(parsed.get("phone_status", "")).lower() and "invalid" not in str(parsed.get("phone_status", "")).lower()
151
+ geo_valid = parsed.get("geo_match", False) if isinstance(parsed.get("geo_match"), bool) else str(parsed.get("geo_match", "")).lower() == "true"
152
+
153
+ decision = "APPROVED" if email_valid and phone_valid and geo_valid else "BLOCKED"
154
+
155
+ return {
156
+ "order_id": order_data.get("order_id", "UNKNOWN"),
157
+ "decision": decision,
158
+ "email_valid": email_valid,
159
+ "phone_valid": phone_valid,
160
+ "geo_valid": geo_valid,
161
+ "reasoning": parsed.get("summary", "Validation completed"),
162
+ "raw_result": parsed
163
+ }
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ VNC Mirror - Live Browser Streaming
3
+ ====================================
4
+ This runs the browser in VISIBLE mode on virtual display :99
5
+ The browser is streamed via noVNC on port 7860
6
+ """
7
+
8
+ import os
9
+ import logging
10
+ from datetime import datetime
11
+ from typing import Dict, Any, Optional
12
+
13
+ from fastapi import FastAPI, HTTPException
14
+ from fastapi.middleware.cors import CORSMiddleware
15
+ from pydantic import BaseModel
16
+
17
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
18
+ logger = logging.getLogger(__name__)
19
+
20
+ from agent import validate_order
21
+
22
# The FastAPI application; the route handlers defined below register on it.
app = FastAPI(title="VNC Mirror - Agent Viewer", version="1.0.0")

# CORS is fully open: any origin, method and header is accepted.
# NOTE(review): wildcard origins together with allow_credentials=True is a
# very permissive combination - confirm it is intended for this service.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
31
+
32
+
33
class TaskRequest(BaseModel):
    """Request body accepted by POST /run-task."""

    # Caller-chosen identifier; echoed back in the response and the logs.
    task_id: str
    # Selects the handler; run_task accepts 'validate_order' and 'validate_email'.
    task_type: str
    # Free-form payload forwarded to the validation agent.
    data: Dict[str, Any]
37
+
38
+
39
class TaskResponse(BaseModel):
    """Response envelope returned by POST /run-task."""

    # Identifier copied from the request.
    task_id: str
    # 'success' or 'error', as set by run_task.
    status: str
    # Validation result dict; left unset on error responses.
    result: Optional[Dict[str, Any]] = None
    # Exception text; left unset on success responses.
    error: Optional[str] = None
    # Progress messages collected while the task ran.
    logs: Optional[list] = None
    # Wall-clock duration of the request handler, in milliseconds.
    execution_time_ms: int = 0
46
+
47
+
48
@app.get("/health")
async def health():
    """Report a static health payload plus display and API-key state."""
    display = os.environ.get("DISPLAY", "NOT SET")
    if os.getenv("OPENROUTER_API_KEY"):
        key_state = "SET"
    else:
        key_state = "NOT SET"
    # "status" and "vnc" are fixed strings; nothing is probed here.
    return {
        "status": "healthy",
        "display": display,
        "vnc": "active",
        "openrouter_key": key_state,
    }
56
+
57
+
58
@app.get("/api/status")
async def api_status():
    """Return the fixed readiness descriptor for this service."""
    descriptor = dict(ready=True, vnc_port=7860, mode="visual")
    return descriptor
61
+
62
+
63
@app.post("/run-task", response_model=TaskResponse)
async def run_task(request: TaskRequest):
    """Run a validation task and report its outcome.

    Args:
        request: Task envelope. ``task_type`` must be 'validate_order' or
            'validate_email'; ``data`` is forwarded to ``validate_order``
            with ``task_id`` merged in.

    Returns:
        A TaskResponse with status 'success' and the validation result, or
        status 'error' carrying the exception text when the task raises.

    Raises:
        HTTPException: 400 when ``task_type`` is not supported.
    """
    start_time = datetime.now()
    logger.info(f"📥 Task: {request.task_id} ({request.task_type}) - Watch the browser in VNC!")
    logger.info(f"📦 Data: {request.data}")

    def elapsed_ms():
        # Milliseconds since the handler started.
        return int((datetime.now() - start_time).total_seconds() * 1000)

    # Reject unsupported task types *outside* the try block. HTTPException is
    # an Exception subclass, so raising it inside would be caught below and
    # turned into a 200 "error" response instead of the intended HTTP 400.
    if request.task_type not in ("validate_order", "validate_email"):
        raise HTTPException(status_code=400, detail=f"Unknown task_type: {request.task_type}")

    try:
        order_data = {**request.data, "task_id": request.task_id}
        result = await validate_order(order_data)
        execution_time = elapsed_ms()
        logger.info(f"✅ Task {request.task_id} completed in {execution_time}ms")

        return TaskResponse(
            task_id=request.task_id,
            status="success",
            result=result,
            logs=result.get("logs", []),
            execution_time_ms=execution_time,
        )
    except Exception as e:
        # Task failures are reported in the response body rather than as a 5xx.
        execution_time = elapsed_ms()
        logger.error(f"❌ Task {request.task_id} failed: {e}")
        return TaskResponse(task_id=request.task_id, status="error", error=str(e), execution_time_ms=execution_time)
90
+
91
+
92
@app.on_event("startup")
async def startup():
    """Log the display and API-key configuration when the app starts."""
    display = os.environ.get('DISPLAY', ':99')
    key_state = 'SET' if os.getenv('OPENROUTER_API_KEY') else 'NOT SET'
    logger.info(f"🖥️ VNC Mirror starting on display {display}")
    logger.info(f"📍 OpenRouter: {key_state}")
    logger.info("📺 Watch the browser at /vnc.html")
97
+
98
+
99
if __name__ == "__main__":
    # uvicorn is imported here because it is only needed when this module is
    # executed as a script.
    import uvicorn

    # Serve the API on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # VNC Mirror - Requirements
2
+ fastapi>=0.100.0
3
+ uvicorn[standard]>=0.23.0
4
+ python-multipart>=0.0.6
5
+ python-dotenv>=1.0.0
6
+ browser-use>=0.1.0
7
+ playwright>=1.40.0
8
+ langchain-openai>=0.1.0
9
+ openai>=1.0.0
10
+ aiofiles>=23.0.0
11
+ httpx>=0.25.0
12
+ pydantic>=2.0.0
supervisord.conf ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ; =============================================================================
2
+ ; Supervisord - VNC Mirror Space
3
+ ; =============================================================================
4
+
5
+ [supervisord]
6
+ nodaemon=true
7
+ logfile=/var/log/supervisor/supervisord.log
8
+ pidfile=/var/run/supervisord.pid
9
+ childlogdir=/var/log/supervisor
10
+ user=root
11
+
12
+ [program:xvfb]
13
+ command=/usr/bin/Xvfb :99 -screen 0 1280x720x24 -ac +extension GLX +render -noreset
14
+ autorestart=true
15
+ priority=100
16
+ stdout_logfile=/var/log/supervisor/xvfb.log
17
+ stderr_logfile=/var/log/supervisor/xvfb_err.log
18
+
19
+ [program:fluxbox]
20
+ command=/bin/bash -c "sleep 2 && DISPLAY=:99 /usr/bin/fluxbox"
21
+ autorestart=true
22
+ priority=200
23
+ stdout_logfile=/var/log/supervisor/fluxbox.log
24
+ stderr_logfile=/var/log/supervisor/fluxbox_err.log
25
+
26
+ [program:x11vnc]
27
+ command=/bin/bash -c "sleep 3 && /usr/bin/x11vnc -display :99 -forever -shared -nopw -rfbport 5900 -xkb"
28
+ autorestart=true
29
+ priority=300
30
+ stdout_logfile=/var/log/supervisor/x11vnc.log
31
+ stderr_logfile=/var/log/supervisor/x11vnc_err.log
32
+
33
+ [program:novnc]
34
+ command=/bin/bash -c "sleep 5 && /opt/novnc/utils/novnc_proxy --vnc localhost:5900 --listen 7860 --web /opt/novnc"
35
+ autorestart=true
36
+ priority=400
37
+ stdout_logfile=/var/log/supervisor/novnc.log
38
+ stderr_logfile=/var/log/supervisor/novnc_err.log
39
+
40
+ [program:agent]
41
+ command=/bin/bash -c "sleep 10 && DISPLAY=:99 python /app/app.py"
42
+ directory=/app
43
+ autorestart=true
44
+ priority=500
45
+ startretries=10
46
+ startsecs=15
47
+ stdout_logfile=/var/log/supervisor/agent.log
48
+ stderr_logfile=/var/log/supervisor/agent_err.log