arudradey committed on
Commit
d2f500c
·
verified ·
1 Parent(s): ceac7fb

Update src/app/page.tsx

Browse files
Files changed (1) hide show
  1. src/app/page.tsx +344 -280
src/app/page.tsx CHANGED
@@ -2,17 +2,18 @@
2
 
3
  import { useState, useRef, useEffect, useCallback } from "react";
4
 
5
- // ─── Types ────────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
6
  interface ActionResult {
7
- type:
8
- | "click"
9
- | "type"
10
- | "scroll"
11
- | "navigate"
12
- | "keypress"
13
- | "hover"
14
- | "answer"
15
- | "wait";
16
  description: string;
17
  x?: number;
18
  y?: number;
@@ -22,6 +23,7 @@ interface ActionResult {
22
  scrollX?: number;
23
  scrollY?: number;
24
  answer?: string;
 
25
  }
26
 
27
  interface HistoryEntry {
@@ -45,7 +47,13 @@ interface ClickableElement {
45
  href?: string;
46
  }
47
 
48
- // ─── Action type color/icon mapping ───────────────────────────────────────────
 
 
 
 
 
 
49
  const ACTION_STYLES: Record<
50
  string,
51
  { bg: string; text: string; label: string; icon: string }
@@ -100,207 +108,271 @@ const ACTION_STYLES: Record<
100
  },
101
  };
102
 
103
- // ─── Component ────────────────────────────────────────────────────────────────
104
- export default function AgentBrowser() {
105
- const [url, setUrl] = useState("https://google.com");
106
- const [inputUrl, setInputUrl] = useState("https://google.com");
107
  const [prompt, setPrompt] = useState("");
 
108
  const [screenshot, setScreenshot] = useState<string | null>(null);
109
- const [clickableElements, setClickableElements] = useState<
110
- ClickableElement[]
111
- >([]);
 
 
 
 
 
112
  const [history, setHistory] = useState<HistoryEntry[]>([]);
113
  const [isLoading, setIsLoading] = useState(false);
114
  const [isBrowserReady, setIsBrowserReady] = useState(false);
115
  const [isInitializing, setIsInitializing] = useState(false);
116
- const [currentAction, setCurrentAction] = useState<string>("");
117
  const [showHighlights, setShowHighlights] = useState(true);
118
- const [screenshotDims, setScreenshotDims] = useState({
119
- width: 1280,
120
- height: 720,
121
- });
122
- const [autoMode, setAutoMode] = useState(false);
123
- const [autoSteps, setAutoSteps] = useState(5);
124
- const [autoCurrentStep, setAutoCurrentStep] = useState(0);
125
 
126
  const imgRef = useRef<HTMLImageElement>(null);
127
- const promptRef = useRef<HTMLTextAreaElement>(null);
128
  const historyEndRef = useRef<HTMLDivElement>(null);
129
 
130
- // ─── Scroll history to bottom ───────────────────────────────────────────
131
  useEffect(() => {
132
  historyEndRef.current?.scrollIntoView({ behavior: "smooth" });
133
  }, [history]);
134
 
135
- // ─── Initialize browser ─────────────────────────────────────────────────
136
- const initBrowser = useCallback(async (targetUrl: string) => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  setIsInitializing(true);
138
- setCurrentAction("Launching Chromium browser...");
 
139
  try {
140
  const res = await fetch("/api/browser", {
141
  method: "POST",
142
  headers: { "Content-Type": "application/json" },
143
- body: JSON.stringify({ url: targetUrl, action: "init" }),
144
  });
 
145
  const data = await res.json();
146
- if (!res.ok) throw new Error(data.error || "Failed to init browser");
 
 
 
 
147
  setIsBrowserReady(true);
148
- setCurrentAction("Browser ready. Taking screenshot...");
149
  await captureScreenshot();
150
- } catch (e: unknown) {
151
- setCurrentAction(
152
- `Error: ${e instanceof Error ? e.message : "Unknown error"}`
153
- );
154
  } finally {
155
  setIsInitializing(false);
156
  }
157
- }, []);
 
 
 
 
 
 
 
 
158
 
159
- // ─── Capture screenshot + clickable elements ────────────────────────────
160
- const captureScreenshot = useCallback(async () => {
161
- try {
162
- const res = await fetch("/api/screenshot", { method: "GET" });
163
  const data = await res.json();
164
- if (!res.ok) throw new Error(data.error || "Screenshot failed");
165
- setScreenshot(data.screenshot);
166
- setClickableElements(data.clickableElements || []);
167
- setScreenshotDims({ width: data.width || 1280, height: data.height || 720 });
168
- } catch (e) {
169
- console.error("Screenshot error:", e);
170
- }
171
- }, []);
172
 
173
- // ─── Execute an action on the browser ───────────────────────────────────
 
 
 
 
 
 
 
 
174
  const executeAction = useCallback(async (action: ActionResult) => {
175
  const res = await fetch("/api/action", {
176
  method: "POST",
177
  headers: { "Content-Type": "application/json" },
178
  body: JSON.stringify(action),
179
  });
 
180
  const data = await res.json();
181
- if (!res.ok) throw new Error(data.error || "Action failed");
 
 
 
 
182
  return data;
183
  }, []);
184
 
185
- // ─── Main: send prompt to Gemini ────────────────────────────────────────
186
- const runAgent = useCallback(
187
- async (overridePrompt?: string) => {
188
- const finalPrompt = overridePrompt || prompt;
189
- if (!finalPrompt.trim() || !isBrowserReady || isLoading) return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
- const entryId = crypto.randomUUID();
192
- setIsLoading(true);
193
- setCurrentAction("Capturing screenshot with highlighted elements...");
194
 
195
- const newEntry: HistoryEntry = {
 
 
 
 
 
 
 
 
196
  id: entryId,
197
- prompt: finalPrompt,
198
  action: null,
199
- screenshot: screenshot,
200
  timestamp: new Date(),
201
  status: "pending",
202
- };
203
- setHistory((h) => [...h, newEntry]);
204
-
205
- try {
206
- // 1. Capture fresh screenshot
207
- await captureScreenshot();
208
-
209
- // 2. Ask Gemini
210
- setCurrentAction("Sending to Gemini Vision...");
211
- const geminiRes = await fetch("/api/gemini", {
212
- method: "POST",
213
- headers: { "Content-Type": "application/json" },
214
- body: JSON.stringify({
215
- prompt: finalPrompt,
216
- screenshot,
217
- clickableElements,
218
- }),
219
- });
220
- const geminiData = await geminiRes.json();
221
- if (!geminiRes.ok)
222
- throw new Error(geminiData.error || "Gemini API failed");
223
-
224
- const action: ActionResult = geminiData.action;
225
-
226
- // 3. Execute action
227
- if (action.type !== "answer") {
228
- setCurrentAction(`Executing: ${action.type} β€” ${action.description}`);
229
- await executeAction(action);
230
- await new Promise((r) => setTimeout(r, 800)); // wait for page
231
- await captureScreenshot();
232
- }
233
-
234
- // 4. Update history
235
- setHistory((h) =>
236
- h.map((e) =>
237
- e.id === entryId
238
- ? { ...e, action, screenshot, status: "success" }
239
- : e
240
- )
241
- );
242
- if (!overridePrompt) setPrompt("");
243
- setCurrentAction("Done βœ“");
244
- } catch (err: unknown) {
245
- const msg = err instanceof Error ? err.message : "Unknown error";
246
- setHistory((h) =>
247
- h.map((e) =>
248
- e.id === entryId ? { ...e, status: "error", error: msg } : e
249
- )
250
- );
251
- setCurrentAction(`Error: ${msg}`);
252
- } finally {
253
- setIsLoading(false);
254
- }
255
- },
256
- [
257
- prompt,
258
- isBrowserReady,
259
- isLoading,
260
- screenshot,
261
- clickableElements,
262
- captureScreenshot,
263
- executeAction,
264
- ]
265
- );
266
 
267
- // ─── Navigate to URL ─────────────────────────────────────────────────────
268
- const handleNavigate = async () => {
269
- if (!inputUrl.trim()) return;
270
- let nav = inputUrl;
271
- if (!/^https?:\/\//i.test(nav)) nav = "https://" + nav;
272
- setUrl(nav);
273
- if (!isBrowserReady) {
274
- await initBrowser(nav);
275
- } else {
276
- setIsLoading(true);
277
- setCurrentAction(`Navigating to ${nav}...`);
278
- await fetch("/api/action", {
279
  method: "POST",
280
  headers: { "Content-Type": "application/json" },
281
- body: JSON.stringify({ type: "navigate", url: nav, description: `Navigate to ${nav}` }),
 
 
 
 
282
  });
283
- await new Promise((r) => setTimeout(r, 1200));
284
- await captureScreenshot();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  setIsLoading(false);
286
- setCurrentAction("Navigated βœ“");
287
  }
288
- };
289
 
290
- // ─── Keyboard shortcut: Enter to submit ──────────────────────────────────
291
- const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
 
292
  if (e.key === "Enter" && !e.shiftKey) {
293
  e.preventDefault();
294
  runAgent();
295
  }
296
  };
297
 
298
- // ─── Calculate overlay positions for highlighted clickable elements ──────
299
  const getOverlayStyle = (el: ClickableElement) => {
300
  if (!imgRef.current) return {};
 
301
  const rect = imgRef.current.getBoundingClientRect();
302
  const scaleX = rect.width / screenshotDims.width;
303
  const scaleY = rect.height / screenshotDims.height;
 
304
  return {
305
  left: el.x * scaleX,
306
  top: el.y * scaleY,
@@ -310,89 +382,85 @@ export default function AgentBrowser() {
310
  };
311
 
312
  return (
313
- <div className="min-h-screen bg-[#0f0f13] flex flex-col">
314
- {/* ── Header ── */}
315
- <header className="border-b border-[#2a2a3a] bg-[#0f0f13]/90 backdrop-blur-md sticky top-0 z-50 px-4 py-3">
316
  <div className="max-w-7xl mx-auto flex items-center gap-3">
317
  <div className="flex items-center gap-2 shrink-0">
318
  <span className="text-xl">πŸ€–</span>
319
- <span className="font-semibold text-sm text-white hidden sm:block">
320
- Agentic Browser
321
- </span>
322
- <span className="text-[10px] bg-indigo-500/20 text-indigo-400 px-2 py-0.5 rounded-full font-medium">
323
- Gemini
324
- </span>
 
 
325
  </div>
326
 
327
- {/* URL Bar */}
328
- <div className="flex-1 flex gap-2">
329
  <input
330
- className="flex-1 bg-[#1a1a24] border border-[#2a2a3a] rounded-lg px-3 py-2 text-sm text-white placeholder:text-gray-500 focus:outline-none focus:border-indigo-500 transition-colors"
331
  value={inputUrl}
332
  onChange={(e) => setInputUrl(e.target.value)}
333
- onKeyDown={(e) => e.key === "Enter" && handleNavigate()}
 
 
334
  placeholder="https://example.com"
 
335
  />
 
336
  <button
337
  onClick={handleNavigate}
338
- disabled={isInitializing}
339
- className="bg-indigo-600 hover:bg-indigo-500 disabled:opacity-50 text-white px-4 py-2 rounded-lg text-sm font-medium transition-colors shrink-0"
340
  >
341
- {isInitializing ? "Loading…" : isBrowserReady ? "Go" : "Launch"}
342
  </button>
343
  </div>
344
 
345
- {/* Highlight toggle */}
346
  <button
347
  onClick={() => setShowHighlights((v) => !v)}
348
- title="Toggle element highlights"
349
- className={`shrink-0 p-2 rounded-lg text-sm transition-colors ${
350
  showHighlights
351
  ? "bg-indigo-500/20 text-indigo-400"
352
  : "bg-[#1a1a24] text-gray-500"
353
  }`}
 
354
  >
355
  🎯
356
  </button>
357
  </div>
358
  </header>
359
 
360
- {/* ── Main Layout ── */}
361
  <main className="flex-1 max-w-7xl mx-auto w-full p-4 flex flex-col lg:flex-row gap-4">
362
- {/* ── Left: Browser Viewport ── */}
363
- <div className="flex-1 flex flex-col gap-3 min-w-0">
364
- {/* Status bar */}
365
  {currentAction && (
366
- <div className="flex items-center gap-2 bg-[#1a1a24] border border-[#2a2a3a] rounded-lg px-3 py-2 text-xs text-gray-400">
367
- {isLoading && (
368
- <span className="inline-block w-3 h-3 border-2 border-indigo-500 border-t-transparent rounded-full animate-spin shrink-0" />
369
  )}
370
  <span className="truncate">{currentAction}</span>
371
  </div>
372
  )}
373
 
374
- {/* Viewport */}
375
- <div className="screenshot-container bg-[#1a1a24] aspect-video relative flex items-center justify-center">
376
- {!isBrowserReady && (
377
- <div className="flex flex-col items-center gap-3 text-gray-500">
378
- <span className="text-4xl">🌐</span>
379
  <p className="text-sm">
380
- Enter a URL above and click{" "}
381
- <strong className="text-indigo-400">Launch</strong>
382
  </p>
383
- <p className="text-xs text-gray-600">
384
- Chromium will start headlessly in the background
385
  </p>
386
  </div>
387
  )}
388
 
389
  {isInitializing && (
390
- <div className="absolute inset-0 bg-[#0f0f13]/80 flex items-center justify-center z-10 rounded-lg">
391
  <div className="flex flex-col items-center gap-3">
392
- <div className="w-8 h-8 border-2 border-indigo-500 border-t-transparent rounded-full animate-spin" />
393
  <span className="text-sm text-gray-400">
394
- Starting browser
395
- <span className="loading-dots" />
396
  </span>
397
  </div>
398
  </div>
@@ -405,15 +473,14 @@ export default function AgentBrowser() {
405
  ref={imgRef}
406
  src={`data:image/png;base64,${screenshot}`}
407
  alt="Browser screenshot"
408
- className="w-full h-full object-contain rounded-lg"
409
  />
410
 
411
- {/* Clickable element highlights */}
412
  {showHighlights &&
413
  clickableElements.map((el, i) => (
414
  <div
415
- key={i}
416
- className="highlight-overlay"
417
  style={getOverlayStyle(el)}
418
  title={`${el.tag}: ${el.text || el.href || ""}`}
419
  />
@@ -422,83 +489,71 @@ export default function AgentBrowser() {
422
  )}
423
  </div>
424
 
425
- {/* ── Prompt Input ── */}
426
- <div className="bg-[#1a1a24] border border-[#2a2a3a] rounded-xl p-3 flex flex-col gap-2">
427
  <textarea
428
- ref={promptRef}
429
  value={prompt}
430
  onChange={(e) => setPrompt(e.target.value)}
431
- onKeyDown={handleKeyDown}
432
- placeholder="Tell the AI what to do… (e.g. 'Search for Next.js tutorials', 'Click the login button', 'What does this page say?')"
433
- rows={3}
434
- className="w-full bg-transparent text-sm text-white placeholder:text-gray-600 resize-none focus:outline-none leading-relaxed"
435
- disabled={isLoading || !isBrowserReady}
436
  />
437
- <div className="flex items-center justify-between gap-2 flex-wrap">
438
- <div className="flex items-center gap-3 text-xs text-gray-500">
439
- <span>↡ Enter to send &nbsp;Β·&nbsp; Shift+↡ newline</span>
440
- <label className="flex items-center gap-1.5 cursor-pointer">
441
- <input
442
- type="checkbox"
443
- checked={autoMode}
444
- onChange={(e) => setAutoMode(e.target.checked)}
445
- className="accent-indigo-500"
446
- />
447
- Auto-run
448
- </label>
449
- {autoMode && (
450
- <input
451
- type="number"
452
- value={autoSteps}
453
- onChange={(e) => setAutoSteps(Number(e.target.value))}
454
- min={1}
455
- max={20}
456
- className="w-12 bg-[#0f0f13] border border-[#2a2a3a] rounded px-1 py-0.5 text-xs text-white"
457
- />
458
- )}
459
  </div>
460
- <button
461
- onClick={() => runAgent()}
462
- disabled={isLoading || !isBrowserReady || !prompt.trim()}
463
- className="bg-indigo-600 hover:bg-indigo-500 disabled:opacity-40 disabled:cursor-not-allowed text-white px-5 py-2 rounded-lg text-sm font-medium transition-colors flex items-center gap-2"
464
- >
465
- {isLoading ? (
466
- <>
467
- <span className="w-3.5 h-3.5 border-2 border-white border-t-transparent rounded-full animate-spin" />
468
- Running
469
- </>
470
- ) : (
471
- <>Run Agent</>
472
- )}
473
- </button>
474
  </div>
475
  </div>
476
- </div>
477
 
478
- {/* ── Right: Action History ── */}
479
- <div className="w-full lg:w-80 xl:w-96 flex flex-col gap-3 shrink-0">
 
 
 
 
 
 
 
480
  <div className="flex items-center justify-between">
481
  <h2 className="text-sm font-semibold text-gray-300">
482
  Action History
483
  </h2>
 
484
  {history.length > 0 && (
485
  <button
486
  onClick={() => setHistory([])}
487
- className="text-xs text-gray-600 hover:text-gray-400 transition-colors"
488
  >
489
  Clear
490
  </button>
491
  )}
492
  </div>
493
 
494
- <div className="flex-1 overflow-y-auto flex flex-col gap-2 max-h-[70vh] pr-1">
495
  {history.length === 0 && (
496
- <div className="text-center py-12 text-gray-600 text-sm">
497
- <p className="text-3xl mb-2">πŸ—‚οΈ</p>
498
  <p>No actions yet</p>
499
- <p className="text-xs mt-1 text-gray-700">
500
- Launch a browser and give the AI a task
501
- </p>
502
  </div>
503
  )}
504
 
@@ -506,67 +561,90 @@ export default function AgentBrowser() {
506
  const style = entry.action
507
  ? ACTION_STYLES[entry.action.type] || ACTION_STYLES.answer
508
  : null;
 
509
  return (
510
  <div
511
  key={entry.id}
512
- className="bg-[#1a1a24] border border-[#2a2a3a] rounded-xl p-3 flex flex-col gap-2 text-xs"
513
  >
514
- {/* Prompt */}
515
  <div className="flex items-start gap-2">
516
  <span className="text-base shrink-0">πŸ‘€</span>
517
- <p className="text-gray-300 leading-relaxed line-clamp-3">
518
  {entry.prompt}
519
  </p>
520
  </div>
521
 
522
- {/* Status */}
523
  {entry.status === "pending" && (
524
- <div className="flex items-center gap-1.5 text-gray-500">
525
- <span className="w-3 h-3 border border-gray-500 border-t-transparent rounded-full animate-spin" />
526
- Processing…
527
  </div>
528
  )}
529
 
530
  {entry.status === "error" && (
531
- <div className="text-red-400 bg-red-500/10 rounded-lg px-2 py-1">
532
  ⚠ {entry.error}
533
  </div>
534
  )}
535
 
536
  {entry.status === "success" && entry.action && style && (
537
  <>
538
- <div className="flex items-center gap-2">
539
  <span
540
- className={`action-badge ${style.bg} ${style.text}`}
541
  >
542
- {style.icon} {style.label}
 
543
  </span>
544
- <span className="text-gray-400 truncate">
545
  {entry.action.description}
546
  </span>
547
  </div>
548
 
549
- {/* Action details */}
550
- <div className="bg-[#0f0f13] rounded-lg px-2 py-1.5 font-mono text-[10px] text-gray-500 space-y-0.5">
551
  {entry.action.type === "click" && (
552
  <p>
553
  x={entry.action.x} y={entry.action.y}
554
  </p>
555
  )}
 
556
  {entry.action.type === "type" && (
557
- <p>"{entry.action.text}"</p>
 
 
 
 
 
 
 
 
558
  )}
 
559
  {entry.action.type === "scroll" && (
560
  <p>
561
- Ξ”x={entry.action.scrollX} Ξ”y={entry.action.scrollY}
 
562
  </p>
563
  )}
 
564
  {entry.action.type === "navigate" && (
565
- <p className="truncate">{entry.action.url}</p>
566
  )}
 
567
  {entry.action.type === "keypress" && (
568
- <p>key: {entry.action.key}</p>
569
  )}
 
 
 
 
 
 
 
 
 
 
 
570
  {entry.action.type === "answer" && (
571
  <p className="whitespace-pre-wrap text-indigo-300">
572
  {entry.action.answer}
@@ -576,30 +654,16 @@ export default function AgentBrowser() {
576
  </>
577
  )}
578
 
579
- <p className="text-gray-700 text-[10px]">
580
  {entry.timestamp.toLocaleTimeString()}
581
  </p>
582
  </div>
583
  );
584
  })}
 
585
  <div ref={historyEndRef} />
586
  </div>
587
-
588
- {/* Element count badge */}
589
- {isBrowserReady && (
590
- <div className="bg-[#1a1a24] border border-[#2a2a3a] rounded-lg px-3 py-2 text-xs text-gray-500 flex items-center justify-between">
591
- <span>
592
- 🎯 {clickableElements.length} clickable elements detected
593
- </span>
594
- <button
595
- onClick={captureScreenshot}
596
- className="text-indigo-400 hover:text-indigo-300 transition-colors"
597
- >
598
- Refresh
599
- </button>
600
- </div>
601
- )}
602
- </div>
603
  </main>
604
  </div>
605
  );
 
2
 
3
  import { useState, useRef, useEffect, useCallback } from "react";
4
 
5
/** Every kind of step the agent can return for a prompt. */
type ActionType =
  | "click" | "type" | "scroll" | "navigate"
  | "keypress" | "hover" | "answer" | "wait";
14
+
15
  interface ActionResult {
16
+ type: ActionType;
 
 
 
 
 
 
 
 
17
  description: string;
18
  x?: number;
19
  y?: number;
 
23
  scrollX?: number;
24
  scrollY?: number;
25
  answer?: string;
26
+ ms?: number;
27
  }
28
 
29
  interface HistoryEntry {
 
47
  href?: string;
48
  }
49
 
50
/** Normalized payload produced by a screenshot capture. */
type ScreenshotResponse = {
  /** Image data; rendered by the page as a base64 PNG data URL. */
  screenshot: string;
  /** Elements reported alongside the screenshot, drawn as highlight overlays. */
  clickableElements: ClickableElement[];
  /** Screenshot width, used to scale overlay coordinates. */
  width: number;
  /** Screenshot height, used to scale overlay coordinates. */
  height: number;
};
56
+
57
  const ACTION_STYLES: Record<
58
  string,
59
  { bg: string; text: string; label: string; icon: string }
 
108
  },
109
  };
110
 
111
+ export default function AgentBrowserPage() {
112
+ const [url, setUrl] = useState("https://example.com");
113
+ const [inputUrl, setInputUrl] = useState("https://example.com");
 
114
  const [prompt, setPrompt] = useState("");
115
+
116
  const [screenshot, setScreenshot] = useState<string | null>(null);
117
+ const [clickableElements, setClickableElements] = useState<ClickableElement[]>(
118
+ []
119
+ );
120
+ const [screenshotDims, setScreenshotDims] = useState({
121
+ width: 1280,
122
+ height: 720,
123
+ });
124
+
125
  const [history, setHistory] = useState<HistoryEntry[]>([]);
126
  const [isLoading, setIsLoading] = useState(false);
127
  const [isBrowserReady, setIsBrowserReady] = useState(false);
128
  const [isInitializing, setIsInitializing] = useState(false);
129
+ const [currentAction, setCurrentAction] = useState("");
130
  const [showHighlights, setShowHighlights] = useState(true);
 
 
 
 
 
 
 
131
 
132
  const imgRef = useRef<HTMLImageElement>(null);
 
133
  const historyEndRef = useRef<HTMLDivElement>(null);
134
 
 
135
  useEffect(() => {
136
  historyEndRef.current?.scrollIntoView({ behavior: "smooth" });
137
  }, [history]);
138
 
139
+ const captureScreenshot = useCallback(async (): Promise<ScreenshotResponse> => {
140
+ const res = await fetch("/api/screenshot", {
141
+ method: "GET",
142
+ cache: "no-store",
143
+ });
144
+
145
+ const data = await res.json();
146
+
147
+ if (!res.ok) {
148
+ throw new Error(data.error || "Failed to capture screenshot");
149
+ }
150
+
151
+ const result: ScreenshotResponse = {
152
+ screenshot: data.screenshot,
153
+ clickableElements: data.clickableElements || [],
154
+ width: data.width || 1280,
155
+ height: data.height || 720,
156
+ };
157
+
158
+ setScreenshot(result.screenshot);
159
+ setClickableElements(result.clickableElements);
160
+ setScreenshotDims({
161
+ width: result.width,
162
+ height: result.height,
163
+ });
164
+
165
+ return result;
166
+ }, []);
167
+
168
+ const initBrowser = useCallback(async () => {
169
  setIsInitializing(true);
170
+ setCurrentAction("Launching browser on about:blank...");
171
+
172
  try {
173
  const res = await fetch("/api/browser", {
174
  method: "POST",
175
  headers: { "Content-Type": "application/json" },
176
+ body: JSON.stringify({ action: "init" }),
177
  });
178
+
179
  const data = await res.json();
180
+
181
+ if (!res.ok) {
182
+ throw new Error(data.error || "Failed to initialize browser");
183
+ }
184
+
185
  setIsBrowserReady(true);
186
+ setCurrentAction("Browser ready βœ“");
187
  await captureScreenshot();
 
 
 
 
188
  } finally {
189
  setIsInitializing(false);
190
  }
191
+ }, [captureScreenshot]);
192
+
193
+ const navigateBrowser = useCallback(
194
+ async (targetUrl: string) => {
195
+ const res = await fetch("/api/browser", {
196
+ method: "POST",
197
+ headers: { "Content-Type": "application/json" },
198
+ body: JSON.stringify({ action: "navigate", url: targetUrl }),
199
+ });
200
 
 
 
 
 
201
  const data = await res.json();
 
 
 
 
 
 
 
 
202
 
203
+ if (!res.ok) {
204
+ throw new Error(data.error || "Navigation failed");
205
+ }
206
+
207
+ return data;
208
+ },
209
+ []
210
+ );
211
+
212
  const executeAction = useCallback(async (action: ActionResult) => {
213
  const res = await fetch("/api/action", {
214
  method: "POST",
215
  headers: { "Content-Type": "application/json" },
216
  body: JSON.stringify(action),
217
  });
218
+
219
  const data = await res.json();
220
+
221
+ if (!res.ok) {
222
+ throw new Error(data.error || "Action failed");
223
+ }
224
+
225
  return data;
226
  }, []);
227
 
228
+ const handleNavigate = useCallback(async () => {
229
+ if (!inputUrl.trim()) return;
230
+
231
+ let nav = inputUrl.trim();
232
+ if (!/^https?:\/\//i.test(nav)) {
233
+ nav = `https://${nav}`;
234
+ }
235
+
236
+ setUrl(nav);
237
+ setIsLoading(true);
238
+
239
+ try {
240
+ if (!isBrowserReady) {
241
+ await initBrowser();
242
+ }
243
+
244
+ setCurrentAction(`Navigating to ${nav}...`);
245
+ await navigateBrowser(nav);
246
+ await captureScreenshot();
247
+ setCurrentAction("Navigated βœ“");
248
+ } catch (e: unknown) {
249
+ const msg = e instanceof Error ? e.message : "Unknown error";
250
+ setCurrentAction(`Error: ${msg}`);
251
+ } finally {
252
+ setIsLoading(false);
253
+ }
254
+ }, [inputUrl, isBrowserReady, initBrowser, navigateBrowser, captureScreenshot]);
255
 
256
+ const runAgent = useCallback(async () => {
257
+ if (!prompt.trim() || !isBrowserReady || isLoading) return;
 
258
 
259
+ const entryId = crypto.randomUUID();
260
+ const entryPrompt = prompt;
261
+
262
+ setIsLoading(true);
263
+ setCurrentAction("Capturing latest screenshot...");
264
+
265
+ setHistory((prev) => [
266
+ ...prev,
267
+ {
268
  id: entryId,
269
+ prompt: entryPrompt,
270
  action: null,
271
+ screenshot,
272
  timestamp: new Date(),
273
  status: "pending",
274
+ },
275
+ ]);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
 
277
+ try {
278
+ const fresh = await captureScreenshot();
279
+
280
+ setCurrentAction("Sending screenshot + prompt to Gemini...");
281
+
282
+ const geminiRes = await fetch("/api/gemini", {
 
 
 
 
 
 
283
  method: "POST",
284
  headers: { "Content-Type": "application/json" },
285
+ body: JSON.stringify({
286
+ prompt: entryPrompt,
287
+ screenshot: fresh.screenshot,
288
+ clickableElements: fresh.clickableElements,
289
+ }),
290
  });
291
+
292
+ const geminiData = await geminiRes.json();
293
+
294
+ if (!geminiRes.ok) {
295
+ throw new Error(geminiData.error || "Gemini request failed");
296
+ }
297
+
298
+ const action: ActionResult = geminiData.action;
299
+
300
+ if (action.type === "answer") {
301
+ setHistory((prev) =>
302
+ prev.map((item) =>
303
+ item.id === entryId
304
+ ? {
305
+ ...item,
306
+ action,
307
+ screenshot: fresh.screenshot,
308
+ status: "success",
309
+ }
310
+ : item
311
+ )
312
+ );
313
+ setPrompt("");
314
+ setCurrentAction("Answer generated βœ“");
315
+ return;
316
+ }
317
+
318
+ setCurrentAction(`Executing ${action.type}...`);
319
+ await executeAction(action);
320
+
321
+ await new Promise((resolve) => setTimeout(resolve, 800));
322
+ const after = await captureScreenshot();
323
+
324
+ setHistory((prev) =>
325
+ prev.map((item) =>
326
+ item.id === entryId
327
+ ? {
328
+ ...item,
329
+ action,
330
+ screenshot: after.screenshot,
331
+ status: "success",
332
+ }
333
+ : item
334
+ )
335
+ );
336
+
337
+ setPrompt("");
338
+ setCurrentAction("Done βœ“");
339
+ } catch (e: unknown) {
340
+ const msg = e instanceof Error ? e.message : "Unknown error";
341
+
342
+ setHistory((prev) =>
343
+ prev.map((item) =>
344
+ item.id === entryId
345
+ ? {
346
+ ...item,
347
+ status: "error",
348
+ error: msg,
349
+ }
350
+ : item
351
+ )
352
+ );
353
+
354
+ setCurrentAction(`Error: ${msg}`);
355
+ } finally {
356
  setIsLoading(false);
 
357
  }
358
+ }, [prompt, isBrowserReady, isLoading, screenshot, captureScreenshot, executeAction]);
359
 
360
+ const handlePromptKeyDown = (
361
+ e: React.KeyboardEvent<HTMLTextAreaElement>
362
+ ) => {
363
  if (e.key === "Enter" && !e.shiftKey) {
364
  e.preventDefault();
365
  runAgent();
366
  }
367
  };
368
 
 
369
  const getOverlayStyle = (el: ClickableElement) => {
370
  if (!imgRef.current) return {};
371
+
372
  const rect = imgRef.current.getBoundingClientRect();
373
  const scaleX = rect.width / screenshotDims.width;
374
  const scaleY = rect.height / screenshotDims.height;
375
+
376
  return {
377
  left: el.x * scaleX,
378
  top: el.y * scaleY,
 
382
  };
383
 
384
  return (
385
+ <div className="min-h-screen bg-[#0f0f13] text-[#e2e2f0] flex flex-col">
386
+ <header className="sticky top-0 z-50 border-b border-[#2a2a3a] bg-[#0f0f13]/90 backdrop-blur px-4 py-3">
 
387
  <div className="max-w-7xl mx-auto flex items-center gap-3">
388
  <div className="flex items-center gap-2 shrink-0">
389
  <span className="text-xl">πŸ€–</span>
390
+ <div className="hidden sm:flex flex-col leading-none">
391
+ <span className="text-sm font-semibold text-white">
392
+ Agentic Browser
393
+ </span>
394
+ <span className="text-[10px] text-gray-500">
395
+ Gemini + Playwright
396
+ </span>
397
+ </div>
398
  </div>
399
 
400
+ <div className="flex-1 flex gap-2 min-w-0">
 
401
  <input
 
402
  value={inputUrl}
403
  onChange={(e) => setInputUrl(e.target.value)}
404
+ onKeyDown={(e) => {
405
+ if (e.key === "Enter") handleNavigate();
406
+ }}
407
  placeholder="https://example.com"
408
+ className="flex-1 min-w-0 bg-[#1a1a24] border border-[#2a2a3a] rounded-lg px-3 py-2 text-sm text-white placeholder:text-gray-500 focus:outline-none focus:border-indigo-500"
409
  />
410
+
411
  <button
412
  onClick={handleNavigate}
413
+ disabled={isLoading || isInitializing}
414
+ className="shrink-0 rounded-lg bg-indigo-600 hover:bg-indigo-500 disabled:opacity-50 px-4 py-2 text-sm font-medium text-white transition-colors"
415
  >
416
+ {!isBrowserReady ? "Launch" : "Go"}
417
  </button>
418
  </div>
419
 
 
420
  <button
421
  onClick={() => setShowHighlights((v) => !v)}
422
+ className={`shrink-0 rounded-lg px-3 py-2 text-sm transition-colors ${
 
423
  showHighlights
424
  ? "bg-indigo-500/20 text-indigo-400"
425
  : "bg-[#1a1a24] text-gray-500"
426
  }`}
427
+ title="Toggle highlights"
428
  >
429
  🎯
430
  </button>
431
  </div>
432
  </header>
433
 
 
434
  <main className="flex-1 max-w-7xl mx-auto w-full p-4 flex flex-col lg:flex-row gap-4">
435
+ <section className="flex-1 min-w-0 flex flex-col gap-4">
 
 
436
  {currentAction && (
437
+ <div className="rounded-lg border border-[#2a2a3a] bg-[#1a1a24] px-3 py-2 text-xs text-gray-400 flex items-center gap-2">
438
+ {(isLoading || isInitializing) && (
439
+ <span className="inline-block h-3 w-3 rounded-full border-2 border-indigo-500 border-t-transparent animate-spin shrink-0" />
440
  )}
441
  <span className="truncate">{currentAction}</span>
442
  </div>
443
  )}
444
 
445
+ <div className="relative bg-[#1a1a24] border border-[#2a2a3a] rounded-xl overflow-hidden aspect-video flex items-center justify-center">
446
+ {!isBrowserReady && !screenshot && (
447
+ <div className="text-center text-gray-500 px-6">
448
+ <div className="text-4xl mb-3">🌐</div>
 
449
  <p className="text-sm">
450
+ Launch the browser first, then open any site.
 
451
  </p>
452
+ <p className="text-xs text-gray-600 mt-1">
453
+ Default test URL: https://example.com
454
  </p>
455
  </div>
456
  )}
457
 
458
  {isInitializing && (
459
+ <div className="absolute inset-0 bg-[#0f0f13]/80 z-10 flex items-center justify-center">
460
  <div className="flex flex-col items-center gap-3">
461
+ <div className="w-8 h-8 rounded-full border-2 border-indigo-500 border-t-transparent animate-spin" />
462
  <span className="text-sm text-gray-400">
463
+ Starting browser...
 
464
  </span>
465
  </div>
466
  </div>
 
473
  ref={imgRef}
474
  src={`data:image/png;base64,${screenshot}`}
475
  alt="Browser screenshot"
476
+ className="w-full h-full object-contain"
477
  />
478
 
 
479
  {showHighlights &&
480
  clickableElements.map((el, i) => (
481
  <div
482
+ key={`${el.tag}-${i}-${el.x}-${el.y}`}
483
+ className="highlight-overlay absolute"
484
  style={getOverlayStyle(el)}
485
  title={`${el.tag}: ${el.text || el.href || ""}`}
486
  />
 
489
  )}
490
  </div>
491
 
492
+ <div className="rounded-xl border border-[#2a2a3a] bg-[#1a1a24] p-3 flex flex-col gap-3">
 
493
  <textarea
 
494
  value={prompt}
495
  onChange={(e) => setPrompt(e.target.value)}
496
+ onKeyDown={handlePromptKeyDown}
497
+ placeholder="Tell the AI what to do... example: Click the first result, type hello in the search box, or tell me what this page says."
498
+ rows={4}
499
+ disabled={!isBrowserReady || isLoading}
500
+ className="w-full resize-none bg-transparent text-sm text-white placeholder:text-gray-600 focus:outline-none"
501
  />
502
+
503
+ <div className="flex flex-wrap items-center justify-between gap-2">
504
+ <div className="text-xs text-gray-500">
505
+ Enter = send Β· Shift+Enter = new line
506
+ </div>
507
+
508
+ <div className="flex items-center gap-2">
509
+ <button
510
+ onClick={() => captureScreenshot()}
511
+ disabled={!isBrowserReady || isLoading}
512
+ className="rounded-lg border border-[#2a2a3a] bg-[#0f0f13] px-3 py-2 text-sm text-gray-300 hover:bg-[#14141c] disabled:opacity-40"
513
+ >
514
+ Refresh
515
+ </button>
516
+
517
+ <button
518
+ onClick={runAgent}
519
+ disabled={!isBrowserReady || isLoading || !prompt.trim()}
520
+ className="rounded-lg bg-indigo-600 hover:bg-indigo-500 disabled:opacity-40 px-5 py-2 text-sm font-medium text-white transition-colors"
521
+ >
522
+ {isLoading ? "Running..." : "Run Agent"}
523
+ </button>
524
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
  </div>
526
  </div>
 
527
 
528
+ {isBrowserReady && (
529
+ <div className="rounded-lg border border-[#2a2a3a] bg-[#1a1a24] px-3 py-2 text-xs text-gray-500 flex items-center justify-between">
530
+ <span>Current URL: {url}</span>
531
+ <span>Detected elements: {clickableElements.length}</span>
532
+ </div>
533
+ )}
534
+ </section>
535
+
536
+ <aside className="w-full lg:w-80 xl:w-96 shrink-0 flex flex-col gap-3">
537
  <div className="flex items-center justify-between">
538
  <h2 className="text-sm font-semibold text-gray-300">
539
  Action History
540
  </h2>
541
+
542
  {history.length > 0 && (
543
  <button
544
  onClick={() => setHistory([])}
545
+ className="text-xs text-gray-500 hover:text-gray-300"
546
  >
547
  Clear
548
  </button>
549
  )}
550
  </div>
551
 
552
+ <div className="flex flex-col gap-2 max-h-[70vh] overflow-y-auto pr-1">
553
  {history.length === 0 && (
554
+ <div className="rounded-xl border border-[#2a2a3a] bg-[#1a1a24] p-6 text-center text-sm text-gray-600">
555
+ <div className="text-3xl mb-2">🗂️</div>
556
  <p>No actions yet</p>
 
 
 
557
  </div>
558
  )}
559
 
 
561
  const style = entry.action
562
  ? ACTION_STYLES[entry.action.type] || ACTION_STYLES.answer
563
  : null;
564
+
565
  return (
566
  <div
567
  key={entry.id}
568
+ className="rounded-xl border border-[#2a2a3a] bg-[#1a1a24] p-3 text-xs flex flex-col gap-2"
569
  >
 
570
  <div className="flex items-start gap-2">
571
  <span className="text-base shrink-0">👀</span>
572
+ <p className="text-gray-300 leading-relaxed whitespace-pre-wrap break-words">
573
  {entry.prompt}
574
  </p>
575
  </div>
576
 
 
577
  {entry.status === "pending" && (
578
+ <div className="flex items-center gap-2 text-gray-500">
579
+ <span className="inline-block h-3 w-3 rounded-full border border-gray-500 border-t-transparent animate-spin" />
580
+ Processing...
581
  </div>
582
  )}
583
 
584
  {entry.status === "error" && (
585
+ <div className="rounded-lg bg-red-500/10 px-2 py-1 text-red-400">
586
  ⚠ {entry.error}
587
  </div>
588
  )}
589
 
590
  {entry.status === "success" && entry.action && style && (
591
  <>
592
+ <div className="flex items-center gap-2 flex-wrap">
593
  <span
594
+ className={`inline-flex items-center gap-1 rounded-full px-2 py-1 text-[10px] font-semibold uppercase tracking-wide ${style.bg} ${style.text}`}
595
  >
596
+ <span>{style.icon}</span>
597
+ <span>{style.label}</span>
598
  </span>
599
+ <span className="text-gray-400 break-words">
600
  {entry.action.description}
601
  </span>
602
  </div>
603
 
604
+ <div className="rounded-lg bg-[#0f0f13] px-2 py-2 font-mono text-[10px] text-gray-500 space-y-1">
 
605
  {entry.action.type === "click" && (
606
  <p>
607
  x={entry.action.x} y={entry.action.y}
608
  </p>
609
  )}
610
+
611
  {entry.action.type === "type" && (
612
+ <>
613
+ {entry.action.x !== undefined &&
614
+ entry.action.y !== undefined && (
615
+ <p>
616
+ x={entry.action.x} y={entry.action.y}
617
+ </p>
618
+ )}
619
+ <p>{entry.action.text}</p>
620
+ </>
621
  )}
622
+
623
  {entry.action.type === "scroll" && (
624
  <p>
625
+ Δx={entry.action.scrollX ?? 0} Δy=
626
+ {entry.action.scrollY ?? 0}
627
  </p>
628
  )}
629
+
630
  {entry.action.type === "navigate" && (
631
+ <p className="break-all">{entry.action.url}</p>
632
  )}
633
+
634
  {entry.action.type === "keypress" && (
635
+ <p>key={entry.action.key}</p>
636
  )}
637
+
638
+ {entry.action.type === "hover" && (
639
+ <p>
640
+ x={entry.action.x} y={entry.action.y}
641
+ </p>
642
+ )}
643
+
644
+ {entry.action.type === "wait" && (
645
+ <p>{entry.action.ms ?? 0}ms</p>
646
+ )}
647
+
648
  {entry.action.type === "answer" && (
649
  <p className="whitespace-pre-wrap text-indigo-300">
650
  {entry.action.answer}
 
654
  </>
655
  )}
656
 
657
+ <p className="text-[10px] text-gray-700">
658
  {entry.timestamp.toLocaleTimeString()}
659
  </p>
660
  </div>
661
  );
662
  })}
663
+
664
  <div ref={historyEndRef} />
665
  </div>
666
+ </aside>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
667
  </main>
668
  </div>
669
  );