simoncck committed on
Commit
693e08d
·
verified ·
1 Parent(s): 2150d2f

Create server.js

Browse files
Files changed (1) hide show
  1. server.js +349 -0
server.js ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import base64
4
+ import asyncio
5
+ import uuid
6
+ from typing import Optional, Dict, Any, List
7
+ from contextlib import asynccontextmanager
8
+
9
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
10
+ from fastapi.middleware.cors import CORSMiddleware
11
+ from pydantic import BaseModel
12
+ import uvicorn
13
+
14
+ from selenium import webdriver
15
+ from selenium.webdriver.common.by import By
16
+ from selenium.webdriver.common.keys import Keys
17
+ from selenium.webdriver.common.action_chains import ActionChains
18
+ from selenium.webdriver.support.ui import WebDriverWait
19
+ from selenium.webdriver.support import expected_conditions as EC
20
+ from selenium.webdriver.chrome.options import Options
21
+ from selenium.common.exceptions import TimeoutException, NoSuchElementException
22
+ from PIL import Image
23
+ import io
24
+
25
# Registry of the currently open browser sessions, keyed by session id.
browser_sessions: Dict[str, webdriver.Chrome] = dict()
27
+
28
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application startup and shutdown.

    Nothing is done at startup. On shutdown every browser still registered
    in ``browser_sessions`` is quit on a best-effort basis and the registry
    is emptied.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    # Startup: nothing to initialise.
    yield
    # Shutdown: close all browser sessions. Only the drivers are needed, so
    # iterate the values rather than (id, driver) pairs.
    for driver in browser_sessions.values():
        try:
            driver.quit()
        except Exception:
            # Best effort: one broken driver must not prevent the remaining
            # sessions from being closed. Unlike a bare ``except:``, this
            # still lets KeyboardInterrupt / SystemExit propagate.
            pass
    browser_sessions.clear()
39
+
40
app = FastAPI(
    title="Browser Automation API",
    description="A browser automation API similar to browser-use",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS middleware.
# Every origin is allowed, so credentialed requests are switched off: a
# wildcard origin must not be combined with allow_credentials=True, and
# nothing in this file reads cookies or authentication headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
55
+
56
+ # Pydantic models
57
# Response body returned by the session-creation endpoint.
class SessionResponse(BaseModel):
    session_id: str  # identifier the client uses in later /session/{id}/... calls
    status: str  # state of the session, e.g. "created"
60
+
61
# Request body for the navigate endpoint.
class NavigateRequest(BaseModel):
    url: str  # address handed to driver.get()
63
+
64
# Request body for the click endpoint.
class ClickRequest(BaseModel):
    selector: str  # locator expression for the element to click
    selector_type: str = "css"  # css, xpath, id, class, name
67
+
68
# Request body for the type endpoint.
class TypeRequest(BaseModel):
    selector: str  # locator expression for the target element
    text: str  # keys to send to the element
    selector_type: str = "css"  # same kinds as ClickRequest.selector_type
    clear_first: bool = True  # clear the element before typing
73
+
74
# Generic result envelope shared by the action endpoints.
class ActionResponse(BaseModel):
    success: bool  # whether the action completed
    message: str  # human-readable outcome or failure reason
    data: Optional[Dict[str, Any]] = None  # optional action-specific payload
78
+
79
# Result of the screenshot endpoint.
class ScreenshotResponse(BaseModel):
    success: bool  # whether the screenshot was captured
    screenshot: str  # base64 encoded image
    message: str  # human-readable outcome or failure reason
83
+
84
# Summary of one element found on the page.
class ElementInfo(BaseModel):
    tag: str  # element tag name
    text: str  # visible text of the element
    attributes: Dict[str, str]  # attribute name -> value
88
+
89
# Snapshot of the current page returned by the page-info endpoint.
class PageInfo(BaseModel):
    title: str  # page title
    url: str  # current URL of the browser
    elements: List[ElementInfo]  # elements collected from the page
93
+
94
def create_chrome_driver() -> webdriver.Chrome:
    """Build a Chrome WebDriver with this service's fixed set of options.

    Returns:
        A new ``webdriver.Chrome`` with a page-load timeout of 30 and an
        implicit wait of 10 already applied.
    """
    # Command-line switches, applied in exactly this order.
    switches = (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--window-size=1920,1080",
        "--disable-extensions",
        "--disable-plugins",
        "--disable-images",
        "--disable-javascript",  # Remove this if you need JS
    )

    options = Options()
    for switch in switches:
        options.add_argument(switch)

    browser = webdriver.Chrome(options=options)
    browser.set_page_load_timeout(30)
    browser.implicitly_wait(10)
    return browser
112
+
113
def get_element(driver: webdriver.Chrome, selector: str, selector_type: str = "css"):
    """Locate a single element on the page the driver currently shows.

    Args:
        driver: Browser to search in.
        selector: Locator expression, interpreted according to
            ``selector_type``.
        selector_type: One of "css", "xpath", "id", "class" or "name".

    Returns:
        Whatever ``driver.find_element`` returns for the resolved locator.

    Raises:
        ValueError: If ``selector_type`` is not one of the supported kinds.
        HTTPException: With status 404 when the lookup raises
            ``NoSuchElementException`` or ``TimeoutException``.
    """
    # Map the public selector kind onto Selenium's locator strategy.
    strategies = {
        "css": By.CSS_SELECTOR,
        "xpath": By.XPATH,
        "id": By.ID,
        "class": By.CLASS_NAME,
        "name": By.NAME,
    }

    strategy = strategies.get(selector_type)
    if strategy is None:
        raise ValueError(f"Unsupported selector type: {selector_type}")

    try:
        return driver.find_element(strategy, selector)
    except (NoSuchElementException, TimeoutException) as e:
        raise HTTPException(status_code=404, detail=f"Element not found: {str(e)}")
130
+
131
@app.get("/")
async def root():
    """Report that the service is up."""
    banner = {"message": "Browser Automation API is running"}
    return banner
134
+
135
@app.post("/session/create", response_model=SessionResponse)
async def create_session():
    """Start a new browser and register it under a freshly generated id.

    Returns:
        A ``SessionResponse`` carrying the new id and the status "created".

    Raises:
        HTTPException: With status 500 if anything fails while the session
            is being created.
    """
    try:
        new_id = str(uuid.uuid4())
        browser_sessions[new_id] = create_chrome_driver()
        return SessionResponse(session_id=new_id, status="created")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to create session: {str(e)}")
149
+
150
@app.delete("/session/{session_id}")
async def close_session(session_id: str):
    """Close a browser session and remove it from the registry.

    Args:
        session_id: Id of the session to close.

    Returns:
        A dict with a "message" key: a plain success message, or a
        "closed with warning" message when quitting the browser raised.

    Raises:
        HTTPException: With status 404 if the session id is unknown.
    """
    # Unregister first: if quit() fails, the dead driver must not stay listed
    # as an active session while the response says it was closed.
    driver = browser_sessions.pop(session_id, None)
    if driver is None:
        raise HTTPException(status_code=404, detail="Session not found")

    try:
        driver.quit()
    except Exception as e:
        return {"message": f"Session closed with warning: {str(e)}"}
    return {"message": "Session closed successfully"}
162
+
163
@app.post("/session/{session_id}/navigate", response_model=ActionResponse)
async def navigate(session_id: str, request: NavigateRequest):
    """Load a URL in the session's browser and wait for the page to finish.

    Args:
        session_id: Id of the session to drive.
        request: Body carrying the target ``url``.

    Returns:
        An ``ActionResponse``. On success ``data`` holds the browser's
        ``current_url`` and ``title``; on any failure ``success`` is False
        and ``message`` carries the error text.

    Raises:
        HTTPException: With status 404 if the session id is unknown.
    """
    browser = browser_sessions.get(session_id)
    if browser is None:
        raise HTTPException(status_code=404, detail="Session not found")

    def page_is_loaded(d):
        # The document reports "complete" once loading has finished.
        return d.execute_script("return document.readyState") == "complete"

    try:
        browser.get(request.url)
        WebDriverWait(browser, 10).until(page_is_loaded)
        page_state = {"current_url": browser.current_url, "title": browser.title}
        return ActionResponse(
            success=True,
            message=f"Successfully navigated to {request.url}",
            data=page_state,
        )
    except Exception as e:
        return ActionResponse(success=False, message=f"Navigation failed: {str(e)}")
187
+
188
@app.post("/session/{session_id}/click", response_model=ActionResponse)
async def click_element(session_id: str, request: ClickRequest):
    """Click an element in the session's browser.

    Args:
        session_id: Id of the session to drive.
        request: Body carrying the ``selector`` and its ``selector_type``.

    Returns:
        An ``ActionResponse``; ``success`` is False and ``message`` carries
        the error text when the click itself fails.

    Raises:
        HTTPException: With status 404 if the session id is unknown, or if
            ``get_element`` reports that the element was not found.
    """
    if session_id not in browser_sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    driver = browser_sessions[session_id]

    try:
        element = get_element(driver, request.selector, request.selector_type)

        # Scroll to element if needed
        driver.execute_script("arguments[0].scrollIntoView(true);", element)

        # Wait for element to be clickable
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable(element))

        element.click()

        return ActionResponse(
            success=True,
            message=f"Successfully clicked element: {request.selector}",
        )
    except HTTPException:
        # get_element signals a missing element with a 404. Let it reach the
        # client instead of folding it into a 200 "Click failed: 404: ..."
        # response via the broad handler below.
        raise
    except Exception as e:
        return ActionResponse(
            success=False,
            message=f"Click failed: {str(e)}",
        )
216
+
217
@app.post("/session/{session_id}/type", response_model=ActionResponse)
async def type_text(session_id: str, request: TypeRequest):
    """Type text into an element in the session's browser.

    Args:
        session_id: Id of the session to drive.
        request: Body carrying the ``selector``, its ``selector_type``, the
            ``text`` to send and the ``clear_first`` flag.

    Returns:
        An ``ActionResponse``; ``success`` is False and ``message`` carries
        the error text when typing itself fails.

    Raises:
        HTTPException: With status 404 if the session id is unknown, or if
            ``get_element`` reports that the element was not found.
    """
    if session_id not in browser_sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    driver = browser_sessions[session_id]

    try:
        element = get_element(driver, request.selector, request.selector_type)

        # Scroll to element
        driver.execute_script("arguments[0].scrollIntoView(true);", element)

        # Clear field if requested
        if request.clear_first:
            element.clear()

        # Type text
        element.send_keys(request.text)

        return ActionResponse(
            success=True,
            message=f"Successfully typed text into element: {request.selector}",
        )
    except HTTPException:
        # get_element signals a missing element with a 404. Let it reach the
        # client instead of folding it into a 200 "Type failed: 404: ..."
        # response via the broad handler below.
        raise
    except Exception as e:
        return ActionResponse(
            success=False,
            message=f"Type failed: {str(e)}",
        )
247
+
248
@app.get("/session/{session_id}/screenshot", response_model=ScreenshotResponse)
async def take_screenshot(session_id: str):
    """Capture the session's current page as a base64-encoded PNG.

    Args:
        session_id: Id of the session to capture.

    Returns:
        A ``ScreenshotResponse``. On failure ``success`` is False,
        ``screenshot`` is the empty string and ``message`` carries the
        error text.

    Raises:
        HTTPException: With status 404 if the session id is unknown.
    """
    browser = browser_sessions.get(session_id)
    if browser is None:
        raise HTTPException(status_code=404, detail="Session not found")

    try:
        png_bytes = browser.get_screenshot_as_png()
        encoded = base64.b64encode(png_bytes).decode()
    except Exception as e:
        return ScreenshotResponse(
            success=False,
            screenshot="",
            message=f"Screenshot failed: {str(e)}",
        )
    return ScreenshotResponse(
        success=True,
        screenshot=encoded,
        message="Screenshot taken successfully",
    )
271
+
272
@app.get("/session/{session_id}/page-info", response_model=PageInfo)
async def get_page_info(session_id: str):
    """Get information about the current page.

    Collects the page title, the current URL and up to ten elements for each
    of the tags button, input, a, select and textarea.

    Args:
        session_id: Id of the session to inspect.

    Returns:
        A ``PageInfo`` describing the page. Elements that cannot be read are
        left out of the list.

    Raises:
        HTTPException: With status 404 if the session id is unknown, or 500
            if the page information cannot be collected.
    """
    if session_id not in browser_sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    driver = browser_sessions[session_id]

    try:
        # Get basic page info
        title = driver.title
        url = driver.current_url

        # Get interactive elements
        elements = []
        interactive_tags = ("button", "input", "a", "select", "textarea")
        wanted_attributes = ("id", "class", "name", "type", "href", "onclick")

        for tag in interactive_tags:
            # Limit to first 10 of each type
            for elem in driver.find_elements(By.TAG_NAME, tag)[:10]:
                try:
                    tag_name = elem.tag_name
                    # Read each value once: every access is a separate call
                    # to the browser.
                    text = elem.text
                    attributes = {}
                    for attr in wanted_attributes:
                        value = elem.get_attribute(attr)
                        if value:  # keep only attributes that are set
                            attributes[attr] = value

                    elements.append(
                        ElementInfo(
                            tag=tag_name,
                            text=text[:100] if text else "",  # Limit text length
                            attributes=attributes,
                        )
                    )
                except Exception:
                    # Skip elements that cannot be read, but let
                    # KeyboardInterrupt / SystemExit through (a bare
                    # ``except:`` would swallow those too).
                    continue

        return PageInfo(title=title, url=url, elements=elements)
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to get page info: {str(e)}"
        ) from e
313
+
314
@app.get("/session/{session_id}/execute-js")
async def execute_javascript(session_id: str, script: str):
    """Run a caller-supplied script in the session's browser.

    Args:
        session_id: Id of the session to drive.
        script: JavaScript source handed verbatim to ``execute_script``.

    Returns:
        An ``ActionResponse``. On success ``data["result"]`` holds whatever
        the script returned; on failure ``success`` is False and ``message``
        carries the error text.

    Raises:
        HTTPException: With status 404 if the session id is unknown.
    """
    # NOTE(review): this runs arbitrary caller-supplied code, and does so from
    # a GET request. Confirm the endpoint is only reachable by trusted clients.
    browser = browser_sessions.get(session_id)
    if browser is None:
        raise HTTPException(status_code=404, detail="Session not found")

    try:
        outcome = browser.execute_script(script)
    except Exception as e:
        return ActionResponse(
            success=False,
            message=f"JavaScript execution failed: {str(e)}",
        )
    return ActionResponse(
        success=True,
        message="JavaScript executed successfully",
        data={"result": outcome},
    )
334
+
335
@app.get("/sessions")
async def list_sessions():
    """Return the ids of all registered sessions together with their count."""
    session_ids = list(browser_sessions)
    return {
        "active_sessions": session_ids,
        "total_sessions": len(session_ids),
    }
342
+
343
if __name__ == "__main__":
    # Pass the application object itself rather than the import string
    # "app:app": the string form only resolves when this file is saved as
    # app.py, and it makes uvicorn import the module a second time.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
        log_level="info",
    )