Wanderhalleylee committed on
Commit
0dcb476
·
verified ·
1 Parent(s): ab398b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +232 -197
app.py CHANGED
@@ -971,245 +971,280 @@ async def refresh_page(request: dict, token: str = Depends(verify_token)):
971
  # ===================== CLICK ELEMENT =====================
972
  @app.post("/click-element")
973
  async def click_element(request: dict, token: str = Depends(verify_token)):
974
- """Click at specific coordinates on the page"""
975
  try:
976
  session_id = request.get("session_id")
977
  x = request.get("x", 0)
978
  y = request.get("y", 0)
979
-
980
  if not session_id:
981
- raise HTTPException(status_code=400, detail="session_id é obrigatório")
982
-
983
  session = session_manager.get_session(session_id)
 
 
 
984
  driver = session["driver"]
985
-
986
- # Get viewport size to scale coordinates
987
- viewport = driver.execute_script("""
988
- return {
989
- width: window.innerWidth,
990
- height: window.innerHeight,
991
- scrollX: window.pageXOffset || document.documentElement.scrollLeft,
992
- scrollY: window.pageYOffset || document.documentElement.scrollTop
993
- };
994
- """)
995
-
996
- # Click using JavaScript at exact coordinates
997
- click_result = driver.execute_script("""
 
 
 
 
 
 
 
 
 
998
  var x = arguments[0];
999
  var y = arguments[1];
1000
- var el = document.elementFromPoint(x, y);
1001
- var result = {
1002
- tagName: '',
1003
- text: '',
1004
- type: '',
1005
- name: '',
1006
- id: '',
1007
- href: '',
1008
- clicked: false
1009
- };
1010
- if (el) {
1011
- result.tagName = el.tagName.toLowerCase();
1012
- result.text = (el.textContent || '').trim().substring(0, 100);
1013
- result.type = el.type || '';
1014
- result.name = el.name || '';
1015
- result.id = el.id || '';
1016
- result.href = el.href || '';
1017
-
1018
- // Focus if it's an input
1019
- if (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA' || el.tagName === 'SELECT') {
1020
- el.focus();
1021
- el.click();
1022
- } else {
1023
- el.click();
1024
  }
1025
- result.clicked = true;
 
 
 
 
 
 
 
 
 
 
1026
  }
1027
- return result;
1028
- """, int(x), int(y))
1029
-
1030
- time.sleep(0.5)
1031
-
1032
- # Check if page changed
1033
- new_url = driver.current_url
1034
- new_title = driver.title
1035
-
1036
- # Handle new tabs
1037
- if len(driver.window_handles) > 1:
1038
- driver.switch_to.window(driver.window_handles[-1])
1039
- new_url = driver.current_url
1040
- new_title = driver.title
1041
-
1042
- session["url"] = new_url
1043
- session["title"] = new_title
1044
-
1045
- screenshot = driver.get_screenshot_as_base64()
1046
-
1047
- return JSONResponse(content={
1048
- "status": "success",
1049
- "click": click_result,
1050
- "coordinates": {"x": x, "y": y},
1051
- "viewport": viewport,
1052
- "url": new_url,
1053
- "title": new_title,
 
 
 
 
 
 
 
 
 
 
 
 
 
1054
  "screenshot": screenshot
1055
- })
1056
-
1057
  except HTTPException:
1058
  raise
1059
  except Exception as e:
1060
- logger.error(f"[CLICK] Erro: {e}")
1061
  raise HTTPException(status_code=500, detail=str(e))
1062
 
1063
  # ===================== TYPE TEXT =====================
1064
  @app.post("/type-text")
1065
  async def type_text(request: dict, token: str = Depends(verify_token)):
1066
- """Type text into the currently focused element or find input and type"""
1067
  try:
1068
  session_id = request.get("session_id")
1069
  text = request.get("text", "")
1070
  press_enter = request.get("press_enter", False)
1071
- clear_first = request.get("clear_first", True)
1072
- selector = request.get("selector", "")
1073
-
1074
  if not session_id:
1075
- raise HTTPException(status_code=400, detail="session_id é obrigatório")
1076
- if not text and not press_enter:
1077
- raise HTTPException(status_code=400, detail="text é obrigatório")
1078
-
1079
  session = session_manager.get_session(session_id)
 
 
 
1080
  driver = session["driver"]
1081
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1082
  typed = False
1083
- element_info = {}
1084
-
1085
- # If selector provided, find and focus that element
1086
- if selector:
1087
- try:
1088
- el = driver.find_element(By.CSS_SELECTOR, selector)
1089
- el.click()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1090
  if clear_first:
1091
- el.clear()
 
 
 
 
 
 
 
 
1092
  if text:
1093
- el.send_keys(text)
 
1094
  if press_enter:
1095
- el.send_keys(Keys.ENTER)
 
1096
  typed = True
1097
  element_info = {
1098
- "tagName": el.tag_name,
1099
- "type": el.get_attribute("type") or "",
1100
- "name": el.get_attribute("name") or "",
1101
- "id": el.get_attribute("id") or "",
1102
- "method": "selector"
1103
  }
1104
- except Exception as se:
1105
- logger.warning(f"Selector '{selector}' failed: {se}")
1106
-
1107
- # Try typing into active/focused element
1108
- if not typed:
1109
- try:
1110
- result = driver.execute_script("""
1111
- var el = document.activeElement;
1112
- if (el && (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA' || el.isContentEditable)) {
1113
- return {
1114
- tagName: el.tagName.toLowerCase(),
1115
- type: el.type || '',
1116
- name: el.name || '',
1117
- id: el.id || '',
1118
- isInput: true
1119
- };
1120
- }
1121
- return { isInput: false };
1122
- """)
1123
-
1124
- if result.get("isInput"):
1125
- active = driver.switch_to.active_element
1126
- if clear_first:
1127
- active.clear()
1128
- if text:
1129
- active.send_keys(text)
1130
  if press_enter:
1131
- active.send_keys(Keys.ENTER)
 
1132
  typed = True
1133
- element_info = result
1134
- element_info["method"] = "active_element"
1135
- except Exception as ae:
1136
- logger.warning(f"Active element typing failed: {ae}")
1137
-
1138
- # Fallback: find first visible input and type
1139
- if not typed:
1140
- try:
1141
- inputs = driver.find_elements(By.CSS_SELECTOR,
1142
- "input[type='text']:not([style*='display: none']), "
1143
- "input[type='email']:not([style*='display: none']), "
1144
- "input[type='password']:not([style*='display: none']), "
1145
- "input:not([type]):not([style*='display: none']), "
1146
- "textarea:not([style*='display: none'])"
1147
- )
1148
- for inp in inputs:
1149
- try:
1150
- if inp.is_displayed() and inp.is_enabled():
1151
- inp.click()
1152
- if clear_first:
1153
- inp.clear()
1154
- if text:
1155
- inp.send_keys(text)
1156
- if press_enter:
1157
- inp.send_keys(Keys.ENTER)
1158
- typed = True
1159
- element_info = {
1160
- "tagName": inp.tag_name,
1161
- "type": inp.get_attribute("type") or "",
1162
- "name": inp.get_attribute("name") or "",
1163
- "id": inp.get_attribute("id") or "",
1164
- "method": "fallback_first_input"
1165
- }
1166
- break
1167
- except:
1168
- continue
1169
- except Exception as fe:
1170
- logger.warning(f"Fallback typing failed: {fe}")
1171
-
1172
- # Only press Enter if requested and nothing else was done
1173
- if not typed and press_enter:
1174
- try:
1175
- ActionChains(driver).send_keys(Keys.ENTER).perform()
1176
- typed = True
1177
- element_info = {"method": "action_chains_enter"}
1178
- except:
1179
- pass
1180
-
1181
- time.sleep(0.5)
1182
-
1183
- # Wait if enter was pressed (page might change)
1184
- if press_enter:
1185
- time.sleep(1.5)
1186
- try:
1187
- WebDriverWait(driver, 5).until(
1188
- lambda d: d.execute_script("return document.readyState") == "complete"
1189
- )
1190
- except:
1191
- pass
1192
-
1193
- session["url"] = driver.current_url
1194
- session["title"] = driver.title
1195
-
1196
- screenshot = driver.get_screenshot_as_base64()
1197
-
1198
- return JSONResponse(content={
1199
- "status": "success",
1200
  "typed": typed,
1201
- "text_sent": text,
1202
- "enter_pressed": press_enter,
 
1203
  "element": element_info,
1204
- "url": driver.current_url,
1205
- "title": driver.title,
1206
  "screenshot": screenshot
1207
- })
1208
-
1209
  except HTTPException:
1210
  raise
1211
  except Exception as e:
1212
- logger.error(f"[TYPE] Erro: {e}")
1213
  raise HTTPException(status_code=500, detail=str(e))
1214
 
1215
  # ============================================================
 
971
  # ===================== CLICK ELEMENT =====================
972
  @app.post("/click-element")
973
  async def click_element(request: dict, token: str = Depends(verify_token)):
974
+ """Click at specific coordinates using ActionChains for precision"""
975
  try:
976
  session_id = request.get("session_id")
977
  x = request.get("x", 0)
978
  y = request.get("y", 0)
979
+
980
  if not session_id:
981
+ raise HTTPException(status_code=400, detail="session_id required")
982
+
983
  session = session_manager.get_session(session_id)
984
+ if not session:
985
+ raise HTTPException(status_code=410, detail="Session expired")
986
+
987
  driver = session["driver"]
988
+
989
+ # Use ActionChains for precise clicking
990
+ from selenium.webdriver.common.action_chains import ActionChains
991
+
992
+ # Move to absolute position from top-left of viewport
993
+ # First scroll to top to reset position
994
+ viewport_width = driver.execute_script("return window.innerWidth;")
995
+ viewport_height = driver.execute_script("return window.innerHeight;")
996
+ scroll_top = driver.execute_script("return window.pageYOffset || document.documentElement.scrollTop;")
997
+ scroll_left = driver.execute_script("return window.pageXOffset || document.documentElement.scrollLeft;")
998
+
999
+ # The x,y from frontend are in 1920x1080 space
1000
+ # Scale to actual viewport
1001
+ scale_x = viewport_width / 1920.0
1002
+ scale_y = viewport_height / 1080.0
1003
+ actual_x = int(x * scale_x)
1004
+ actual_y = int(y * scale_y)
1005
+
1006
+ logger.info(f"[CLICK] Input: ({x},{y}) Viewport: {viewport_width}x{viewport_height} Scale: ({scale_x:.2f},{scale_y:.2f}) Actual: ({actual_x},{actual_y}) Scroll: ({scroll_left},{scroll_top})")
1007
+
1008
+ # Get element at coordinates using JavaScript
1009
+ clicked_info = driver.execute_script("""
1010
  var x = arguments[0];
1011
  var y = arguments[1];
1012
+ var elem = document.elementFromPoint(x, y);
1013
+ if (elem) {
1014
+ // Click the element
1015
+ elem.click();
1016
+
1017
+ // Also try focus if it's an input
1018
+ if (elem.tagName === 'INPUT' || elem.tagName === 'TEXTAREA' || elem.tagName === 'SELECT') {
1019
+ elem.focus();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1020
  }
1021
+
1022
+ return {
1023
+ tagName: elem.tagName,
1024
+ id: elem.id || '',
1025
+ name: elem.name || '',
1026
+ type: elem.type || '',
1027
+ text: (elem.textContent || '').substring(0, 100).trim(),
1028
+ href: elem.href || '',
1029
+ className: (elem.className || '').substring(0, 100),
1030
+ value: elem.value || ''
1031
+ };
1032
  }
1033
+ return null;
1034
+ """, actual_x, actual_y)
1035
+
1036
+ # If elementFromPoint didn't work, try ActionChains as fallback
1037
+ if not clicked_info:
1038
+ try:
1039
+ actions = ActionChains(driver)
1040
+ # Move to body first, then offset
1041
+ body = driver.find_element("tag name", "body")
1042
+ actions.move_to_element_with_offset(body, actual_x - viewport_width//2, actual_y - viewport_height//2).click().perform()
1043
+ clicked_info = {"tagName": "unknown", "text": "ActionChains click"}
1044
+ except Exception as ac_err:
1045
+ logger.warning(f"[CLICK] ActionChains fallback failed: {ac_err}")
1046
+ clicked_info = {"tagName": "unknown", "text": "Click at coordinates"}
1047
+
1048
+ import time
1049
+ time.sleep(0.3)
1050
+
1051
+ # Check for new tabs
1052
+ try:
1053
+ if len(driver.window_handles) > 1:
1054
+ driver.switch_to.window(driver.window_handles[-1])
1055
+ except:
1056
+ pass
1057
+
1058
+ current_url = driver.current_url
1059
+ current_title = driver.title
1060
+ session["url"] = current_url
1061
+ session["title"] = current_title
1062
+
1063
+ import base64
1064
+ screenshot = base64.b64encode(driver.get_screenshot_as_png()).decode('utf-8')
1065
+
1066
+ return {
1067
+ "status": "clicked",
1068
+ "clicked": clicked_info,
1069
+ "coordinates": {"x": x, "y": y, "actual_x": actual_x, "actual_y": actual_y},
1070
+ "viewport": {"width": viewport_width, "height": viewport_height},
1071
+ "url": current_url,
1072
+ "title": current_title,
1073
  "screenshot": screenshot
1074
+ }
1075
+
1076
  except HTTPException:
1077
  raise
1078
  except Exception as e:
1079
+ logger.error(f"[CLICK] Error: {e}")
1080
  raise HTTPException(status_code=500, detail=str(e))
1081
 
1082
  # ===================== TYPE TEXT =====================
1083
  @app.post("/type-text")
1084
  async def type_text(request: dict, token: str = Depends(verify_token)):
1085
+ """Type text or press special keys using ActionChains"""
1086
  try:
1087
  session_id = request.get("session_id")
1088
  text = request.get("text", "")
1089
  press_enter = request.get("press_enter", False)
1090
+ clear_first = request.get("clear_first", False)
1091
+ selector = request.get("selector", None)
1092
+
1093
  if not session_id:
1094
+ raise HTTPException(status_code=400, detail="session_id required")
1095
+
 
 
1096
  session = session_manager.get_session(session_id)
1097
+ if not session:
1098
+ raise HTTPException(status_code=410, detail="Session expired")
1099
+
1100
  driver = session["driver"]
1101
+
1102
+ from selenium.webdriver.common.action_chains import ActionChains
1103
+ from selenium.webdriver.common.keys import Keys
1104
+
1105
+ # Map special Selenium unicode chars to actual Keys
1106
+ special_keys = {
1107
+ '\uE004': Keys.TAB,
1108
+ '\uE00C': Keys.ESCAPE,
1109
+ '\uE007': Keys.ENTER,
1110
+ '\uE003': Keys.BACKSPACE,
1111
+ '\uE006': Keys.RETURN,
1112
+ '\uE012': Keys.ARROW_LEFT,
1113
+ '\uE013': Keys.ARROW_UP,
1114
+ '\uE014': Keys.ARROW_RIGHT,
1115
+ '\uE015': Keys.ARROW_DOWN,
1116
+ '\uE010': Keys.END,
1117
+ '\uE011': Keys.HOME,
1118
+ '\uE00D': Keys.SPACE,
1119
+ '\uE017': Keys.DELETE,
1120
+ }
1121
+
1122
  typed = False
1123
+ element_info = {"tagName": "none", "method": "none"}
1124
+
1125
+ # Check if text is a special key
1126
+ is_special = text in special_keys
1127
+
1128
+ if is_special:
1129
+ # Use ActionChains to press the actual key
1130
+ actions = ActionChains(driver)
1131
+ actions.send_keys(special_keys[text]).perform()
1132
+ typed = True
1133
+ key_name = {
1134
+ '\uE004': 'Tab', '\uE00C': 'Escape', '\uE007': 'Enter',
1135
+ '\uE003': 'Backspace', '\uE006': 'Return',
1136
+ '\uE012': 'ArrowLeft', '\uE013': 'ArrowUp',
1137
+ '\uE014': 'ArrowRight', '\uE015': 'ArrowDown',
1138
+ '\uE010': 'End', '\uE011': 'Home',
1139
+ '\uE00D': 'Space', '\uE017': 'Delete'
1140
+ }.get(text, 'Special')
1141
+ element_info = {"tagName": "body", "method": "ActionChains", "key": key_name}
1142
+ logger.info(f"[TYPE] Special key: {key_name}")
1143
+ else:
1144
+ # Find element to type into
1145
+ target = None
1146
+
1147
+ # Try selector first
1148
+ if selector:
1149
+ try:
1150
+ target = driver.find_element("css selector", selector)
1151
+ except:
1152
+ pass
1153
+
1154
+ # Try active element
1155
+ if not target:
1156
+ try:
1157
+ target = driver.switch_to.active_element
1158
+ tag = target.tag_name.lower() if target.tag_name else ""
1159
+ if tag not in ["input", "textarea", "select", "div", "span"]:
1160
+ # Active element is not typeable, check if it has contenteditable
1161
+ editable = target.get_attribute("contenteditable")
1162
+ if editable != "true":
1163
+ target = None
1164
+ except:
1165
+ target = None
1166
+
1167
+ # Fallback: find first visible input
1168
+ if not target:
1169
+ try:
1170
+ inputs = driver.find_elements("css selector", "input:not([type='hidden']):not([type='submit']):not([type='button']):not([type='checkbox']):not([type='radio']), textarea")
1171
+ for inp in inputs:
1172
+ if inp.is_displayed() and inp.is_enabled():
1173
+ target = inp
1174
+ break
1175
+ except:
1176
+ pass
1177
+
1178
+ if target:
1179
+ try:
1180
+ target.click()
1181
+ except:
1182
+ pass
1183
+
1184
  if clear_first:
1185
+ try:
1186
+ target.clear()
1187
+ except:
1188
+ try:
1189
+ target.send_keys(Keys.CONTROL + "a")
1190
+ target.send_keys(Keys.DELETE)
1191
+ except:
1192
+ pass
1193
+
1194
  if text:
1195
+ target.send_keys(text)
1196
+
1197
  if press_enter:
1198
+ target.send_keys(Keys.ENTER)
1199
+
1200
  typed = True
1201
  element_info = {
1202
+ "tagName": target.tag_name or "unknown",
1203
+ "type": target.get_attribute("type") or "",
1204
+ "name": target.get_attribute("name") or "",
1205
+ "id": target.get_attribute("id") or "",
1206
+ "method": "direct"
1207
  }
1208
+ logger.info(f"[TYPE] Typed into <{element_info['tagName']}> name={element_info['name']} id={element_info['id']}")
1209
+ else:
1210
+ # Last resort: ActionChains send_keys to whatever is focused
1211
+ if text:
1212
+ actions = ActionChains(driver)
1213
+ actions.send_keys(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1214
  if press_enter:
1215
+ actions.send_keys(Keys.ENTER)
1216
+ actions.perform()
1217
  typed = True
1218
+ element_info = {"tagName": "active", "method": "ActionChains"}
1219
+ logger.info("[TYPE] Used ActionChains fallback")
1220
+
1221
+ import time
1222
+ time.sleep(0.3)
1223
+
1224
+ current_url = driver.current_url
1225
+ current_title = driver.title
1226
+ session["url"] = current_url
1227
+ session["title"] = current_title
1228
+
1229
+ import base64
1230
+ screenshot = base64.b64encode(driver.get_screenshot_as_png()).decode('utf-8')
1231
+
1232
+ return {
1233
+ "status": "typed" if typed else "no_target",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1234
  "typed": typed,
1235
+ "text_sent": text if not is_special else "",
1236
+ "press_enter": press_enter,
1237
+ "special_key": is_special,
1238
  "element": element_info,
1239
+ "url": current_url,
1240
+ "title": current_title,
1241
  "screenshot": screenshot
1242
+ }
1243
+
1244
  except HTTPException:
1245
  raise
1246
  except Exception as e:
1247
+ logger.error(f"[TYPE] Error: {e}")
1248
  raise HTTPException(status_code=500, detail=str(e))
1249
 
1250
  # ============================================================