tx3bas committed on
Commit
bd146a7
·
verified ·
1 Parent(s): 31cacf9

Update extract.py

Browse files
Files changed (1) hide show
  1. extract.py +22 -1
extract.py CHANGED
@@ -1,21 +1,42 @@
1
  from selenium import webdriver
 
 
 
2
  from selenium.common.exceptions import WebDriverException
3
  from bs4 import BeautifulSoup
4
  import time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  def extract_data(user_input, mode):
7
  options = webdriver.ChromeOptions()
8
  options.add_argument('--headless')
9
  options.add_argument('--no-sandbox')
10
  options.add_argument('--disable-dev-shm-usage')
 
11
 
12
  try:
13
  wd = webdriver.Chrome(options=options)
14
  wd.set_window_size(1080, 720)
 
15
  # Construir la URL de búsqueda
16
  url_busqueda = f"https://app.neilpatel.com/es/traffic_analyzer/keywords?domain={user_input}&lang=es&locId=2724&mode={mode}"
17
  wd.get(url_busqueda)
18
- time.sleep(15) # Espera 15 segundos para que la página se cargue completamente
 
 
19
 
20
  # Obtener el contenido de la página
21
  page_content = wd.page_source
 
1
  from selenium import webdriver
2
+ from selenium.webdriver.common.by import By
3
+ from selenium.webdriver.common.keys import Keys
4
+ from selenium.webdriver.chrome.service import Service
5
  from selenium.common.exceptions import WebDriverException
6
  from bs4 import BeautifulSoup
7
  import time
8
+ import random
9
+
10
+ # Lista de User Agents para rotar
11
+ user_agents = [
12
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
13
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
14
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
15
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0",
16
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
17
+ "Mozilla/5.0 (X11; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0"
18
+ ]
19
+
20
+ def get_random_user_agent():
21
+ return random.choice(user_agents)
22
 
23
  def extract_data(user_input, mode):
24
  options = webdriver.ChromeOptions()
25
  options.add_argument('--headless')
26
  options.add_argument('--no-sandbox')
27
  options.add_argument('--disable-dev-shm-usage')
28
+ options.add_argument(f"user-agent={get_random_user_agent()}")
29
 
30
  try:
31
  wd = webdriver.Chrome(options=options)
32
  wd.set_window_size(1080, 720)
33
+
34
  # Construir la URL de búsqueda
35
  url_busqueda = f"https://app.neilpatel.com/es/traffic_analyzer/keywords?domain={user_input}&lang=es&locId=2724&mode={mode}"
36
  wd.get(url_busqueda)
37
+
38
+ # Espera aleatoria para simular el comportamiento humano
39
+ time.sleep(random.uniform(10, 20))
40
 
41
  # Obtener el contenido de la página
42
  page_content = wd.page_source