sintamar committed on
Commit
c34f3d2
·
verified ·
1 Parent(s): 5ab6dbf

Update extract.py

Browse files
Files changed (1) hide show
  1. extract.py +4 -1
extract.py CHANGED
@@ -2,6 +2,7 @@ from selenium import webdriver
2
  from selenium.common.exceptions import WebDriverException
3
  from PIL import Image
4
  from io import BytesIO
 
5
 
6
  def take_webdata(url):
7
  options = webdriver.ChromeOptions()
@@ -18,6 +19,8 @@ def take_webdata(url):
18
  page_title = wd.title
19
  screenshot = wd.get_screenshot_as_png()
20
  html = wd.execute_script("return document.documentElement.outerHTML;")
 
 
21
 
22
  except WebDriverException as e:
23
  return page_title
@@ -25,4 +28,4 @@ def take_webdata(url):
25
  if wd:
26
  wd.quit()
27
 
28
- return html , page_title
 
2
  from selenium.common.exceptions import WebDriverException
3
  from PIL import Image
4
  from io import BytesIO
5
+ from bs4 import BeautifulSoup
6
 
7
  def take_webdata(url):
8
  options = webdriver.ChromeOptions()
 
19
  page_title = wd.title
20
  screenshot = wd.get_screenshot_as_png()
21
  html = wd.execute_script("return document.documentElement.outerHTML;")
22
+ soup = BeautifulSoup(html, "html.parser")
23
+ tournament_div = soup.find("div", id="tournament-table", class_="tournament-table-standings")
24
 
25
  except WebDriverException as e:
26
  return page_title
 
28
  if wd:
29
  wd.quit()
30
 
31
+ return html , tournament_div