Spaces:
Sleeping
Sleeping
Update extract.py
Browse files
- extract.py +4 -1
extract.py
CHANGED
|
@@ -2,6 +2,7 @@ from selenium import webdriver
|
|
| 2 |
from selenium.common.exceptions import WebDriverException
|
| 3 |
from PIL import Image
|
| 4 |
from io import BytesIO
|
|
|
|
| 5 |
|
| 6 |
def take_webdata(url):
|
| 7 |
options = webdriver.ChromeOptions()
|
|
@@ -18,6 +19,8 @@ def take_webdata(url):
|
|
| 18 |
page_title = wd.title
|
| 19 |
screenshot = wd.get_screenshot_as_png()
|
| 20 |
html = wd.execute_script("return document.documentElement.outerHTML;")
|
|
|
|
|
|
|
| 21 |
|
| 22 |
except WebDriverException as e:
|
| 23 |
return page_title
|
|
@@ -25,4 +28,4 @@ def take_webdata(url):
|
|
| 25 |
if wd:
|
| 26 |
wd.quit()
|
| 27 |
|
| 28 |
-
return html ,
|
|
|
|
| 2 |
from selenium.common.exceptions import WebDriverException
|
| 3 |
from PIL import Image
|
| 4 |
from io import BytesIO
|
| 5 |
+
from bs4 import BeautifulSoup
|
| 6 |
|
| 7 |
def take_webdata(url):
|
| 8 |
options = webdriver.ChromeOptions()
|
|
|
|
| 19 |
page_title = wd.title
|
| 20 |
screenshot = wd.get_screenshot_as_png()
|
| 21 |
html = wd.execute_script("return document.documentElement.outerHTML;")
|
| 22 |
+
soup = BeautifulSoup(html, "html.parser")
|
| 23 |
+
tournament_div = soup.find("div", id="tournament-table", class_="tournament-table-standings")
|
| 24 |
|
| 25 |
except WebDriverException as e:
|
| 26 |
return page_title
|
|
|
|
| 28 |
if wd:
|
| 29 |
wd.quit()
|
| 30 |
|
| 31 |
+
return html , tournament_div
|