Spaces:
Sleeping
Sleeping
Update extract.py
Browse files
- extract.py +6 -2
extract.py
CHANGED
|
@@ -4,6 +4,7 @@ from PIL import Image
|
|
| 4 |
from io import BytesIO
|
| 5 |
from bs4 import BeautifulSoup
|
| 6 |
|
|
|
|
| 7 |
def take_webdata(url):
|
| 8 |
options = webdriver.ChromeOptions()
|
| 9 |
options.add_argument('--headless')
|
|
@@ -20,7 +21,10 @@ def take_webdata(url):
|
|
| 20 |
screenshot = wd.get_screenshot_as_png()
|
| 21 |
html = wd.execute_script("return document.documentElement.outerHTML;")
|
| 22 |
soup = BeautifulSoup(html, "html.parser")
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
except WebDriverException as e:
|
| 26 |
return page_title
|
|
@@ -28,4 +32,4 @@ def take_webdata(url):
|
|
| 28 |
if wd:
|
| 29 |
wd.quit()
|
| 30 |
|
| 31 |
-
return html ,
|
|
|
|
| 4 |
from io import BytesIO
|
| 5 |
from bs4 import BeautifulSoup
|
| 6 |
|
| 7 |
+
|
| 8 |
def take_webdata(url):
|
| 9 |
options = webdriver.ChromeOptions()
|
| 10 |
options.add_argument('--headless')
|
|
|
|
| 21 |
screenshot = wd.get_screenshot_as_png()
|
| 22 |
html = wd.execute_script("return document.documentElement.outerHTML;")
|
| 23 |
soup = BeautifulSoup(html, "html.parser")
|
| 24 |
+
div_find = soup.find("div", id="tournament-table", class_="tournament-table-standings")
|
| 25 |
+
table_find = div_find.find("table") if div_find else None
|
| 26 |
+
#tournament_div = soup.find("div", id="tournament-table", class_="tournament-table-standings")
|
| 27 |
+
|
| 28 |
|
| 29 |
except WebDriverException as e:
|
| 30 |
return page_title
|
|
|
|
| 32 |
if wd:
|
| 33 |
wd.quit()
|
| 34 |
|
| 35 |
+
return html , table_find
|