# selenium_exam / extract.py
# sintamar's picture
# Update extract.py
# fd9b833 verified
from io import BytesIO

import pandas as pd
from bs4 import BeautifulSoup
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
# (output column name, CSS selector relative to one standings row), in the
# order the columns appear in the resulting DataFrame.
_STANDINGS_COLUMNS = (
    ("Peringkat", ".tableCellRank"),
    ("Tim", ".tableCellParticipant__name"),
    ("Main", "span.table__cell:nth-of-type(3)"),
    ("Menang", "span.table__cell:nth-of-type(4)"),
    ("Seri", "span.table__cell:nth-of-type(5)"),
    ("Kalah", "span.table__cell:nth-of-type(6)"),
    ("Gol", ".table__cell--score"),
    ("Selisih Gol", ".table__cell--goalsForAgainstDiff"),
    ("Poin", ".table__cell--points"),
)

# CSS selector for one row of the standings table.
_ROW_SELECTOR = "div.ui-table__row"


def _parse_standings(html):
    """Return one dict per standings row found in the rendered page HTML."""
    soup = BeautifulSoup(html, "html.parser")
    records = []
    # select() returns every matching row (possibly none) and matches the
    # class regardless of any other classes on the element.
    for row in soup.select(_ROW_SELECTOR):
        record = {}
        for column, selector in _STANDINGS_COLUMNS:
            cell = row.select_one(selector)
            # A missing cell becomes an empty string instead of an error.
            record[column] = cell.text.strip() if cell is not None else ""
        records.append(record)
    return records


def take_webdata(url):
    """Load *url* in headless Chrome and scrape the standings table.

    Args:
        url: Address of the page to load.

    Returns:
        On success, a tuple ``(html, df)``: the rendered document HTML and a
        pandas DataFrame with one row per standings entry. The DataFrame
        always has the columns listed in ``_STANDINGS_COLUMNS``, even when no
        rows were found.

        If the browser raises ``WebDriverException``, the page title read so
        far is returned instead (an empty string when the failure happened
        before the title was available).
        NOTE(review): this error-path return is kept for backward
        compatibility, but callers that unpack ``html, df`` will break on
        it -- confirm against the call sites.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Bound before the try so the except/finally clauses never touch an
    # unassigned name when start-up or navigation fails.
    wd = None
    page_title = ""
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # Adjust the window size here
        # The implicit wait only affects element lookups, so set it before
        # navigating and then perform a lookup to actually use it.
        wd.implicitly_wait(5)
        wd.get(url)
        page_title = wd.title
        # Blocks up to the implicit-wait timeout until at least one row is
        # rendered; returns an empty list (no exception) if none appears.
        wd.find_elements("css selector", _ROW_SELECTOR)
        html = wd.execute_script("return document.documentElement.outerHTML;")
    except WebDriverException:
        return page_title
    finally:
        if wd is not None:
            wd.quit()

    # Parsing needs only the HTML string, so it runs after the browser has
    # been released.
    records = _parse_standings(html)
    # Build the DataFrame; explicit columns keep the schema stable when the
    # page contained no rows.
    df = pd.DataFrame(records, columns=[name for name, _ in _STANDINGS_COLUMNS])
    return html, df