Spaces:
Sleeping
Sleeping
File size: 2,376 Bytes
b6e91ad 1bbce87 c34f3d2 b6e91ad 9765196 984bb90 b6e91ad 8c1f8f7 984bb90 b6e91ad 9910ac3 c34f3d2 d09dd0b fd9b833 fb70667 9765196 fb70667 6b74d80 fb70667 b6e91ad 6b74d80 b6e91ad fb70667 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
from io import BytesIO

import pandas as pd
from bs4 import BeautifulSoup
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
# Output column name -> CSS selector evaluated inside one standings row.
# The column names are runtime dictionary keys / DataFrame headers and are
# intentionally kept exactly as the original code spelled them.
_STANDINGS_COLUMNS = (
    ("Peringkat", ".tableCellRank"),
    ("Tim", ".tableCellParticipant__name"),
    ("Main", "span.table__cell:nth-of-type(3)"),
    ("Menang", "span.table__cell:nth-of-type(4)"),
    ("Seri", "span.table__cell:nth-of-type(5)"),
    ("Kalah", "span.table__cell:nth-of-type(6)"),
    ("Gol", ".table__cell--score"),
    ("Selisih Gol", ".table__cell--goalsForAgainstDiff"),
    ("Poin", ".table__cell--points"),
)


def _parse_standings_rows(html):
    """Return one dict per ``div.ui-table__row`` element found in *html*.

    Each dict maps the column names in ``_STANDINGS_COLUMNS`` to the stripped
    text of the matching cell, or ``""`` when the cell is not present.
    """
    soup = BeautifulSoup(html, "html.parser")
    records = []
    # find_all() yields every row; the previous find() returned a single Tag
    # (or None), so the loop either walked one row's children or crashed.
    for row in soup.find_all("div", class_="ui-table__row"):
        record = {}
        for column, selector in _STANDINGS_COLUMNS:
            cell = row.select_one(selector)
            record[column] = cell.text.strip() if cell is not None else ""
        records.append(record)
    return records


def take_webdata(url):
    """Open *url* in headless Chrome and extract the standings rows.

    Args:
        url: Address of the page to load.

    Returns:
        On success, a tuple ``(html, df)`` where ``html`` is the serialized
        ``document.documentElement.outerHTML`` of the loaded page and ``df``
        is a ``pandas.DataFrame`` with one row per ``div.ui-table__row`` and
        the columns listed in ``_STANDINGS_COLUMNS`` (empty, but with those
        columns, when no row is found).

        If Selenium raises ``WebDriverException``, the page title read so far
        is returned instead (``""`` when the failure happened before the
        title was available).
        NOTE(review): this error-path return is a plain string rather than a
        tuple; it is kept only for backward compatibility with the original
        behaviour - confirm what callers expect.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Bound up front so the except/finally clauses never touch an unassigned
    # name when the driver fails to start or the page fails to load.
    wd = None
    page_title = ""
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # Adjust the window size here
        wd.get(url)
        # NOTE(review): an implicit wait only applies to element lookups, and
        # none are made here, so it presumably does not delay the snapshot
        # below - consider an explicit wait if rows are rendered late.
        wd.implicitly_wait(5)
        page_title = wd.title
        html = wd.execute_script("return document.documentElement.outerHTML;")
    except WebDriverException:
        return page_title
    finally:
        if wd is not None:
            wd.quit()

    # Parsing needs only the HTML snapshot, so it runs after the browser has
    # been shut down.
    records = _parse_standings_rows(html)

    # Build the DataFrame; explicit columns keep the schema stable even when
    # no rows were scraped.
    df = pd.DataFrame(records, columns=[name for name, _ in _STANDINGS_COLUMNS])
    return html, df