Spaces:
Sleeping
Sleeping
| from selenium import webdriver | |
| from selenium.common.exceptions import WebDriverException | |
| from PIL import Image | |
| from io import BytesIO | |
| from bs4 import BeautifulSoup | |
# (output column name, CSS selector evaluated relative to one table row).
# Column names are runtime dictionary keys / DataFrame headers and are kept
# exactly as the original code spelled them.
_STANDINGS_FIELDS = (
    ("Peringkat", ".tableCellRank"),
    ("Tim", ".tableCellParticipant__name"),
    ("Main", "span.table__cell:nth-of-type(3)"),
    ("Menang", "span.table__cell:nth-of-type(4)"),
    ("Seri", "span.table__cell:nth-of-type(5)"),
    ("Kalah", "span.table__cell:nth-of-type(6)"),
    ("Gol", ".table__cell--score"),
    ("Selisih Gol", ".table__cell--goalsForAgainstDiff"),
    ("Poin", ".table__cell--points"),
)


def _cell_text(row, selector):
    """Return the stripped text of the first match of *selector* in *row*, or ""."""
    node = row.select_one(selector)
    return node.text.strip() if node is not None else ""


def _parse_standings(html):
    """Extract one dict per ``div.ui-table__row`` element found in *html*."""
    soup = BeautifulSoup(html, "html.parser")
    # select() returns a (possibly empty) list of every matching row. The
    # previous find() returned a single Tag or None, so the loop either
    # iterated a row's children or failed outright on None.
    return [
        {column: _cell_text(row, selector) for column, selector in _STANDINGS_FIELDS}
        for row in soup.select("div.ui-table__row")
    ]


def take_webdata(url):
    """Load *url* in headless Chrome and scrape its standings table.

    Args:
        url: Address of the page to open.

    Returns:
        On success, a tuple ``(html, df)`` where ``html`` is the rendered
        ``document.documentElement.outerHTML`` and ``df`` is a pandas
        DataFrame with one row per ``div.ui-table__row`` element. The frame
        always has the columns listed in ``_STANDINGS_FIELDS``, even when no
        rows were found; missing cells are empty strings.

        If Selenium raises ``WebDriverException``, the page title is returned
        instead (an empty string when the failure happened before the title
        could be read).
        NOTE(review): this mixed return shape is kept from the original code;
        confirm that callers handle the string case.
    """
    # Imported locally because the visible import block does not provide them.
    import logging

    import pandas as pd

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Pre-bind both names so the except/finally clauses never touch an
    # unassigned variable when the driver fails early.
    wd = None
    page_title = ""
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)
        wd.get(url)
        # NOTE(review): an implicit wait only applies to element lookups made
        # through the driver, and none are made here, so this does not delay
        # the DOM snapshot below. Consider an explicit wait if rows are
        # rendered late by JavaScript.
        wd.implicitly_wait(5)
        page_title = wd.title
        html = wd.execute_script("return document.documentElement.outerHTML;")
    except WebDriverException as exc:
        logging.getLogger(__name__).warning(
            "WebDriver failed while loading %s: %s", url, exc
        )
        return page_title
    finally:
        if wd is not None:
            wd.quit()

    records = _parse_standings(html)
    # Passing the column list keeps the schema stable for an empty result.
    df = pd.DataFrame(records, columns=[column for column, _ in _STANDINGS_FIELDS])
    return html, df