"""Streamlit app that loads a page in headless Chrome and extracts a menu link.

The app asks for a URL, renders the page with Selenium, then looks for the
``navbar-match`` dropdown menu and the "KLASEMEN" link inside it.
"""

from io import BytesIO

import pandas as pd
import streamlit as st
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def main():
    """Render the input form and start extraction when the button is pressed."""
    st.title("Website Content Exctractor")

    # Get website URL from user input
    url = st.text_input("Enter a URL:", "")

    if st.button("Proceed"):
        if not url:
            st.warning("URL is empty.")
        else:
            visualize(url)


def visualize(url):
    """Fetch *url* and display the dropdown markup and the extracted link.

    Shows the dropdown HTML and, below it, the "KLASEMEN" href. A warning is
    shown when either of them is missing. Any exception raised while fetching
    or rendering is reported through ``st.error`` instead of propagating.
    """
    try:
        with st.spinner("loading website data ..."):
            _html, dropdown, link = take_webdata(url)

            if dropdown is None:
                st.warning("tidak ditemukan.")
                return

            # Pass the markup as text; ``dropdown`` is a BeautifulSoup tag.
            st.code(str(dropdown), language='html')
            if link:
                st.code(link, language='html')
            else:
                st.warning("tidak ditemukan.")
    except Exception as e:
        # UI boundary: surface every failure to the user rather than crash.
        st.error(f"Error: {e}")


def _find_klasemen_link(html):
    """Return ``(dropdown, href)`` parsed from *html*; either may be ``None``."""
    soup = BeautifulSoup(html, "html.parser")
    dropdown = soup.find(
        'div',
        class_='dropdown-menu',
        attrs={'aria-labelledby': 'navbar-match'},
    )
    if dropdown is None:
        print("Dropdown menu tidak ditemukan")
        return None, None

    anchor = dropdown.find('a', class_='dropdown-item', string='KLASEMEN')
    href = anchor.get('href') if anchor is not None else None
    return dropdown, href


def take_webdata(url):
    """Load *url* in headless Chrome and extract the "KLASEMEN" menu link.

    Args:
        url: Address of the page to load.

    Returns:
        A 3-tuple ``(html, target_dropdown, urlx)``: the rendered page's outer
        HTML, the matched dropdown element (``None`` when absent), and the
        ``href`` of the "KLASEMEN" link (``None`` when absent).

    Raises:
        WebDriverException: If the browser cannot be started or the page
            cannot be loaded. The driver is always shut down first.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Bound before the try so the cleanup below is safe even when Chrome
    # itself fails to start.
    wd = None
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # Adjust the window size here
        wd.get(url)
        wd.implicitly_wait(15)
        html = wd.execute_script("return document.documentElement.outerHTML;")
    finally:
        if wd is not None:
            wd.quit()

    target_dropdown, urlx = _find_klasemen_link(html)
    return html, target_dropdown, urlx


if __name__ == "__main__":
    main()