# selenium_exam / app.py
# sintamar's picture
# Update app.py
# cb9f3f3 verified
import streamlit as st
from io import BytesIO
import pandas as pd
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import WebDriverException
from io import BytesIO
from bs4 import BeautifulSoup
def main():
    """Render the app's entry page: a URL field and a "Proceed" button.

    When the button is pressed, surrounding whitespace is stripped from the
    entered text. If nothing is left, a warning is shown; otherwise the URL is
    handed to ``visualize``.
    """
    st.title("Website Content Extractor")

    # Strip so that a whitespace-only entry is treated as empty instead of
    # being passed on to the browser as a URL.
    url = st.text_input("Enter a URL:", "").strip()

    if not st.button("Proceed"):
        return

    if not url:
        st.warning("URL is empty.")
        return

    visualize(url)
def visualize(url):
    """Fetch *url* via ``take_webdata`` and show what was found.

    The dropdown element returned by ``take_webdata`` is rendered as an HTML
    code block, followed by the extracted link. A warning ("tidak ditemukan."
    is Indonesian for "not found") is shown in place of whichever piece is
    missing. Any exception raised along the way is reported with ``st.error``
    rather than propagated.
    """
    try:
        # Everything below runs while the spinner is displayed.
        with st.spinner("loading website data ..."):
            _page_html, dropdown, klasemen_url = take_webdata(url)

            # Without the dropdown there is nothing further to show.
            if not dropdown:
                st.warning("tidak ditemukan.")
                return

            st.code(dropdown, language='html')

            if klasemen_url:
                st.code(klasemen_url, language='html')
            else:
                st.warning("tidak ditemukan.")
    except Exception as exc:
        st.error(f"Error: {exc}")
def take_webdata(url):
    """Load *url* in headless Chrome and locate the "KLASEMEN" menu link.

    The rendered document is parsed with BeautifulSoup. The function looks
    for the ``div.dropdown-menu`` element whose ``aria-labelledby`` is
    ``navbar-match`` and, inside it, the ``a.dropdown-item`` anchor whose
    string is exactly ``KLASEMEN``.

    Args:
        url: Address of the page to load.

    Returns:
        A ``(html, target_dropdown, urlx)`` tuple. ``html`` is the rendered
        document's outer HTML, ``target_dropdown`` is the matching dropdown
        element or ``None``, and ``urlx`` is the ``href`` of the KLASEMEN
        link, or ``None`` when either the dropdown or the link is missing.

    Raises:
        WebDriverException: Propagated from Selenium when the browser cannot
            be started or the page cannot be loaded. The caller is expected
            to report it; nothing is swallowed here.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Bound before the try so the cleanup below is safe even when the
    # driver constructor itself raises.
    wd = None
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # Adjust the window size here
        wd.get(url)
        wd.implicitly_wait(15)
        html = wd.execute_script("return document.documentElement.outerHTML;")
    finally:
        # Always release the browser process, on success and on failure.
        if wd is not None:
            wd.quit()

    # Parsing works on the captured string only, so the browser is already
    # closed by this point.
    soup = BeautifulSoup(html, "html.parser")
    target_dropdown = soup.find(
        'div',
        class_='dropdown-menu',
        attrs={'aria-labelledby': 'navbar-match'},
    )

    # Default to None so the return below never hits an unbound name when
    # the dropdown or the link is absent.
    urlx = None
    if target_dropdown:
        klasemenlink = target_dropdown.find(
            'a', class_='dropdown-item', string='KLASEMEN'
        )
        if klasemenlink:
            urlx = klasemenlink.get('href')
    else:
        print("Dropdown menu tidak ditemukan")

    return html, target_dropdown, urlx
# Call main() only when this file is run as the main module, not on import.
if __name__ == "__main__":
    main()