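# selenium_exam / app.py
# The lines below are header residue from the file-viewer page this script was
# copied from; they are kept as comments so the module remains valid Python.
#   sintamar's picture
#   Update app.py
#   bfcf1e8 verified
#   raw
#   history blame
#   3.07 kB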
import streamlit as st
from io import BytesIO
import pandas as pd
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import WebDriverException
from io import BytesIO
from bs4 import BeautifulSoup
def main():
    """Render the page: a URL text box and a button that starts extraction.

    When the "Proceed" button is pressed, the entered URL is validated and,
    if non-empty, handed to ``visualize``. An empty (or whitespace-only) URL
    produces a warning instead of a browser launch.
    """
    st.title("Website Content Extractor")

    # Get website URL from user input. Surrounding whitespace is stripped so
    # that an input consisting only of spaces is treated as empty rather than
    # being passed on to the browser.
    url = st.text_input("Enter a URL:", "").strip()

    if not st.button("Proceed"):
        return

    if not url:
        st.warning("URL is empty.")
        return

    visualize(url)
def visualize(url):
    """Fetch the page behind *url* and show the extracted dropdown and links.

    ``take_webdata`` is called while a spinner is displayed. Its second and
    third results are rendered with ``st.code``; when either one is falsy a
    warning is shown instead ("tidak ditemukan." is Indonesian for
    "not found."). Any exception raised along the way is reported through
    ``st.error`` rather than propagated.
    """
    try:
        with st.spinner("loading website data ..."):
            # The first element (the page HTML) is not displayed.
            _page_html, dropdown, anchors = take_webdata(url)

            if not dropdown:
                st.warning("tidak ditemukan.")
                return
            st.code(dropdown, language='html')

            if not anchors:
                st.warning("tidak ditemukan.")
                return
            st.code(anchors, language='html')
    except Exception as err:
        st.error(f"Error: {err}")
def take_webdata(url):
    """Load *url* in headless Chrome and extract the "navbar-match" dropdown.

    The rendered document is serialized with JavaScript, parsed with
    BeautifulSoup, and searched for a ``div.dropdown-menu`` element whose
    ``aria-labelledby`` attribute is ``navbar-match``.

    Args:
        url: Address of the page to load.

    Returns:
        A 3-tuple ``(html, target_dropdown, links)`` where ``html`` is the
        serialized ``outerHTML`` of the document, ``target_dropdown`` is the
        matching element (or ``None`` when the page has no such dropdown), and
        ``links`` holds the ``a.dropdown-item`` elements found inside it
        (an empty list when the dropdown is missing).

    Raises:
        WebDriverException: If the browser cannot be started or the page
            cannot be loaded. The error is propagated so the caller can
            report it; the browser is always shut down first.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Create the driver before entering the try block: if start-up fails
    # there is nothing to clean up, and the finally clause below can rely on
    # ``wd`` being bound.
    wd = webdriver.Chrome(options=options)
    try:
        wd.set_window_size(1080, 720)  # Adjust the window size here
        wd.get(url)
        # NOTE(review): an implicit wait only applies to element lookups
        # (find_element*), none of which happen here. Kept from the original;
        # consider an explicit WebDriverWait if the dropdown renders late.
        wd.implicitly_wait(15)
        html = wd.execute_script("return document.documentElement.outerHTML;")
    finally:
        # Always release the browser process, on success and on failure.
        wd.quit()

    soup = BeautifulSoup(html, "html.parser")
    target_dropdown = soup.find(
        'div',
        class_='dropdown-menu',
        attrs={'aria-labelledby': 'navbar-match'},
    )
    if target_dropdown is None:
        print("Dropdown menu tidak ditemukan")
        # Return a well-formed tuple so callers can always unpack three values.
        return html, None, []

    links = target_dropdown.find_all('a', class_='dropdown-item')
    return html, target_dropdown, links
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()