Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
from extract import take_webdata
|
| 3 |
from PIL import Image
|
| 4 |
from io import BytesIO
|
| 5 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
def main():
|
| 8 |
st.title("Website Content Exctractor")
|
|
@@ -29,20 +33,68 @@ def visualize(url):
|
|
| 29 |
# Konversi tabel HTML ke DataFrame Pandas
|
| 30 |
#df = pd.read_html(str(parse_element))[0]
|
| 31 |
#st.dataframe(df) # tampilkan tabel dengan format rapi Streamlit
|
| 32 |
-
#st.components.v1.html(str(parse_element), height=600, scrolling=True)
|
| 33 |
-
|
| 34 |
-
|
| 35 |
# === 6. Tampilkan di Streamlit ===
|
| 36 |
st.dataframe(xdataframe)
|
| 37 |
else:
|
| 38 |
-
st.warning("Tabel tidak ditemukan.")
|
| 39 |
-
|
| 40 |
-
|
| 41 |
|
| 42 |
except Exception as e:
|
| 43 |
st.error(f"Error: {e}")
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
|
|
|
| 46 |
|
| 47 |
if __name__ == "__main__":
|
| 48 |
main()
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
| 2 |
from PIL import Image
|
| 3 |
from io import BytesIO
|
| 4 |
import pandas as pd
|
| 5 |
+
from selenium import webdriver
|
| 6 |
+
from selenium.common.exceptions import WebDriverException
|
| 7 |
+
from PIL import Image
|
| 8 |
+
from io import BytesIO
|
| 9 |
+
from bs4 import BeautifulSoup
|
| 10 |
|
| 11 |
def main():
|
| 12 |
st.title("Website Content Exctractor")
|
|
|
|
| 33 |
# Konversi tabel HTML ke DataFrame Pandas
|
| 34 |
#df = pd.read_html(str(parse_element))[0]
|
| 35 |
#st.dataframe(df) # tampilkan tabel dengan format rapi Streamlit
|
| 36 |
+
#st.components.v1.html(str(parse_element), height=600, scrolling=True)
|
|
|
|
|
|
|
| 37 |
# === 6. Tampilkan di Streamlit ===
|
| 38 |
st.dataframe(xdataframe)
|
| 39 |
else:
|
| 40 |
+
st.warning("Tabel tidak ditemukan.")
|
|
|
|
|
|
|
| 41 |
|
| 42 |
except Exception as e:
|
| 43 |
st.error(f"Error: {e}")
|
| 44 |
|
| 45 |
+
def take_webdata(url):
    """Load *url* in headless Chrome and scrape the standings table.

    Parameters
    ----------
    url : str
        Address of the page to render.

    Returns
    -------
    tuple
        ``(html, df)`` — the rendered page's outer HTML (str) and a
        ``pandas.DataFrame`` of the parsed table rows (empty if no rows
        were found).  On ``WebDriverException`` the page title captured
        so far (or ``None``) is returned instead, preserving the
        original error-path contract.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    wd = None          # so the finally-block is safe if Chrome never starts
    page_title = None  # so the except-path never hits an unbound name
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # adjust the window size here
        wd.get(url)
        wd.implicitly_wait(5)
        # Get the page title
        page_title = wd.title
        html = wd.execute_script("return document.documentElement.outerHTML;")
        soup = BeautifulSoup(html, "html.parser")

        # BUG FIX: the original used soup.find(), which returns a single Tag;
        # iterating a Tag yields its children, not the row elements.
        # find_all() returns every matching row, and the `or []` fallback
        # turns "no table on the page" into an empty DataFrame instead of
        # a TypeError from iterating None.
        rows = soup.find_all("div", class_="ui-table__row") or []

        data = []
        for row in rows:
            rank = row.select_one(".tableCellRank")
            team = row.select_one(".tableCellParticipant__name")
            mp = row.select_one("span.table__cell:nth-of-type(3)")
            w = row.select_one("span.table__cell:nth-of-type(4)")
            d = row.select_one("span.table__cell:nth-of-type(5)")
            l = row.select_one("span.table__cell:nth-of-type(6)")
            g = row.select_one(".table__cell--score")
            sg = row.select_one(".table__cell--goalsForAgainstDiff")
            pts = row.select_one(".table__cell--points")

            data.append({
                "Peringkat": rank.text.strip() if rank else "",
                "Tim": team.text.strip() if team else "",
                "Main": mp.text.strip() if mp else "",
                "Menang": w.text.strip() if w else "",
                "Seri": d.text.strip() if d else "",
                "Kalah": l.text.strip() if l else "",
                "Gol": g.text.strip() if g else "",
                "Selisih Gol": sg.text.strip() if sg else "",
                "Poin": pts.text.strip() if pts else ""
            })

        # === 5. Buat DataFrame ===
        df = pd.DataFrame(data)

    except WebDriverException:
        # Original contract: on a driver failure, return whatever title
        # we managed to capture (None if the browser never came up).
        return page_title
    finally:
        if wd:
            wd.quit()

    return html, df
|
| 98 |
|
| 99 |
if __name__ == "__main__":
|
| 100 |
main()
|