sintamar committed on
Commit
7c0ed84
·
verified ·
1 Parent(s): fd9b833

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -7
app.py CHANGED
@@ -1,8 +1,12 @@
1
  import streamlit as st
2
- from extract import take_webdata
3
  from PIL import Image
4
  from io import BytesIO
5
  import pandas as pd
 
 
 
 
 
6
 
7
  def main():
8
  st.title("Website Content Exctractor")
@@ -29,20 +33,68 @@ def visualize(url):
29
  # Konversi tabel HTML ke DataFrame Pandas
30
  #df = pd.read_html(str(parse_element))[0]
31
  #st.dataframe(df) # tampilkan tabel dengan format rapi Streamlit
32
- #st.components.v1.html(str(parse_element), height=600, scrolling=True)
33
-
34
-
35
  # === 6. Tampilkan di Streamlit ===
36
  st.dataframe(xdataframe)
37
  else:
38
- st.warning("Tabel tidak ditemukan.")
39
-
40
-
41
 
42
  except Exception as e:
43
  st.error(f"Error: {e}")
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
 
46
 
47
  if __name__ == "__main__":
48
  main()
 
1
  import streamlit as st
 
2
  from PIL import Image
3
  from io import BytesIO
4
  import pandas as pd
5
+ from selenium import webdriver
6
+ from selenium.common.exceptions import WebDriverException
7
+ from PIL import Image
8
+ from io import BytesIO
9
+ from bs4 import BeautifulSoup
10
 
11
  def main():
12
  st.title("Website Content Exctractor")
 
33
  # Konversi tabel HTML ke DataFrame Pandas
34
  #df = pd.read_html(str(parse_element))[0]
35
  #st.dataframe(df) # tampilkan tabel dengan format rapi Streamlit
36
+ #st.components.v1.html(str(parse_element), height=600, scrolling=True)
 
 
37
  # === 6. Tampilkan di Streamlit ===
38
  st.dataframe(xdataframe)
39
  else:
40
+ st.warning("Tabel tidak ditemukan.")
 
 
41
 
42
  except Exception as e:
43
  st.error(f"Error: {e}")
44
 
45
def take_webdata(url):
    """Load *url* in headless Chrome and scrape the standings table.

    Parameters
    ----------
    url : str
        Address of the page containing the tournament standings.

    Returns
    -------
    tuple[str, pandas.DataFrame]
        The page's raw outer HTML and a DataFrame with one row per
        standings entry (columns: Peringkat, Tim, Main, Menang, Seri,
        Kalah, Gol, Selisih Gol, Poin).
        On a WebDriverException the page title (str) is returned
        instead — an empty string if the page never loaded.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    wd = None          # pre-bind so the finally block is safe if Chrome fails to start
    page_title = ""    # pre-bind so the except block is safe if wd.get() fails early
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # viewport wide enough for the table layout
        wd.get(url)
        wd.implicitly_wait(5)
        page_title = wd.title
        # Take the rendered DOM (after JS ran), not the original page source.
        html = wd.execute_script("return document.documentElement.outerHTML;")
        soup = BeautifulSoup(html, "html.parser")

        # Bug fix: the original used soup.find(), which returns a single
        # element; find_all() is required to iterate every standings row.
        rows = soup.find_all("div", class_="ui-table__row")
        data = []
        for row in rows:
            rank = row.select_one(".tableCellRank")
            team = row.select_one(".tableCellParticipant__name")
            mp = row.select_one("span.table__cell:nth-of-type(3)")
            wins = row.select_one("span.table__cell:nth-of-type(4)")
            draws = row.select_one("span.table__cell:nth-of-type(5)")
            losses = row.select_one("span.table__cell:nth-of-type(6)")
            goals = row.select_one(".table__cell--score")
            goal_diff = row.select_one(".table__cell--goalsForAgainstDiff")
            pts = row.select_one(".table__cell--points")

            # Missing cells degrade to "" so one malformed row cannot
            # abort the whole scrape.
            data.append({
                "Peringkat": rank.text.strip() if rank else "",
                "Tim": team.text.strip() if team else "",
                "Main": mp.text.strip() if mp else "",
                "Menang": wins.text.strip() if wins else "",
                "Seri": draws.text.strip() if draws else "",
                "Kalah": losses.text.strip() if losses else "",
                "Gol": goals.text.strip() if goals else "",
                "Selisih Gol": goal_diff.text.strip() if goal_diff else "",
                "Poin": pts.text.strip() if pts else ""
            })

        df = pd.DataFrame(data)
    except WebDriverException:
        # Best-effort fallback: return whatever title was captured before
        # the driver failed ("" if it never loaded).
        return page_title
    finally:
        # wd is None when Chrome failed to start; only quit a live driver.
        if wd:
            wd.quit()

    return html, df
98
 
99
  if __name__ == "__main__":
100
  main()