Decoder24 committed on
Commit
7d7d9fa
·
verified ·
1 Parent(s): c7194d2

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +3 -0
  2. .history/README_20251005095904.md +0 -0
  3. .history/README_20251005100625.md +0 -0
  4. .history/README_20251005103318.md +0 -0
  5. .history/README_20251005103328.md +0 -0
  6. .history/README_20251005103511.md +0 -0
  7. .history/README_20251005103517.md +0 -0
  8. .history/README_20251007193812.md +0 -0
  9. .history/README_20251007193817.md +0 -0
  10. .history/README_20251007193828.md +0 -0
  11. .history/README_20251007193832.md +0 -0
  12. .history/fbrefdata_example_20251004173710.py +49 -0
  13. .history/fbrefdata_example_20251004180332.py +0 -0
  14. .history/fbrefdata_example_20251004180335.py +43 -0
  15. .history/fbrefdata_example_20251004180434.py +60 -0
  16. .history/fbrefdata_example_20251004180520.py +74 -0
  17. .history/fbrefdata_example_20251004180621.py +67 -0
  18. .history/fbrefdata_example_20251004184139.py +72 -0
  19. .history/fbrefdata_example_20251004185739.py +65 -0
  20. .history/fbrefdata_example_20251004185920.py +68 -0
  21. .history/fbrefdata_example_20251004190022.py +0 -0
  22. .history/fbrefdata_example_20251004190027.py +69 -0
  23. .history/fbrefdata_example_20251004190339.py +82 -0
  24. .history/fbrefdata_example_20251004190507.py +85 -0
  25. .history/fbrefdata_example_20251004190633.py +90 -0
  26. .history/fbrefdata_example_20251004190944.py +91 -0
  27. .history/fbrefdata_example_20251004191947.py +107 -0
  28. .history/fbrefdata_example_20251005091604.py +104 -0
  29. .history/fbrefdata_example_20251005091825.py +104 -0
  30. .history/fbrefdata_example_20251005091830.py +104 -0
  31. .history/fbrefdata_example_20251005091835.py +104 -0
  32. .history/fbrefdata_example_20251005091839.py +104 -0
  33. .history/fbrefdata_example_20251005091854.py +104 -0
  34. .history/fbrefdata_example_20251005091857.py +104 -0
  35. .history/fbrefdata_example_20251005091858.py +104 -0
  36. .history/fbrefdata_example_20251005092140.py +106 -0
  37. .history/fbrefdata_example_20251005092144.py +105 -0
  38. .history/fbrefdata_example_20251005092150.py +105 -0
  39. .history/fbrefdata_example_20251005092800.py +106 -0
  40. .history/fbrefdata_example_20251005092803.py +105 -0
  41. .history/fbrefdata_example_20251005092809.py +105 -0
  42. .history/fbrefdata_example_20251005092817.py +106 -0
  43. .history/fbrefdata_example_20251005092820.py +106 -0
  44. .history/fbrefdata_example_20251005092822.py +105 -0
  45. .history/fbrefdata_example_20251005092904.py +131 -0
  46. .history/fbrefdata_example_20251005093119.py +61 -0
  47. .history/fbrefdata_example_20251005093129.py +61 -0
  48. .history/fbrefdata_example_20251005093230.py +131 -0
  49. .history/historical_data_20251005104339.py +0 -0
  50. .history/historical_data_20251005104343.py +140 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ visual/debug_screenshot.png filter=lfs diff=lfs merge=lfs -text
37
+ visual/debug_team_stats.png filter=lfs diff=lfs merge=lfs -text
38
+ visual/top10_passing_accuracy.png filter=lfs diff=lfs merge=lfs -text
.history/README_20251005095904.md ADDED
Binary file (60 Bytes). View file
 
.history/README_20251005100625.md ADDED
Binary file (9.64 kB). View file
 
.history/README_20251005103318.md ADDED
Binary file (9.65 kB). View file
 
.history/README_20251005103328.md ADDED
Binary file (9.57 kB). View file
 
.history/README_20251005103511.md ADDED
Binary file (5.72 kB). View file
 
.history/README_20251005103517.md ADDED
Binary file (9.57 kB). View file
 
.history/README_20251007193812.md ADDED
Binary file (9.75 kB). View file
 
.history/README_20251007193817.md ADDED
Binary file (9.74 kB). View file
 
.history/README_20251007193828.md ADDED
Binary file (9.73 kB). View file
 
.history/README_20251007193832.md ADDED
Binary file (9.7 kB). View file
 
.history/fbrefdata_example_20251004173710.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pandas as pd
3
+ from io import StringIO
4
+
5
+ def pull_premier_league_team_passing():
6
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
7
+ print(f"Downloading team passing stats from {url} ...")
8
+
9
+ # Add a User-Agent header to mimic a browser
10
+ headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
11
+ response = requests.get(url, headers=headers)
12
+ response.raise_for_status()
13
+
14
+ df = pd.read_html(StringIO(response.text))[0]
15
+
16
+ # Flatten columns
17
+ df.columns = ["_".join(col).strip() if isinstance(col, tuple) else col for col in df.columns]
18
+
19
+ # Rename the weird columns
20
+ df = df.rename(columns={
21
+ "Unnamed: 0_level_0_Squad": "Squad",
22
+ "Unnamed: 1_level_0_# Pl": "Players",
23
+ "Unnamed: 2_level_0_90s": "90s",
24
+ "Unnamed: 17_level_0_Ast": "Ast",
25
+ "Unnamed: 18_level_0_xAG": "xAG",
26
+ "Unnamed: 21_level_0_KP": "KP",
27
+ "Unnamed: 22_level_0_1/3": "1/3",
28
+ "Unnamed: 23_level_0_PPA": "PPA",
29
+ "Unnamed: 24_level_0_CrsPA": "CrsPA",
30
+ "Unnamed: 25_level_0_PrgP": "PrgP"
31
+ })
32
+
33
+ return df
34
+
35
+ def filter_teams(df, teams):
36
+ return df[df["Squad"].isin(teams)]
37
+
38
+ def main():
39
+ df = pull_premier_league_team_passing()
40
+
41
+ teams = ["Arsenal", "Nott'ham Forest"]
42
+ df_filtered = filter_teams(df, teams)
43
+
44
+ print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
45
+ print("=" * 70)
46
+ print(df_filtered[["Squad", "Total_Cmp", "Total_Att", "Total_Cmp%", "Total_TotDist"]])
47
+
48
+ if __name__ == "__main__":
49
+ main()
.history/fbrefdata_example_20251004180332.py ADDED
File without changes
.history/fbrefdata_example_20251004180335.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pandas as pd
3
+ from io import StringIO
4
+
5
+ def pull_premier_league_team_passing():
6
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
7
+ print(f"Downloading team passing stats from {url} ...")
8
+
9
+ # Use a more comprehensive set of headers to mimic a real browser
10
+ headers = {
11
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
12
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
13
+ 'Accept-Language': 'en-US,en;q=0.5',
14
+ 'Accept-Encoding': 'gzip, deflate, br',
15
+ 'Connection': 'keep-alive',
16
+ 'Upgrade-Insecure-Requests': '1',
17
+ 'DNT': '1' # Do Not Track request header
18
+ }
19
+
20
+ response = requests.get(url, headers=headers)
21
+ response.raise_for_status()
22
+
23
+ # The rest of your function remains the same
24
+ df = pd.read_html(StringIO(response.text))[0]
25
+
26
+ # Flatten columns
27
+ df.columns = ["_".join(col).strip() if isinstance(col, tuple) else col for col in df.columns]
28
+
29
+ # Rename the weird columns
30
+ df = df.rename(columns={
31
+ "Unnamed: 0_level_0_Squad": "Squad",
32
+ "Unnamed: 1_level_0_# Pl": "Players",
33
+ "Unnamed: 2_level_0_90s": "90s",
34
+ "Unnamed: 17_level_0_Ast": "Ast",
35
+ "Unnamed: 18_level_0_xAG": "xAG",
36
+ "Unnamed: 21_level_0_KP": "KP",
37
+ "Unnamed: 22_level_0_1/3": "1/3",
38
+ "Unnamed: 23_level_0_PPA": "PPA",
39
+ "Unnamed: 24_level_0_CrsPA": "CrsPA",
40
+ "Unnamed: 25_level_0_PrgP": "PrgP"
41
+ })
42
+
43
+ return df
.history/fbrefdata_example_20251004180434.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pandas as pd
3
+ from io import StringIO
4
+
5
+ def pull_premier_league_team_passing():
6
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
7
+ print(f"Downloading team passing stats from {url} ...")
8
+
9
+ # Gunakan headers yang lebih lengkap untuk meniru browser asli
10
+ headers = {
11
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
12
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
13
+ 'Accept-Language': 'en-US,en;q=0.5',
14
+ 'Accept-Encoding': 'gzip, deflate, br',
15
+ 'Connection': 'keep-alive',
16
+ 'Upgrade-Insecure-Requests': '1',
17
+ 'DNT': '1'
18
+ }
19
+
20
+ response = requests.get(url, headers=headers)
21
+ response.raise_for_status()
22
+
23
+ df = pd.read_html(StringIO(response.text))[0]
24
+
25
+ # Meratakan kolom
26
+ df.columns = ["_".join(col).strip() if isinstance(col, tuple) else col for col in df.columns]
27
+
28
+ # Mengganti nama kolom yang aneh
29
+ df = df.rename(columns={
30
+ "Unnamed: 0_level_0_Squad": "Squad",
31
+ "Unnamed: 1_level_0_# Pl": "Players",
32
+ "Unnamed: 2_level_0_90s": "90s",
33
+ "Unnamed: 17_level_0_Ast": "Ast",
34
+ "Unnamed: 18_level_0_xAG": "xAG",
35
+ "Unnamed: 21_level_0_KP": "KP",
36
+ "Unnamed: 22_level_0_1/3": "1/3",
37
+ "Unnamed: 23_level_0_PPA": "PPA",
38
+ "Unnamed: 24_level_0_CrsPA": "CrsPA",
39
+ "Unnamed: 25_level_0_PrgP": "PrgP"
40
+ })
41
+
42
+ return df
43
+
44
+ def filter_teams(df, teams):
45
+ return df[df["Squad"].isin(teams)]
46
+
47
+ def main():
48
+ df = pull_premier_league_team_passing()
49
+
50
+ teams = ["Arsenal", "Nott'ham Forest"]
51
+ df_filtered = filter_teams(df, teams)
52
+
53
+ print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
54
+ print("=" * 70)
55
+ # Menampilkan kolom yang relevan dari DataFrame yang sudah difilter
56
+ print(df_filtered[["Squad", "Total_Cmp", "Total_Att", "Total_Cmp%", "Total_TotDist"]])
57
+
58
+ # Bagian ini PENTING untuk menjalankan fungsi main()
59
+ if __name__ == "__main__":
60
+ main()
.history/fbrefdata_example_20251004180520.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pandas as pd
3
+ from io import StringIO
4
+ import random
5
+ import time
6
+
7
+ def pull_premier_league_team_passing():
8
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
9
+ print(f"Downloading team passing stats from {url} ...")
10
+
11
+ # List of User-Agent strings
12
+ user_agents = [
13
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
14
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0',
15
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
16
+ ]
17
+
18
+ # Randomly select a User-Agent
19
+ headers = {'User-Agent': random.choice(user_agents)}
20
+
21
+ try:
22
+ response = requests.get(url, headers=headers)
23
+ response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
24
+ except requests.exceptions.HTTPError as e:
25
+ print(f"HTTP error occurred: {e}")
26
+ return None
27
+ except requests.exceptions.RequestException as e:
28
+ print(f"An error occurred: {e}")
29
+ return None
30
+
31
+ df = pd.read_html(StringIO(response.text))[0]
32
+
33
+ # Meratakan kolom
34
+ df.columns = ["_".join(col).strip() if isinstance(col, tuple) else col for col in df.columns]
35
+
36
+ # Mengganti nama kolom yang aneh
37
+ df = df.rename(columns={
38
+ "Unnamed: 0_level_0_Squad": "Squad",
39
+ "Unnamed: 1_level_0_# Pl": "Players",
40
+ "Unnamed: 2_level_0_90s": "90s",
41
+ "Unnamed: 17_level_0_Ast": "Ast",
42
+ "Unnamed: 18_level_0_xAG": "xAG",
43
+ "Unnamed: 21_level_0_KP": "KP",
44
+ "Unnamed: 22_level_0_1/3": "1/3",
45
+ "Unnamed: 23_level_0_PPA": "PPA",
46
+ "Unnamed: 24_level_0_CrsPA": "CrsPA",
47
+ "Unnamed: 25_level_0_PrgP": "PrgP"
48
+ })
49
+
50
+ # Delay before returning (adjust as needed)
51
+ time.sleep(random.uniform(1, 3)) # Delay between 1 and 3 seconds
52
+
53
+ return df
54
+
55
+ def filter_teams(df, teams):
56
+ return df[df["Squad"].isin(teams)]
57
+
58
+ def main():
59
+ df = pull_premier_league_team_passing()
60
+
61
+ if df is not None:
62
+ teams = ["Arsenal", "Nott'ham Forest"]
63
+ df_filtered = filter_teams(df, teams)
64
+
65
+ print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
66
+ print("=" * 70)
67
+ # Menampilkan kolom yang relevan dari DataFrame yang sudah difilter
68
+ print(df_filtered[["Squad", "Total_Cmp", "Total_Att", "Total_Cmp%", "Total_TotDist"]])
69
+ else:
70
+ print("Failed to retrieve data.")
71
+
72
+ # Bagian ini PENTING untuk menjalankan fungsi main()
73
+ if __name__ == "__main__":
74
+ main()
.history/fbrefdata_example_20251004180621.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from webdriver_manager.chrome import ChromeDriverManager
7
+
8
+ def pull_premier_league_team_passing():
9
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
10
+ print(f"Opening browser to download team passing stats from {url} ...")
11
+
12
+ # Inisialisasi driver Chrome secara otomatis
13
+ # Browser akan terbuka, mengambil data, lalu menutup sendiri
14
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
15
+
16
+ # Buka URL
17
+ driver.get(url)
18
+
19
+ # Beri waktu 3 detik agar halaman dan semua elemennya (termasuk tabel)
20
+ # termuat dengan sempurna
21
+ time.sleep(3)
22
+
23
+ # Ambil sumber HTML dari halaman yang sudah dimuat oleh browser
24
+ html_source = driver.page_source
25
+
26
+ # Tutup browser setelah selesai
27
+ driver.quit()
28
+
29
+ print("Data downloaded. Processing with pandas...")
30
+
31
+ # Sekarang kita proses HTML yang didapat dengan pandas, sama seperti sebelumnya
32
+ df = pd.read_html(StringIO(html_source))[0]
33
+
34
+ # Meratakan kolom
35
+ df.columns = ["_".join(col).strip() if isinstance(col, tuple) else col for col in df.columns]
36
+
37
+ # Mengganti nama kolom yang aneh
38
+ df = df.rename(columns={
39
+ "Unnamed: 0_level_0_Squad": "Squad",
40
+ "Unnamed: 1_level_0_# Pl": "Players",
41
+ "Unnamed: 2_level_0_90s": "90s",
42
+ "Unnamed: 17_level_0_Ast": "Ast",
43
+ "Unnamed: 18_level_0_xAG": "xAG",
44
+ "Unnamed: 21_level_0_KP": "KP",
45
+ "Unnamed: 22_level_0_1/3": "1/3",
46
+ "Unnamed: 23_level_0_PPA": "PPA",
47
+ "Unnamed: 24_level_0_CrsPA": "CrsPA",
48
+ "Unnamed: 25_level_0_PrgP": "PrgP"
49
+ })
50
+
51
+ return df
52
+
53
+ def filter_teams(df, teams):
54
+ return df[df["Squad"].isin(teams)]
55
+
56
+ def main():
57
+ df = pull_premier_league_team_passing()
58
+
59
+ teams = ["Arsenal", "Nott'ham Forest"]
60
+ df_filtered = filter_teams(df, teams)
61
+
62
+ print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
63
+ print("=" * 70)
64
+ print(df_filtered[["Squad", "Total_Cmp", "Total_Att", "Total_Cmp%", "Total_TotDist"]])
65
+
66
+ if __name__ == "__main__":
67
+ main()
.history/fbrefdata_example_20251004184139.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+
9
+ def pull_premier_league_team_passing():
10
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
11
+ print(f"Opening browser to download team passing stats from {url} ...")
12
+
13
+ # === BAGIAN BARU: Menambahkan Opsi Chrome ===
14
+ options = ChromeOptions()
15
+ options.add_argument("--start-maximized") # Memastikan jendela browser terbuka maksimal
16
+ options.add_argument("--no-sandbox") # Opsi ini seringkali diperlukan saat menjalankan di lingkungan otomatis
17
+ options.add_argument("--disable-dev-shm-usage") # Mengatasi masalah sumber daya yang terbatas
18
+ options.add_experimental_option("excludeSwitches", ["enable-automation"]) # Menghilangkan notifikasi "Chrome is being controlled..."
19
+ options.add_experimental_option('useAutomationExtension', False)
20
+ # ============================================
21
+
22
+ # Inisialisasi driver Chrome dengan OPSI yang sudah kita buat
23
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
24
+
25
+ # Buka URL
26
+ driver.get(url)
27
+
28
+ # Beri waktu agar halaman termuat dengan sempurna
29
+ time.sleep(5) # Waktu tunggu sedikit diperpanjang menjadi 5 detik untuk amannya
30
+
31
+ # Ambil sumber HTML dari halaman
32
+ html_source = driver.page_source
33
+
34
+ # Tutup browser setelah selesai
35
+ driver.quit()
36
+
37
+ print("Data downloaded. Processing with pandas...")
38
+
39
+ df = pd.read_html(StringIO(html_source))[0]
40
+
41
+ df.columns = ["_".join(col).strip() if isinstance(col, tuple) else col for col in df.columns]
42
+
43
+ df = df.rename(columns={
44
+ "Unnamed: 0_level_0_Squad": "Squad",
45
+ "Unnamed: 1_level_0_# Pl": "Players",
46
+ "Unnamed: 2_level_0_90s": "90s",
47
+ "Unnamed: 17_level_0_Ast": "Ast",
48
+ "Unnamed: 18_level_0_xAG": "xAG",
49
+ "Unnamed: 21_level_0_KP": "KP",
50
+ "Unnamed: 22_level_0_1/3": "1/3",
51
+ "Unnamed: 23_level_0_PPA": "PPA",
52
+ "Unnamed: 24_level_0_CrsPA": "CrsPA",
53
+ "Unnamed: 25_level_0_PrgP": "PrgP"
54
+ })
55
+
56
+ return df
57
+
58
+ def filter_teams(df, teams):
59
+ return df[df["Squad"].isin(teams)]
60
+
61
+ def main():
62
+ df = pull_premier_league_team_passing()
63
+
64
+ teams = ["Arsenal", "Nott'ham Forest"]
65
+ df_filtered = filter_teams(df, teams)
66
+
67
+ print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
68
+ print("=" * 70)
69
+ print(df_filtered[["Squad", "Total_Cmp", "Total_Att", "Total_Cmp%", "Total_TotDist"]])
70
+
71
+ if __name__ == "__main__":
72
+ main()
.history/fbrefdata_example_20251004185739.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+
9
+ def pull_premier_league_team_passing():
10
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
11
+ print(f"Opening browser to download team passing stats from {url} ...")
12
+
13
+ options = ChromeOptions()
14
+ options.add_argument("--start-maximized")
15
+ options.add_argument("--no-sandbox")
16
+ options.add_argument("--disable-dev-shm-usage")
17
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
18
+ options.add_experimental_option('useAutomationExtension', False)
19
+
20
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
21
+
22
+ driver.get(url)
23
+ time.sleep(5)
24
+
25
+ html_source = driver.page_source
26
+ driver.quit()
27
+
28
+ print("Data downloaded. Processing with pandas...")
29
+
30
+ df = pd.read_html(StringIO(html_source))[0]
31
+ df.columns = ["_".join(col).strip() if isinstance(col, tuple) else col for col in df.columns]
32
+
33
+ # !!!!!!!!!! INI BAGIAN PENTING UNTUK DEBUG !!!!!!!!!!
34
+ print("\nDEBUG: Column names are:")
35
+ print(df.columns)
36
+ print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
37
+ # !!!!!!!!!! AKHIR BAGIAN DEBUG !!!!!!!!!!
38
+
39
+ df = df.rename(columns={
40
+ "Unnamed: 0_level_0_Squad": "Squad",
41
+ "Unnamed: 1_level_0_# Pl": "Players",
42
+ "Unnamed: 2_level_0_90s": "90s",
43
+ "Unnamed: 17_level_0_Ast": "Ast",
44
+ "Unnamed: 18_level_0_xAG": "xAG",
45
+ "Unnamed: 21_level_0_KP": "KP",
46
+ "Unnamed: 22_level_0_1/3": "1/3",
47
+ "Unnamed: 23_level_0_PPA": "PPA",
48
+ "Unnamed: 24_level_0_CrsPA": "CrsPA",
49
+ "Unnamed: 25_level_0_PrgP": "PrgP"
50
+ })
51
+ return df
52
+
53
+ def filter_teams(df, teams):
54
+ return df[df["Squad"].isin(teams)]
55
+
56
+ def main():
57
+ df = pull_premier_league_team_passing()
58
+ teams = ["Arsenal", "Nott'ham Forest"]
59
+ df_filtered = filter_teams(df, teams)
60
+ print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
61
+ print("=" * 70)
62
+ print(df_filtered[["Squad", "Total_Cmp", "Total_Att", "Total_Cmp%", "Total_TotDist"]])
63
+
64
+ if __name__ == "__main__":
65
+ main()
.history/fbrefdata_example_20251004185920.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+
9
+ def pull_premier_league_team_passing():
10
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
11
+ print(f"Opening browser to download team passing stats from {url} ...")
12
+
13
+ options = ChromeOptions()
14
+ options.add_argument("--start-maximized")
15
+ options.add_argument("--no-sandbox")
16
+ options.add_argument("--disable-dev-shm-usage")
17
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
18
+ options.add_experimental_option('useAutomationExtension', False)
19
+
20
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
21
+
22
+ driver.get(url)
23
+ time.sleep(5)
24
+
25
+ html_source = driver.page_source
26
+
27
+ print("Data downloaded. Processing with pandas...")
28
+
29
+ # Specify the header rows
30
+ df = pd.read_html(StringIO(html_source), header=[0, 1])[0]
31
+
32
+ # Flatten the multi-level header
33
+ df.columns = ["_".join(col).strip() if isinstance(col, tuple) else col for col in df.columns]
34
+ driver.quit()
35
+
36
+ # !!!!!!!!!! INI BAGIAN PENTING UNTUK DEBUG !!!!!!!!!!
37
+ print("\nDEBUG: Column names are:")
38
+ print(df.columns)
39
+ print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
40
+ # !!!!!!!!!! AKHIR BAGIAN DEBUG !!!!!!!!!!
41
+
42
+ df = df.rename(columns={
43
+ "Unnamed: 0_level_0_Squad": "Squad",
44
+ "Unnamed: 1_level_0_# Pl": "Players",
45
+ "Unnamed: 2_level_0_90s": "90s",
46
+ "Unnamed: 17_level_0_Ast": "Ast",
47
+ "Unnamed: 18_level_0_xAG": "xAG",
48
+ "Unnamed: 21_level_0_KP": "KP",
49
+ "Unnamed: 22_level_0_1/3": "1/3",
50
+ "Unnamed: 23_level_0_PPA": "PPA",
51
+ "Unnamed: 24_level_0_CrsPA": "CrsPA",
52
+ "Unnamed: 25_level_0_PrgP": "PrgP"
53
+ })
54
+ return df
55
+
56
+ def filter_teams(df, teams):
57
+ return df[df["Squad"].isin(teams)]
58
+
59
+ def main():
60
+ df = pull_premier_league_team_passing()
61
+ teams = ["Arsenal", "Nott'ham Forest"]
62
+ df_filtered = filter_teams(df, teams)
63
+ print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
64
+ print("=" * 70)
65
+ print(df_filtered[["Squad", "Total_Cmp", "Total_Att", "Total_Cmp%", "Total_TotDist"]])
66
+
67
+ if __name__ == "__main__":
68
+ main()
.history/fbrefdata_example_20251004190022.py ADDED
File without changes
.history/fbrefdata_example_20251004190027.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+
9
+ def pull_premier_league_team_passing():
10
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
11
+ print(f"Opening browser to download team passing stats from {url} ...")
12
+
13
+ options = ChromeOptions()
14
+ options.add_argument("--start-maximized")
15
+ options.add_argument("--no-sandbox")
16
+ options.add_argument("--disable-dev-shm-usage")
17
+ options.add_argument("--headless") # Menjalankan browser di background agar tidak muncul jendela
18
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
19
+ options.add_experimental_option('useAutomationExtension', False)
20
+
21
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
22
+
23
+ driver.get(url)
24
+ time.sleep(3) # Cukup 3 detik jika headless
25
+
26
+ html_source = driver.page_source
27
+ driver.quit()
28
+
29
+ print("Data downloaded. Processing with pandas...")
30
+
31
+ # Ambil tabel pertama dari HTML
32
+ df = pd.read_html(StringIO(html_source))[0]
33
+
34
+ # ==============================================================================
35
+ # === BAGIAN LAMA DIHAPUS DAN DIGANTI DENGAN YANG LEBIH SEDERHANA INI ===
36
+ # ==============================================================================
37
+ # Berdasarkan struktur tabel di FBRef, kita tahu kolom yang kita mau ada di indeks:
38
+ # 1: Squad, 5: Total Cmp, 6: Total Att, 7: Total Cmp%, 8: Total TotDist
39
+
40
+ # 1. Pilih hanya kolom yang kita butuhkan berdasarkan nomor indeksnya
41
+ df = df[[1, 5, 6, 7, 8]]
42
+
43
+ # 2. Beri nama baru untuk kolom-kolom tersebut
44
+ df.columns = ['Squad', 'Total_Cmp', 'Total_Att', 'Total_Cmp%', 'Total_TotDist']
45
+
46
+ # 3. Hapus baris terakhir yang biasanya berisi total/rata-rata liga
47
+ df = df.iloc[:-1]
48
+ # ==============================================================================
49
+ # ==============================================================================
50
+
51
+ return df
52
+
53
+ def filter_teams(df, teams):
54
+ # Fungsi ini sekarang akan berhasil karena kolom 'Squad' sudah ada
55
+ return df[df["Squad"].isin(teams)]
56
+
57
+ def main():
58
+ df = pull_premier_league_team_passing()
59
+
60
+ teams = ["Arsenal", "Nott'ham Forest"]
61
+ df_filtered = filter_teams(df, teams)
62
+
63
+ print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
64
+ print("=" * 70)
65
+ # Karena df_filtered sekarang hanya berisi kolom yang kita mau, kita bisa print langsung
66
+ print(df_filtered)
67
+
68
+ if __name__ == "__main__":
69
+ main()
.history/fbrefdata_example_20251004190339.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ # Imports baru untuk menunggu dengan cerdas
9
+ from selenium.webdriver.common.by import By
10
+ from selenium.webdriver.support.ui import WebDriverWait
11
+ from selenium.webdriver.support import expected_conditions as EC
12
+ from selenium.common.exceptions import TimeoutException
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
16
+ print(f"Opening browser to download team passing stats from {url} ...")
17
+
18
+ options = ChromeOptions()
19
+ options.add_argument("--start-maximized")
20
+ options.add_argument("--no-sandbox")
21
+ options.add_argument("--disable-dev-shm-usage")
22
+ # options.add_argument("--headless") # Headless kita matikan dulu untuk debug, agar terlihat apa yang terjadi
23
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
24
+ options.add_experimental_option('useAutomationExtension', False)
25
+
26
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
27
+ driver.get(url)
28
+
29
+ # ==============================================================================
30
+ # === LOGIKA BARU: MENUNGGU CERDAS DAN INTERAKSI HALAMAN ===
31
+ # ==============================================================================
32
+ try:
33
+ # Tunggu max 10 detik sampai tombol cookie muncul, lalu klik
34
+ wait = WebDriverWait(driver, 10)
35
+ # Mencari tombol berdasarkan XPath yang berisi teks 'Accept All Cookies'
36
+ accept_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
37
+ accept_button.click()
38
+ print("Cookie banner accepted.")
39
+ except TimeoutException:
40
+ # Jika tombol tidak muncul dalam 10 detik, anggap saja tidak ada banner
41
+ print("No cookie banner found or it took too long.")
42
+
43
+ try:
44
+ # Sekarang, tunggu max 10 detik sampai tabelnya benar-benar muncul
45
+ wait = WebDriverWait(driver, 10)
46
+ # Kita tunggu sampai elemen div yang membungkus tabelnya terlihat
47
+ wait.until(EC.visibility_of_element_located((By.ID, "div_stats_passing")))
48
+ print("Stats table is now visible.")
49
+ except TimeoutException:
50
+ print("The stats table could not be found on the page.")
51
+ driver.quit()
52
+ return None # Keluar dari fungsi jika tabel tidak ditemukan
53
+ # ==============================================================================
54
+
55
+ html_source = driver.page_source
56
+ driver.quit()
57
+
58
+ print("Data downloaded. Processing with pandas...")
59
+
60
+ df = pd.read_html(StringIO(html_source))[0]
61
+
62
+ df = df[[1, 5, 6, 7, 8]]
63
+ df.columns = ['Squad', 'Total_Cmp', 'Total_Att', 'Total_Cmp%', 'Total_TotDist']
64
+ df = df.iloc[:-1]
65
+
66
+ return df
67
+
68
+ def filter_teams(df, teams):
69
+ return df[df["Squad"].isin(teams)]
70
+
71
+ def main():
72
+ df = pull_premier_league_team_passing()
73
+ # Pastikan df tidak None sebelum melanjutkan
74
+ if df is not None:
75
+ teams = ["Arsenal", "Nott'ham Forest"]
76
+ df_filtered = filter_teams(df, teams)
77
+ print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
78
+ print("=" * 70)
79
+ print(df_filtered)
80
+
81
+ if __name__ == "__main__":
82
+ main()
.history/fbrefdata_example_20251004190507.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
def _select_total_passing(table):
    """Return Squad plus the four 'Total' passing columns of ``table``, or None.

    A two-row header is flattened to ``Group_Name`` labels first.  Columns are
    then matched by exact label and picked by position.  ``None`` is returned
    when any of the five expected columns is missing.
    """
    targets = ['Squad', 'Total_Cmp', 'Total_Att', 'Total_Cmp%', 'Total_TotDist']
    if isinstance(table.columns, pd.MultiIndex):
        labels = ['_'.join(str(part) for part in col).strip() for col in table.columns.values]
    else:
        labels = [str(col) for col in table.columns]

    positions = {}
    for pos, label in enumerate(labels):
        # A flattened two-row header prefixes the group label, e.g. '<group>_Squad'.
        name = 'Squad' if label == 'Squad' or label.endswith('_Squad') else label
        if name in targets:
            positions.setdefault(name, pos)  # first occurrence wins

    if len(positions) != len(targets):
        return None
    selected = table.iloc[:, [positions[name] for name in targets]].copy()
    selected.columns = targets
    return selected


def pull_premier_league_team_passing():
    """Download the Premier League passing page from FBref and return team totals.

    Opens headless Chrome, accepts the cookie banner when one shows up, waits
    for the ``div_stats_passing`` element and parses the page with pandas.

    Returns:
        A DataFrame with the columns ``Squad``, ``Total_Cmp``, ``Total_Att``,
        ``Total_Cmp%`` and ``Total_TotDist``, or ``None`` when the stats element
        never becomes visible or the first table lacks those columns.
    """
    url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
    print(f"Opening browser to download team passing stats from {url} ...")

    options = ChromeOptions()
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--headless")  # headless again, for speed
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
    # try/finally closes the browser on every path, including unexpected errors.
    try:
        driver.get(url)

        try:
            wait = WebDriverWait(driver, 10)
            accept_button = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
            accept_button.click()
            print("Cookie banner accepted.")
        except TimeoutException:
            # No button within 10 seconds: treat it as "no banner".
            print("No cookie banner found or it took too long.")

        try:
            wait = WebDriverWait(driver, 10)
            wait.until(EC.visibility_of_element_located((By.ID, "div_stats_passing")))
            print("Stats table is now visible.")
        except TimeoutException:
            print("The stats table could not be found on the page.")
            return None

        html_source = driver.page_source
    finally:
        driver.quit()

    print("Data downloaded. Processing with pandas...")

    # Investigation aid: read every table on the page and report its shape.
    all_tables = pd.read_html(StringIO(html_source))
    print(f"\nDEBUG: Found {len(all_tables)} tables on the page.")
    for i, table in enumerate(all_tables):
        print(f"DEBUG: Table [{i}] has shape: {table.shape}")

    # Columns are chosen by name: integer labels such as df[[1, 5, ...]] raise
    # KeyError on a table whose header was parsed into named columns.
    df = _select_total_passing(all_tables[0])
    if df is None:
        print("The first table does not contain the expected passing columns.")
        return None

    # NOTE(review): the last row is dropped as before, presumably a footer row - confirm.
    return df.iloc[:-1]
71
+
72
def filter_teams(df, teams):
    """Return the rows of ``df`` whose ``Squad`` value is listed in ``teams``."""
    is_wanted = df["Squad"].isin(teams)
    return df.loc[is_wanted]
74
+
75
def main():
    """Fetch the team passing table and print the rows for two selected clubs."""
    df = pull_premier_league_team_passing()
    # The scraper returns None when the table could not be loaded; nothing to print then.
    if df is None:
        return

    teams = ["Arsenal", "Nott'ham Forest"]
    df_filtered = filter_teams(df, teams)
    print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
    print("=" * 70)
    print(df_filtered)


if __name__ == "__main__":
    main()
.history/fbrefdata_example_20251004190633.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
def _select_total_passing(table):
    """Return Squad plus the four 'Total' passing columns of ``table``, or None.

    A two-row header is flattened to ``Group_Name`` labels first.  Columns are
    then matched by exact label and picked by position.  ``None`` is returned
    when any of the five expected columns is missing.
    """
    targets = ['Squad', 'Total_Cmp', 'Total_Att', 'Total_Cmp%', 'Total_TotDist']
    if isinstance(table.columns, pd.MultiIndex):
        labels = ['_'.join(str(part) for part in col).strip() for col in table.columns.values]
    else:
        labels = [str(col) for col in table.columns]

    positions = {}
    for pos, label in enumerate(labels):
        # A flattened two-row header prefixes the group label, e.g. '<group>_Squad'.
        name = 'Squad' if label == 'Squad' or label.endswith('_Squad') else label
        if name in targets:
            positions.setdefault(name, pos)  # first occurrence wins

    if len(positions) != len(targets):
        return None
    selected = table.iloc[:, [positions[name] for name in targets]].copy()
    selected.columns = targets
    return selected


def pull_premier_league_team_passing():
    """Download the Premier League passing page from FBref and return team totals.

    Opens a visible Chrome window, accepts the cookie banner when one shows up,
    waits for the ``div_stats_passing`` element and parses the page with pandas.
    When the element never becomes visible, a screenshot and the page HTML are
    written to ``debug_screenshot.png`` / ``debug_page.html``.

    Returns:
        A DataFrame with the columns ``Squad``, ``Total_Cmp``, ``Total_Att``,
        ``Total_Cmp%`` and ``Total_TotDist``, or ``None`` when the stats element
        is not found or the selected table lacks those columns.
    """
    url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
    print(f"Opening browser to download team passing stats from {url} ...")

    options = ChromeOptions()
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    # options.add_argument("--headless")  # left off so the run can be watched
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
    # try/finally closes the browser on every path, including unexpected errors.
    try:
        driver.get(url)

        try:
            wait = WebDriverWait(driver, 10)
            accept_button = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
            accept_button.click()
            print("Cookie banner accepted.")
        except TimeoutException:
            print("No cookie banner found or it took too long.")

        try:
            wait = WebDriverWait(driver, 10)
            wait.until(EC.visibility_of_element_located((By.ID, "div_stats_passing")))
            print("Stats table is now visible.")
        except TimeoutException:
            print("The stats table could not be found on the page. Saving debug files...")
            # Keep evidence of the failure: what the browser showed and its HTML.
            driver.save_screenshot('debug_screenshot.png')
            with open('debug_page.html', 'w', encoding='utf-8') as f:
                f.write(driver.page_source)
            return None

        html_source = driver.page_source
    finally:
        driver.quit()

    print("Data downloaded. Processing with pandas...")

    # The page is parsed once, here; the earlier throw-away read_html(...)[1]
    # inside the wait block was unused and could raise IndexError.
    all_tables = pd.read_html(StringIO(html_source))

    # Assume the main table is the one with the most columns.
    main_df = max(all_tables, key=lambda table: len(table.columns))
    print(f"Main table selected with shape: {main_df.shape}")

    # Columns are chosen by name: integer labels such as df[[1, 5, ...]] raise
    # KeyError on a table whose header was parsed into named columns.
    df = _select_total_passing(main_df)
    if df is None:
        print("The selected table does not contain the expected passing columns.")
        return None

    # NOTE(review): the last row is dropped as before, presumably a footer row - confirm.
    return df.iloc[:-1]
76
+
77
def filter_teams(df, teams):
    """Return the rows of ``df`` whose ``Squad`` value is listed in ``teams``."""
    is_wanted = df["Squad"].isin(teams)
    return df.loc[is_wanted]
79
+
80
def main():
    """Fetch the team passing table and print the rows for two selected clubs."""
    df = pull_premier_league_team_passing()
    # The scraper returns None when the table could not be loaded; nothing to print then.
    if df is None:
        return

    teams = ["Arsenal", "Nott'ham Forest"]
    df_filtered = filter_teams(df, teams)
    print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
    print("=" * 70)
    print(df_filtered)


if __name__ == "__main__":
    main()
.history/fbrefdata_example_20251004190944.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
def _select_total_passing(table):
    """Return Squad plus the four 'Total' passing columns of ``table``, or None.

    A two-row header is flattened to ``Group_Name`` labels first.  Columns are
    then matched by exact label, so ``Short_Cmp`` or ``Long_Att`` can never be
    mistaken for the 'Total' group.  ``None`` is returned when any of the five
    expected columns is missing.
    """
    targets = ['Squad', 'Total_Cmp', 'Total_Att', 'Total_Cmp%', 'Total_TotDist']
    if isinstance(table.columns, pd.MultiIndex):
        labels = ['_'.join(str(part) for part in col).strip() for col in table.columns.values]
    else:
        labels = [str(col) for col in table.columns]

    positions = {}
    for pos, label in enumerate(labels):
        # A flattened two-row header prefixes the group label, e.g. '<group>_Squad'.
        name = 'Squad' if label == 'Squad' or label.endswith('_Squad') else label
        if name in targets:
            positions.setdefault(name, pos)  # first occurrence wins

    if len(positions) != len(targets):
        return None
    selected = table.iloc[:, [positions[name] for name in targets]].copy()
    selected.columns = targets
    return selected


def pull_premier_league_team_passing():
    """Download the Premier League passing page from FBref and return team totals.

    Opens a visible Chrome window, accepts the cookie banner when one shows up,
    waits for the ``div_stats_passing`` element and parses the page with pandas.
    When the element never becomes visible, a screenshot and the page HTML are
    written to ``debug_screenshot.png`` / ``debug_page.html``.

    Returns:
        A DataFrame with the columns ``Squad``, ``Total_Cmp``, ``Total_Att``,
        ``Total_Cmp%`` and ``Total_TotDist``, or ``None`` when the stats element
        is not found or the selected table lacks those columns.
    """
    url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
    print(f"Opening browser to download team passing stats from {url} ...")

    options = ChromeOptions()
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    # options.add_argument("--headless")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
    # try/finally closes the browser on every path, including unexpected errors.
    try:
        driver.get(url)

        try:
            wait = WebDriverWait(driver, 10)
            accept_button = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
            accept_button.click()
            print("Cookie banner accepted.")
        except TimeoutException:
            print("No cookie banner found or it took too long.")

        try:
            wait = WebDriverWait(driver, 10)
            wait.until(EC.visibility_of_element_located((By.ID, "div_stats_passing")))
            print("Stats table is now visible.")
        except TimeoutException:
            print("The stats table could not be found on the page. Saving debug files...")
            driver.save_screenshot('debug_screenshot.png')
            with open('debug_page.html', 'w', encoding='utf-8') as f:
                f.write(driver.page_source)
            return None

        html_source = driver.page_source
    finally:
        driver.quit()

    all_tables = pd.read_html(StringIO(html_source))
    print("Data downloaded. Processing with pandas...")

    # Take the main table: the one with the most columns.
    main_df = max(all_tables, key=lambda table: len(table.columns)).copy()
    print(f"Main table selected with shape: {main_df.shape}")

    # Multi-level (MultiIndex) header: join the header levels into one name.
    if isinstance(main_df.columns, pd.MultiIndex):
        main_df.columns = ['_'.join(str(part) for part in col).strip() for col in main_df.columns.values]

    # Show a few column names so the real labels are visible in the log.
    print("Available columns:", main_df.columns[:10].tolist())

    # Exact-name selection: substring matching on 'Cmp'/'Att' also catches the
    # Short/Medium/Long groups, and assigning five names then fails.
    df = _select_total_passing(main_df)
    if df is None:
        print("The selected table does not contain the expected passing columns.")
        return None

    # Drop empty rows and repeated header rows.
    df = df[df['Squad'].notna() & (df['Squad'] != 'Squad')]

    return df
74
+
75
+
76
def filter_teams(df, teams):
    """Return the rows of ``df`` whose ``Squad`` value is listed in ``teams``."""
    is_wanted = df["Squad"].isin(teams)
    return df.loc[is_wanted]
78
+
79
+
80
def main():
    """Fetch the team passing table and print the rows for two selected clubs."""
    df = pull_premier_league_team_passing()
    # The scraper returns None when the table could not be loaded; nothing to print then.
    if df is None:
        return

    teams = ["Arsenal", "Nott'ham Forest"]
    df_filtered = filter_teams(df, teams)
    print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
    print("=" * 70)
    print(df_filtered)


if __name__ == "__main__":
    main()
.history/fbrefdata_example_20251004191947.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
def _select_total_passing(table):
    """Return Squad plus the four 'Total' passing columns of ``table``, or None.

    A two-row header is flattened to ``Group_Name`` labels first.  Columns are
    then matched by exact label, so ``Short_Cmp`` or ``Long_Att`` can never be
    mistaken for the 'Total' group.  ``None`` is returned when any of the five
    expected columns is missing.
    """
    targets = ['Squad', 'Total_Cmp', 'Total_Att', 'Total_Cmp%', 'Total_TotDist']
    if isinstance(table.columns, pd.MultiIndex):
        labels = ['_'.join(str(part) for part in col).strip() for col in table.columns.values]
    else:
        labels = [str(col) for col in table.columns]

    positions = {}
    for pos, label in enumerate(labels):
        # A flattened two-row header prefixes the group label, e.g. '<group>_Squad'.
        name = 'Squad' if label == 'Squad' or label.endswith('_Squad') else label
        if name in targets:
            positions.setdefault(name, pos)  # first occurrence wins

    if len(positions) != len(targets):
        return None
    selected = table.iloc[:, [positions[name] for name in targets]].copy()
    selected.columns = targets
    return selected


def pull_premier_league_team_passing():
    """Download the Premier League passing page from FBref and return team totals.

    Opens a visible Chrome window, accepts the cookie banner when one shows up,
    waits up to 15 seconds for the ``stats_passing_team`` element and parses the
    page with pandas.  When the element never becomes visible, a screenshot and
    the page HTML are written to ``debug_screenshot.png`` / ``debug_page.html``.

    Returns:
        A DataFrame with the columns ``Squad``, ``Total_Cmp``, ``Total_Att``,
        ``Total_Cmp%`` and ``Total_TotDist``, or ``None`` when the element is not
        found or no table on the page has those columns.
    """
    url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
    print(f"Opening browser to download team passing stats from {url} ...")

    options = ChromeOptions()
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    # options.add_argument("--headless")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
    # try/finally closes the browser on every path, including unexpected errors.
    try:
        driver.get(url)

        try:
            wait = WebDriverWait(driver, 10)
            accept_button = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
            accept_button.click()
            print("Cookie banner accepted.")
        except TimeoutException:
            print("No cookie banner found or it took too long.")

        try:
            wait = WebDriverWait(driver, 15)
            wait.until(EC.visibility_of_element_located((By.ID, "stats_passing_team")))
            print("Team stats table is visible.")
            html_source = driver.page_source
        except TimeoutException:
            print("❌ The team stats table could not be found on the page. Saving debug files...")
            driver.save_screenshot('debug_screenshot.png')
            with open('debug_page.html', 'w', encoding='utf-8') as f:
                f.write(driver.page_source)
            return None
    finally:
        driver.quit()

    print("Data downloaded. Processing with pandas...")

    # Use the first table that has Squad and the 'Total' passing columns.  The
    # check runs on flattened labels because `'Squad' in df.columns` does not
    # look at the second row of a two-row (MultiIndex) header.
    all_tables = pd.read_html(StringIO(html_source))
    team_df = None
    for table in all_tables:
        team_df = _select_total_passing(table)
        if team_df is not None:
            print(f"✅ Found team table with shape: {table.shape}")
            break

    if team_df is None:
        print("❌ No team table found.")
        return None

    # Drop empty rows and repeated header rows.
    team_df = team_df[team_df['Squad'].notna()]
    team_df = team_df[~team_df['Squad'].astype(str).str.contains("Squad|Rk", na=False)]

    return team_df
90
+
91
+
92
def filter_teams(df, teams):
    """Return the rows of ``df`` whose ``Squad`` value is listed in ``teams``."""
    is_wanted = df["Squad"].isin(teams)
    return df.loc[is_wanted]
94
+
95
+
96
def main():
    """Fetch the team passing table and print the rows for two selected clubs."""
    df = pull_premier_league_team_passing()
    # The scraper returns None when the table could not be loaded; nothing to print then.
    if df is None:
        return

    teams = ["Arsenal", "Nott'ham Forest"]
    df_filtered = filter_teams(df, teams)
    print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
    print("=" * 70)
    print(df_filtered)


if __name__ == "__main__":
    main()
.history/fbrefdata_example_20251005091604.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
def _select_total_passing(table):
    """Return Squad plus the four 'Total' passing columns of ``table``, or None.

    A two-row header is flattened to ``Group_Name`` labels first.  Columns are
    then matched by exact label, so ``Short_Cmp`` or ``Long_Att`` can never be
    mistaken for the 'Total' group.  ``None`` is returned when any of the five
    expected columns is missing.
    """
    targets = ['Squad', 'Total_Cmp', 'Total_Att', 'Total_Cmp%', 'Total_TotDist']
    if isinstance(table.columns, pd.MultiIndex):
        labels = ['_'.join(str(part) for part in col).strip() for col in table.columns.values]
    else:
        labels = [str(col) for col in table.columns]

    positions = {}
    for pos, label in enumerate(labels):
        # A flattened two-row header prefixes the group label, e.g. '<group>_Squad'.
        name = 'Squad' if label == 'Squad' or label.endswith('_Squad') else label
        if name in targets:
            positions.setdefault(name, pos)  # first occurrence wins

    if len(positions) != len(targets):
        return None
    selected = table.iloc[:, [positions[name] for name in targets]].copy()
    selected.columns = targets
    return selected


def pull_premier_league_team_passing():
    """Download the Premier League passing page from FBref and return team totals.

    Opens a visible Chrome window, accepts the cookie banner when one shows up,
    waits up to 20 seconds for the ``div_stats_passing_team`` element and parses
    only that element's HTML with pandas.  When the element is not found, a
    screenshot and the page HTML are written to ``debug_screenshot.png`` /
    ``debug_page.html``.

    Returns:
        A DataFrame with the columns ``Squad``, ``Total_Cmp``, ``Total_Att``,
        ``Total_Cmp%`` and ``Total_TotDist``, or ``None`` when the element is not
        found or its table lacks those columns.
    """
    url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
    print(f"Opening browser to download team passing stats from {url} ...")

    options = ChromeOptions()
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    # options.add_argument("--headless")  # enable when the browser window need not be seen
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
    # try/finally closes the browser on every path, including unexpected errors.
    try:
        driver.get(url)

        try:
            wait = WebDriverWait(driver, 10)
            accept_button = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
            accept_button.click()
            print("Cookie banner accepted.")
        except TimeoutException:
            print("No cookie banner found or it took too long.")

        # Wait for the team table's wrapper div to be present.
        try:
            wait = WebDriverWait(driver, 20)
            div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
            print("✅ Team stats div found, extracting HTML...")

            # Take only the HTML of the team passing section.
            team_html = div_element.get_attribute("outerHTML")
        except TimeoutException:
            print("❌ The team stats table could not be found on the page. Saving debug files...")
            driver.save_screenshot('debug_screenshot.png')
            with open('debug_page.html', 'w', encoding='utf-8') as f:
                f.write(driver.page_source)
            return None
    finally:
        driver.quit()

    print("Data downloaded. Processing with pandas...")

    # Read the table from the HTML fragment.
    raw_table = pd.read_html(StringIO(team_html))[0]
    print(f"✅ Found team table with shape: {raw_table.shape}")

    # Exact-name selection: substring matching on 'Cmp'/'Att' also renamed the
    # Short/Medium/Long columns to 'Total_*', producing duplicate labels.
    team_df = _select_total_passing(raw_table)
    if team_df is None:
        print("❌ The team table does not contain the expected passing columns.")
        return None

    # Drop empty rows and repeated header rows.
    team_df = team_df[team_df['Squad'].notna()]
    team_df = team_df[~team_df['Squad'].astype(str).str.contains("Squad|Rk", na=False)]

    return team_df
83
+
84
+
85
def filter_teams(df, teams):
    """Return the rows of ``df`` whose ``Squad`` value is listed in ``teams``."""
    is_wanted = df["Squad"].isin(teams)
    return df.loc[is_wanted]
87
+
88
+
89
def main():
    """Fetch the team passing table, save it as CSV and print two selected clubs."""
    df = pull_premier_league_team_passing()
    # The scraper returns None when the table could not be loaded; nothing to do then.
    if df is None:
        return

    # Save the full table to CSV on every successful run.
    df.to_csv("premier_league_team_passing.csv", index=False)
    print("\n💾 Saved to premier_league_team_passing.csv")

    teams = ["Arsenal", "Nott'ham Forest"]
    df_filtered = filter_teams(df, teams)
    print("\n📊 Passing Stats for Arsenal & Nottingham Forest (Team Level)")
    print("=" * 70)
    print(df_filtered)


if __name__ == "__main__":
    main()
.history/fbrefdata_example_20251005091825.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
def _select_total_passing(table):
    """Return Squad plus the four 'Total' passing columns of ``table``, or None.

    A two-row header is flattened to ``Group_Name`` labels first.  Columns are
    then matched by exact label, so ``Short_Cmp`` or ``Long_Att`` can never be
    mistaken for the 'Total' group.  ``None`` is returned when any of the five
    expected columns is missing.
    """
    targets = ['Squad', 'Total_Cmp', 'Total_Att', 'Total_Cmp%', 'Total_TotDist']
    if isinstance(table.columns, pd.MultiIndex):
        labels = ['_'.join(str(part) for part in col).strip() for col in table.columns.values]
    else:
        labels = [str(col) for col in table.columns]

    positions = {}
    for pos, label in enumerate(labels):
        # A flattened two-row header prefixes the group label, e.g. '<group>_Squad'.
        name = 'Squad' if label == 'Squad' or label.endswith('_Squad') else label
        if name in targets:
            positions.setdefault(name, pos)  # first occurrence wins

    if len(positions) != len(targets):
        return None
    selected = table.iloc[:, [positions[name] for name in targets]].copy()
    selected.columns = targets
    return selected


def pull_premier_league_team_passing():
    """Download the Premier League passing page from FBref and return team totals.

    Opens a visible Chrome window, accepts the cookie banner when one shows up,
    waits up to 20 seconds for the ``div_stats_passing_team`` element and parses
    only that element's HTML with pandas.  When the element is not found, a
    screenshot and the page HTML are written to ``debug_screenshot.png`` /
    ``debug_page.html``.

    Returns:
        A DataFrame with the columns ``Squad``, ``Total_Cmp``, ``Total_Att``,
        ``Total_Cmp%`` and ``Total_TotDist``, or ``None`` when the element is not
        found or its table lacks those columns.
    """
    url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
    print(f"Opening browser to download team passing stats from {url} ...")

    options = ChromeOptions()
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    # options.add_argument("--headless")  # enable when the browser window need not be seen
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
    # try/finally closes the browser on every path, including unexpected errors.
    try:
        driver.get(url)

        try:
            wait = WebDriverWait(driver, 10)
            accept_button = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
            accept_button.click()
            print("Cookie banner accepted.")
        except TimeoutException:
            print("No cookie banner found or it took too long.")

        # Wait for the team table's wrapper div to be present.
        try:
            wait = WebDriverWait(driver, 20)
            div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
            print("✅ Team stats div found, extracting HTML...")

            # Take only the HTML of the team passing section.
            team_html = div_element.get_attribute("outerHTML")
        except TimeoutException:
            print("❌ The team stats table could not be found on the page. Saving debug files...")
            driver.save_screenshot('debug_screenshot.png')
            with open('debug_page.html', 'w', encoding='utf-8') as f:
                f.write(driver.page_source)
            return None
    finally:
        driver.quit()

    print("Data downloaded. Processing with pandas...")

    # Read the table from the HTML fragment.
    raw_table = pd.read_html(StringIO(team_html))[0]
    print(f"✅ Found team table with shape: {raw_table.shape}")

    # Exact-name selection: substring matching on 'Cmp'/'Att' also renamed the
    # Short/Medium/Long columns to 'Total_*', producing duplicate labels.
    team_df = _select_total_passing(raw_table)
    if team_df is None:
        print("❌ The team table does not contain the expected passing columns.")
        return None

    # Drop empty rows and repeated header rows.
    team_df = team_df[team_df['Squad'].notna()]
    team_df = team_df[~team_df['Squad'].astype(str).str.contains("Squad|Rk", na=False)]

    return team_df
83
+
84
+
85
def filter_teams(df, teams):
    """Return the rows of ``df`` whose ``Squad`` value is listed in ``teams``."""
    is_wanted = df["Squad"].isin(teams)
    return df.loc[is_wanted]
87
+
88
+
89
def main():
    """Fetch the team passing table, save it as CSV and print two selected clubs."""
    df = pull_premier_league_team_passing()
    # The scraper returns None when the table could not be loaded; nothing to do then.
    if df is None:
        return

    # Save the full table to CSV on every successful run.
    df.to_csv("premier_league_team_passing.csv", index=False)
    print("\n💾 Saved to premier_league_team_passing.csv")

    teams = ["Wolves", "Nott'ham Forest"]
    df_filtered = filter_teams(df, teams)
    # The heading is built from `teams` so it cannot go stale when the list changes.
    print(f"\n📊 Passing Stats for {' & '.join(teams)} (Team Level)")
    print("=" * 70)
    print(df_filtered)


if __name__ == "__main__":
    main()
.history/fbrefdata_example_20251005091830.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
def _select_total_passing(table):
    """Return Squad plus the four 'Total' passing columns of ``table``, or None.

    A two-row header is flattened to ``Group_Name`` labels first.  Columns are
    then matched by exact label, so ``Short_Cmp`` or ``Long_Att`` can never be
    mistaken for the 'Total' group.  ``None`` is returned when any of the five
    expected columns is missing.
    """
    targets = ['Squad', 'Total_Cmp', 'Total_Att', 'Total_Cmp%', 'Total_TotDist']
    if isinstance(table.columns, pd.MultiIndex):
        labels = ['_'.join(str(part) for part in col).strip() for col in table.columns.values]
    else:
        labels = [str(col) for col in table.columns]

    positions = {}
    for pos, label in enumerate(labels):
        # A flattened two-row header prefixes the group label, e.g. '<group>_Squad'.
        name = 'Squad' if label == 'Squad' or label.endswith('_Squad') else label
        if name in targets:
            positions.setdefault(name, pos)  # first occurrence wins

    if len(positions) != len(targets):
        return None
    selected = table.iloc[:, [positions[name] for name in targets]].copy()
    selected.columns = targets
    return selected


def pull_premier_league_team_passing():
    """Download the Premier League passing page from FBref and return team totals.

    Opens a visible Chrome window, accepts the cookie banner when one shows up,
    waits up to 20 seconds for the ``div_stats_passing_team`` element and parses
    only that element's HTML with pandas.  When the element is not found, a
    screenshot and the page HTML are written to ``debug_screenshot.png`` /
    ``debug_page.html``.

    Returns:
        A DataFrame with the columns ``Squad``, ``Total_Cmp``, ``Total_Att``,
        ``Total_Cmp%`` and ``Total_TotDist``, or ``None`` when the element is not
        found or its table lacks those columns.
    """
    url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
    print(f"Opening browser to download team passing stats from {url} ...")

    options = ChromeOptions()
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    # options.add_argument("--headless")  # enable when the browser window need not be seen
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
    # try/finally closes the browser on every path, including unexpected errors.
    try:
        driver.get(url)

        try:
            wait = WebDriverWait(driver, 10)
            accept_button = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
            accept_button.click()
            print("Cookie banner accepted.")
        except TimeoutException:
            print("No cookie banner found or it took too long.")

        # Wait for the team table's wrapper div to be present.
        try:
            wait = WebDriverWait(driver, 20)
            div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
            print("✅ Team stats div found, extracting HTML...")

            # Take only the HTML of the team passing section.
            team_html = div_element.get_attribute("outerHTML")
        except TimeoutException:
            print("❌ The team stats table could not be found on the page. Saving debug files...")
            driver.save_screenshot('debug_screenshot.png')
            with open('debug_page.html', 'w', encoding='utf-8') as f:
                f.write(driver.page_source)
            return None
    finally:
        driver.quit()

    print("Data downloaded. Processing with pandas...")

    # Read the table from the HTML fragment.
    raw_table = pd.read_html(StringIO(team_html))[0]
    print(f"✅ Found team table with shape: {raw_table.shape}")

    # Exact-name selection: substring matching on 'Cmp'/'Att' also renamed the
    # Short/Medium/Long columns to 'Total_*', producing duplicate labels.
    team_df = _select_total_passing(raw_table)
    if team_df is None:
        print("❌ The team table does not contain the expected passing columns.")
        return None

    # Drop empty rows and repeated header rows.
    team_df = team_df[team_df['Squad'].notna()]
    team_df = team_df[~team_df['Squad'].astype(str).str.contains("Squad|Rk", na=False)]

    return team_df
83
+
84
+
85
def filter_teams(df, teams):
    """Return the rows of ``df`` whose ``Squad`` value is listed in ``teams``."""
    is_wanted = df["Squad"].isin(teams)
    return df.loc[is_wanted]
87
+
88
+
89
def main():
    """Fetch the team passing table, save it as CSV and print two selected clubs."""
    df = pull_premier_league_team_passing()
    # The scraper returns None when the table could not be loaded; nothing to do then.
    if df is None:
        return

    # Save the full table to CSV on every successful run.
    df.to_csv("premier_league_team_passing.csv", index=False)
    print("\n💾 Saved to premier_league_team_passing.csv")

    # filter_teams matches names exactly, so a truncated name such as "B"
    # would never select a row; the full squad name is required.
    teams = ["Wolves", "Brighton"]
    df_filtered = filter_teams(df, teams)
    # The heading is built from `teams` so it cannot go stale when the list changes.
    print(f"\n📊 Passing Stats for {' & '.join(teams)} (Team Level)")
    print("=" * 70)
    print(df_filtered)


if __name__ == "__main__":
    main()
.history/fbrefdata_example_20251005091835.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
def _select_total_passing(table):
    """Return Squad plus the four 'Total' passing columns of ``table``, or None.

    A two-row header is flattened to ``Group_Name`` labels first.  Columns are
    then matched by exact label, so ``Short_Cmp`` or ``Long_Att`` can never be
    mistaken for the 'Total' group.  ``None`` is returned when any of the five
    expected columns is missing.
    """
    targets = ['Squad', 'Total_Cmp', 'Total_Att', 'Total_Cmp%', 'Total_TotDist']
    if isinstance(table.columns, pd.MultiIndex):
        labels = ['_'.join(str(part) for part in col).strip() for col in table.columns.values]
    else:
        labels = [str(col) for col in table.columns]

    positions = {}
    for pos, label in enumerate(labels):
        # A flattened two-row header prefixes the group label, e.g. '<group>_Squad'.
        name = 'Squad' if label == 'Squad' or label.endswith('_Squad') else label
        if name in targets:
            positions.setdefault(name, pos)  # first occurrence wins

    if len(positions) != len(targets):
        return None
    selected = table.iloc[:, [positions[name] for name in targets]].copy()
    selected.columns = targets
    return selected


def pull_premier_league_team_passing():
    """Download the Premier League passing page from FBref and return team totals.

    Opens a visible Chrome window, accepts the cookie banner when one shows up,
    waits up to 20 seconds for the ``div_stats_passing_team`` element and parses
    only that element's HTML with pandas.  When the element is not found, a
    screenshot and the page HTML are written to ``debug_screenshot.png`` /
    ``debug_page.html``.

    Returns:
        A DataFrame with the columns ``Squad``, ``Total_Cmp``, ``Total_Att``,
        ``Total_Cmp%`` and ``Total_TotDist``, or ``None`` when the element is not
        found or its table lacks those columns.
    """
    url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
    print(f"Opening browser to download team passing stats from {url} ...")

    options = ChromeOptions()
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    # options.add_argument("--headless")  # enable when the browser window need not be seen
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
    # try/finally closes the browser on every path, including unexpected errors.
    try:
        driver.get(url)

        try:
            wait = WebDriverWait(driver, 10)
            accept_button = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
            accept_button.click()
            print("Cookie banner accepted.")
        except TimeoutException:
            print("No cookie banner found or it took too long.")

        # Wait for the team table's wrapper div to be present.
        try:
            wait = WebDriverWait(driver, 20)
            div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
            print("✅ Team stats div found, extracting HTML...")

            # Take only the HTML of the team passing section.
            team_html = div_element.get_attribute("outerHTML")
        except TimeoutException:
            print("❌ The team stats table could not be found on the page. Saving debug files...")
            driver.save_screenshot('debug_screenshot.png')
            with open('debug_page.html', 'w', encoding='utf-8') as f:
                f.write(driver.page_source)
            return None
    finally:
        driver.quit()

    print("Data downloaded. Processing with pandas...")

    # Read the table from the HTML fragment.
    raw_table = pd.read_html(StringIO(team_html))[0]
    print(f"✅ Found team table with shape: {raw_table.shape}")

    # Exact-name selection: substring matching on 'Cmp'/'Att' also renamed the
    # Short/Medium/Long columns to 'Total_*', producing duplicate labels.
    team_df = _select_total_passing(raw_table)
    if team_df is None:
        print("❌ The team table does not contain the expected passing columns.")
        return None

    # Drop empty rows and repeated header rows.
    team_df = team_df[team_df['Squad'].notna()]
    team_df = team_df[~team_df['Squad'].astype(str).str.contains("Squad|Rk", na=False)]

    return team_df
83
+
84
+
85
def filter_teams(df, teams):
    """Return the rows of ``df`` whose ``Squad`` value is listed in ``teams``."""
    is_wanted = df["Squad"].isin(teams)
    return df.loc[is_wanted]
87
+
88
+
89
def main():
    """Fetch the team passing table, save it as CSV and print two selected clubs."""
    df = pull_premier_league_team_passing()
    # The scraper returns None when the table could not be loaded; nothing to do then.
    if df is None:
        return

    # Save the full table to CSV on every successful run.
    df.to_csv("premier_league_team_passing.csv", index=False)
    print("\n💾 Saved to premier_league_team_passing.csv")

    teams = ["Wolves", "Brighton"]
    df_filtered = filter_teams(df, teams)
    # The heading is built from `teams` so it cannot go stale when the list changes.
    print(f"\n📊 Passing Stats for {' & '.join(teams)} (Team Level)")
    print("=" * 70)
    print(df_filtered)


if __name__ == "__main__":
    main()
.history/fbrefdata_example_20251005091839.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
16
+ print(f"Opening browser to download team passing stats from {url} ...")
17
+
18
+ options = ChromeOptions()
19
+ options.add_argument("--start-maximized")
20
+ options.add_argument("--no-sandbox")
21
+ options.add_argument("--disable-dev-shm-usage")
22
+ # options.add_argument("--headless") # bisa diaktifkan jika tidak perlu melihat browser
23
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
24
+ options.add_experimental_option('useAutomationExtension', False)
25
+
26
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
27
+ driver.get(url)
28
+
29
+ try:
30
+ wait = WebDriverWait(driver, 10)
31
+ accept_button = wait.until(EC.element_to_be_clickable(
32
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
33
+ accept_button.click()
34
+ print("Cookie banner accepted.")
35
+ except TimeoutException:
36
+ print("No cookie banner found or it took too long.")
37
+
38
+ # βœ… Tunggu elemen tabel tim muncul (div wrapper)
39
+ try:
40
+ wait = WebDriverWait(driver, 20)
41
+ div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
42
+ print("βœ… Team stats div found, extracting HTML...")
43
+
44
+ # Ambil HTML hanya bagian tabel team passing
45
+ team_html = div_element.get_attribute("outerHTML")
46
+ except TimeoutException:
47
+ print("❌ The team stats table could not be found on the page. Saving debug files...")
48
+ driver.save_screenshot('debug_screenshot.png')
49
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
50
+ f.write(driver.page_source)
51
+ driver.quit()
52
+ return None
53
+
54
+ driver.quit()
55
+ print("Data downloaded. Processing with pandas...")
56
+
57
+ # Baca tabel dari potongan HTML
58
+ team_df = pd.read_html(StringIO(team_html))[0]
59
+ print(f"βœ… Found team table with shape: {team_df.shape}")
60
+
61
+ # Jika ada header dua baris, gabungkan
62
+ if isinstance(team_df.columns, pd.MultiIndex):
63
+ team_df.columns = ['_'.join(col).strip() for col in team_df.columns.values]
64
+
65
+ # Pilih kolom utama yang relevan
66
+ cols_to_use = [c for c in team_df.columns if any(x in c for x in ['Squad', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
67
+ team_df = team_df[cols_to_use]
68
+
69
+ # Normalisasi nama kolom
70
+ rename_map = {}
71
+ for c in team_df.columns:
72
+ if 'Squad' in c: rename_map[c] = 'Squad'
73
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
74
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
75
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
76
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
77
+ team_df.rename(columns=rename_map, inplace=True)
78
+
79
+ team_df = team_df[team_df['Squad'].notna()]
80
+ team_df = team_df[~team_df['Squad'].str.contains("Squad|Rk", na=False)]
81
+
82
+ return team_df
83
+
84
+
85
+ def filter_teams(df, teams):
86
+ return df[df["Squad"].isin(teams)]
87
+
88
+
89
+ def main():
90
+ df = pull_premier_league_team_passing()
91
+ if df is not None:
92
+ # Simpan ke CSV otomatis
93
+ df.to_csv("premier_league_team_passing.csv", index=False)
94
+ print("\nπŸ’Ύ Saved to premier_league_team_passing.csv")
95
+
96
+ teams = ["Wolves", "Brighton"]
97
+ df_filtered = filter_teams(df, teams)
98
+ print("\nπŸ“Š Passing Stats for Arsenal & Nottingham Forest (Team Level)")
99
+ print("=" * 70)
100
+ print(df_filtered)
101
+
102
+
103
+ if __name__ == "__main__":
104
+ main()
.history/fbrefdata_example_20251005091854.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
16
+ print(f"Opening browser to download team passing stats from {url} ...")
17
+
18
+ options = ChromeOptions()
19
+ options.add_argument("--start-maximized")
20
+ options.add_argument("--no-sandbox")
21
+ options.add_argument("--disable-dev-shm-usage")
22
+ # options.add_argument("--headless") # bisa diaktifkan jika tidak perlu melihat browser
23
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
24
+ options.add_experimental_option('useAutomationExtension', False)
25
+
26
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
27
+ driver.get(url)
28
+
29
+ try:
30
+ wait = WebDriverWait(driver, 10)
31
+ accept_button = wait.until(EC.element_to_be_clickable(
32
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
33
+ accept_button.click()
34
+ print("Cookie banner accepted.")
35
+ except TimeoutException:
36
+ print("No cookie banner found or it took too long.")
37
+
38
+ # βœ… Tunggu elemen tabel tim muncul (div wrapper)
39
+ try:
40
+ wait = WebDriverWait(driver, 20)
41
+ div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
42
+ print("βœ… Team stats div found, extracting HTML...")
43
+
44
+ # Ambil HTML hanya bagian tabel team passing
45
+ team_html = div_element.get_attribute("outerHTML")
46
+ except TimeoutException:
47
+ print("❌ The team stats table could not be found on the page. Saving debug files...")
48
+ driver.save_screenshot('debug_screenshot.png')
49
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
50
+ f.write(driver.page_source)
51
+ driver.quit()
52
+ return None
53
+
54
+ driver.quit()
55
+ print("Data downloaded. Processing with pandas...")
56
+
57
+ # Baca tabel dari potongan HTML
58
+ team_df = pd.read_html(StringIO(team_html))[0]
59
+ print(f"βœ… Found team table with shape: {team_df.shape}")
60
+
61
+ # Jika ada header dua baris, gabungkan
62
+ if isinstance(team_df.columns, pd.MultiIndex):
63
+ team_df.columns = ['_'.join(col).strip() for col in team_df.columns.values]
64
+
65
+ # Pilih kolom utama yang relevan
66
+ cols_to_use = [c for c in team_df.columns if any(x in c for x in ['Squad', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
67
+ team_df = team_df[cols_to_use]
68
+
69
+ # Normalisasi nama kolom
70
+ rename_map = {}
71
+ for c in team_df.columns:
72
+ if 'Squad' in c: rename_map[c] = 'Squad'
73
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
74
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
75
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
76
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
77
+ team_df.rename(columns=rename_map, inplace=True)
78
+
79
+ team_df = team_df[team_df['Squad'].notna()]
80
+ team_df = team_df[~team_df['Squad'].str.contains("Squad|Rk", na=False)]
81
+
82
+ return team_df
83
+
84
+
85
+ def filter_teams(df, teams):
86
+ return df[df["Squad"].isin(teams)]
87
+
88
+
89
+ def main():
90
+ df = pull_premier_league_team_passing()
91
+ if df is not None:
92
+ # Simpan ke CSV otomatis
93
+ df.to_csv("premier_league_team_passing.csv", index=False)
94
+ print("\nπŸ’Ύ Saved to premier_league_team_passing.csv")
95
+
96
+ teams = ["Wolves", "Brighton"]
97
+ df_filtered = filter_teams(df, teams)
98
+ print("\nπŸ“Š Passing Stats for Wolves (Team Level)")
99
+ print("=" * 70)
100
+ print(df_filtered)
101
+
102
+
103
+ if __name__ == "__main__":
104
+ main()
.history/fbrefdata_example_20251005091857.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
16
+ print(f"Opening browser to download team passing stats from {url} ...")
17
+
18
+ options = ChromeOptions()
19
+ options.add_argument("--start-maximized")
20
+ options.add_argument("--no-sandbox")
21
+ options.add_argument("--disable-dev-shm-usage")
22
+ # options.add_argument("--headless") # bisa diaktifkan jika tidak perlu melihat browser
23
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
24
+ options.add_experimental_option('useAutomationExtension', False)
25
+
26
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
27
+ driver.get(url)
28
+
29
+ try:
30
+ wait = WebDriverWait(driver, 10)
31
+ accept_button = wait.until(EC.element_to_be_clickable(
32
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
33
+ accept_button.click()
34
+ print("Cookie banner accepted.")
35
+ except TimeoutException:
36
+ print("No cookie banner found or it took too long.")
37
+
38
+ # βœ… Tunggu elemen tabel tim muncul (div wrapper)
39
+ try:
40
+ wait = WebDriverWait(driver, 20)
41
+ div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
42
+ print("βœ… Team stats div found, extracting HTML...")
43
+
44
+ # Ambil HTML hanya bagian tabel team passing
45
+ team_html = div_element.get_attribute("outerHTML")
46
+ except TimeoutException:
47
+ print("❌ The team stats table could not be found on the page. Saving debug files...")
48
+ driver.save_screenshot('debug_screenshot.png')
49
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
50
+ f.write(driver.page_source)
51
+ driver.quit()
52
+ return None
53
+
54
+ driver.quit()
55
+ print("Data downloaded. Processing with pandas...")
56
+
57
+ # Baca tabel dari potongan HTML
58
+ team_df = pd.read_html(StringIO(team_html))[0]
59
+ print(f"βœ… Found team table with shape: {team_df.shape}")
60
+
61
+ # Jika ada header dua baris, gabungkan
62
+ if isinstance(team_df.columns, pd.MultiIndex):
63
+ team_df.columns = ['_'.join(col).strip() for col in team_df.columns.values]
64
+
65
+ # Pilih kolom utama yang relevan
66
+ cols_to_use = [c for c in team_df.columns if any(x in c for x in ['Squad', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
67
+ team_df = team_df[cols_to_use]
68
+
69
+ # Normalisasi nama kolom
70
+ rename_map = {}
71
+ for c in team_df.columns:
72
+ if 'Squad' in c: rename_map[c] = 'Squad'
73
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
74
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
75
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
76
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
77
+ team_df.rename(columns=rename_map, inplace=True)
78
+
79
+ team_df = team_df[team_df['Squad'].notna()]
80
+ team_df = team_df[~team_df['Squad'].str.contains("Squad|Rk", na=False)]
81
+
82
+ return team_df
83
+
84
+
85
+ def filter_teams(df, teams):
86
+ return df[df["Squad"].isin(teams)]
87
+
88
+
89
+ def main():
90
+ df = pull_premier_league_team_passing()
91
+ if df is not None:
92
+ # Simpan ke CSV otomatis
93
+ df.to_csv("premier_league_team_passing.csv", index=False)
94
+ print("\nπŸ’Ύ Saved to premier_league_team_passing.csv")
95
+
96
+ teams = ["Wolves", "Brighton"]
97
+ df_filtered = filter_teams(df, teams)
98
+ print("\nπŸ“Š Passing Stats for Wolves & (Team Level)")
99
+ print("=" * 70)
100
+ print(df_filtered)
101
+
102
+
103
+ if __name__ == "__main__":
104
+ main()
.history/fbrefdata_example_20251005091858.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
16
+ print(f"Opening browser to download team passing stats from {url} ...")
17
+
18
+ options = ChromeOptions()
19
+ options.add_argument("--start-maximized")
20
+ options.add_argument("--no-sandbox")
21
+ options.add_argument("--disable-dev-shm-usage")
22
+ # options.add_argument("--headless") # bisa diaktifkan jika tidak perlu melihat browser
23
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
24
+ options.add_experimental_option('useAutomationExtension', False)
25
+
26
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
27
+ driver.get(url)
28
+
29
+ try:
30
+ wait = WebDriverWait(driver, 10)
31
+ accept_button = wait.until(EC.element_to_be_clickable(
32
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
33
+ accept_button.click()
34
+ print("Cookie banner accepted.")
35
+ except TimeoutException:
36
+ print("No cookie banner found or it took too long.")
37
+
38
+ # βœ… Tunggu elemen tabel tim muncul (div wrapper)
39
+ try:
40
+ wait = WebDriverWait(driver, 20)
41
+ div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
42
+ print("βœ… Team stats div found, extracting HTML...")
43
+
44
+ # Ambil HTML hanya bagian tabel team passing
45
+ team_html = div_element.get_attribute("outerHTML")
46
+ except TimeoutException:
47
+ print("❌ The team stats table could not be found on the page. Saving debug files...")
48
+ driver.save_screenshot('debug_screenshot.png')
49
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
50
+ f.write(driver.page_source)
51
+ driver.quit()
52
+ return None
53
+
54
+ driver.quit()
55
+ print("Data downloaded. Processing with pandas...")
56
+
57
+ # Baca tabel dari potongan HTML
58
+ team_df = pd.read_html(StringIO(team_html))[0]
59
+ print(f"βœ… Found team table with shape: {team_df.shape}")
60
+
61
+ # Jika ada header dua baris, gabungkan
62
+ if isinstance(team_df.columns, pd.MultiIndex):
63
+ team_df.columns = ['_'.join(col).strip() for col in team_df.columns.values]
64
+
65
+ # Pilih kolom utama yang relevan
66
+ cols_to_use = [c for c in team_df.columns if any(x in c for x in ['Squad', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
67
+ team_df = team_df[cols_to_use]
68
+
69
+ # Normalisasi nama kolom
70
+ rename_map = {}
71
+ for c in team_df.columns:
72
+ if 'Squad' in c: rename_map[c] = 'Squad'
73
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
74
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
75
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
76
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
77
+ team_df.rename(columns=rename_map, inplace=True)
78
+
79
+ team_df = team_df[team_df['Squad'].notna()]
80
+ team_df = team_df[~team_df['Squad'].str.contains("Squad|Rk", na=False)]
81
+
82
+ return team_df
83
+
84
+
85
+ def filter_teams(df, teams):
86
+ return df[df["Squad"].isin(teams)]
87
+
88
+
89
+ def main():
90
+ df = pull_premier_league_team_passing()
91
+ if df is not None:
92
+ # Simpan ke CSV otomatis
93
+ df.to_csv("premier_league_team_passing.csv", index=False)
94
+ print("\nπŸ’Ύ Saved to premier_league_team_passing.csv")
95
+
96
+ teams = ["Wolves", "Brighton"]
97
+ df_filtered = filter_teams(df, teams)
98
+ print("\nπŸ“Š Passing Stats for Wolves & Brighton (Team Level)")
99
+ print("=" * 70)
100
+ print(df_filtered)
101
+
102
+
103
+ if __name__ == "__main__":
104
+ main()
.history/fbrefdata_example_20251005092140.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
16
+ print(f"Opening browser to download team passing stats from {url} ...")
17
+ import time
18
+ time.sleep(5)
19
+
20
+ options = ChromeOptions()
21
+ options.add_argument("--start-maximized")
22
+ options.add_argument("--no-sandbox")
23
+ options.add_argument("--disable-dev-shm-usage")
24
+ # options.add_argument("--headless") # bisa diaktifkan jika tidak perlu melihat browser
25
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
26
+ options.add_experimental_option('useAutomationExtension', False)
27
+
28
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
29
+ driver.get(url)
30
+
31
+ try:
32
+ wait = WebDriverWait(driver, 10)
33
+ accept_button = wait.until(EC.element_to_be_clickable(
34
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
35
+ accept_button.click()
36
+ print("Cookie banner accepted.")
37
+ except TimeoutException:
38
+ print("No cookie banner found or it took too long.")
39
+
40
+ # βœ… Tunggu elemen tabel tim muncul (div wrapper)
41
+ try:
42
+ wait = WebDriverWait(driver, 20)
43
+ div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
44
+ print("βœ… Team stats div found, extracting HTML...")
45
+
46
+ # Ambil HTML hanya bagian tabel team passing
47
+ team_html = div_element.get_attribute("outerHTML")
48
+ except TimeoutException:
49
+ print("❌ The team stats table could not be found on the page. Saving debug files...")
50
+ driver.save_screenshot('debug_screenshot.png')
51
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
52
+ f.write(driver.page_source)
53
+ driver.quit()
54
+ return None
55
+
56
+ driver.quit()
57
+ print("Data downloaded. Processing with pandas...")
58
+
59
+ # Baca tabel dari potongan HTML
60
+ team_df = pd.read_html(StringIO(team_html))[0]
61
+ print(f"βœ… Found team table with shape: {team_df.shape}")
62
+
63
+ # Jika ada header dua baris, gabungkan
64
+ if isinstance(team_df.columns, pd.MultiIndex):
65
+ team_df.columns = ['_'.join(col).strip() for col in team_df.columns.values]
66
+
67
+ # Pilih kolom utama yang relevan
68
+ cols_to_use = [c for c in team_df.columns if any(x in c for x in ['Squad', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
69
+ team_df = team_df[cols_to_use]
70
+
71
+ # Normalisasi nama kolom
72
+ rename_map = {}
73
+ for c in team_df.columns:
74
+ if 'Squad' in c: rename_map[c] = 'Squad'
75
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
76
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
77
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
78
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
79
+ team_df.rename(columns=rename_map, inplace=True)
80
+
81
+ team_df = team_df[team_df['Squad'].notna()]
82
+ team_df = team_df[~team_df['Squad'].str.contains("Squad|Rk", na=False)]
83
+
84
+ return team_df
85
+
86
+
87
+ def filter_teams(df, teams):
88
+ return df[df["Squad"].isin(teams)]
89
+
90
+
91
+ def main():
92
+ df = pull_premier_league_team_passing()
93
+ if df is not None:
94
+ # Simpan ke CSV otomatis
95
+ df.to_csv("premier_league_team_passing.csv", index=False)
96
+ print("\nπŸ’Ύ Saved to premier_league_team_passing.csv")
97
+
98
+ teams = ["Wolves", "Brighton"]
99
+ df_filtered = filter_teams(df, teams)
100
+ print("\nπŸ“Š Passing Stats for Wolves & Brighton (Team Level)")
101
+ print("=" * 70)
102
+ print(df_filtered)
103
+
104
+
105
+ if __name__ == "__main__":
106
+ main()
.history/fbrefdata_example_20251005092144.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
16
+ print(f"Opening browser to download team passing stats from {url} ...")
17
+ time.sleep(5)
18
+
19
+ options = ChromeOptions()
20
+ options.add_argument("--start-maximized")
21
+ options.add_argument("--no-sandbox")
22
+ options.add_argument("--disable-dev-shm-usage")
23
+ # options.add_argument("--headless") # bisa diaktifkan jika tidak perlu melihat browser
24
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
25
+ options.add_experimental_option('useAutomationExtension', False)
26
+
27
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
28
+ driver.get(url)
29
+
30
+ try:
31
+ wait = WebDriverWait(driver, 10)
32
+ accept_button = wait.until(EC.element_to_be_clickable(
33
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
34
+ accept_button.click()
35
+ print("Cookie banner accepted.")
36
+ except TimeoutException:
37
+ print("No cookie banner found or it took too long.")
38
+
39
+ # βœ… Tunggu elemen tabel tim muncul (div wrapper)
40
+ try:
41
+ wait = WebDriverWait(driver, 20)
42
+ div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
43
+ print("βœ… Team stats div found, extracting HTML...")
44
+
45
+ # Ambil HTML hanya bagian tabel team passing
46
+ team_html = div_element.get_attribute("outerHTML")
47
+ except TimeoutException:
48
+ print("❌ The team stats table could not be found on the page. Saving debug files...")
49
+ driver.save_screenshot('debug_screenshot.png')
50
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
51
+ f.write(driver.page_source)
52
+ driver.quit()
53
+ return None
54
+
55
+ driver.quit()
56
+ print("Data downloaded. Processing with pandas...")
57
+
58
+ # Baca tabel dari potongan HTML
59
+ team_df = pd.read_html(StringIO(team_html))[0]
60
+ print(f"βœ… Found team table with shape: {team_df.shape}")
61
+
62
+ # Jika ada header dua baris, gabungkan
63
+ if isinstance(team_df.columns, pd.MultiIndex):
64
+ team_df.columns = ['_'.join(col).strip() for col in team_df.columns.values]
65
+
66
+ # Pilih kolom utama yang relevan
67
+ cols_to_use = [c for c in team_df.columns if any(x in c for x in ['Squad', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
68
+ team_df = team_df[cols_to_use]
69
+
70
+ # Normalisasi nama kolom
71
+ rename_map = {}
72
+ for c in team_df.columns:
73
+ if 'Squad' in c: rename_map[c] = 'Squad'
74
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
75
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
76
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
77
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
78
+ team_df.rename(columns=rename_map, inplace=True)
79
+
80
+ team_df = team_df[team_df['Squad'].notna()]
81
+ team_df = team_df[~team_df['Squad'].str.contains("Squad|Rk", na=False)]
82
+
83
+ return team_df
84
+
85
+
86
+ def filter_teams(df, teams):
87
+ return df[df["Squad"].isin(teams)]
88
+
89
+
90
+ def main():
91
+ df = pull_premier_league_team_passing()
92
+ if df is not None:
93
+ # Simpan ke CSV otomatis
94
+ df.to_csv("premier_league_team_passing.csv", index=False)
95
+ print("\nπŸ’Ύ Saved to premier_league_team_passing.csv")
96
+
97
+ teams = ["Wolves", "Brighton"]
98
+ df_filtered = filter_teams(df, teams)
99
+ print("\nπŸ“Š Passing Stats for Wolves & Brighton (Team Level)")
100
+ print("=" * 70)
101
+ print(df_filtered)
102
+
103
+
104
+ if __name__ == "__main__":
105
+ main()
.history/fbrefdata_example_20251005092150.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
16
+ print(f"Opening browser to download team passing stats from {url} ...")
17
+ time.sleep(5)
18
+
19
+ options = ChromeOptions()
20
+ options.add_argument("--start-maximized")
21
+ options.add_argument("--no-sandbox")
22
+ options.add_argument("--disable-dev-shm-usage")
23
+ # options.add_argument("--headless") # bisa diaktifkan jika tidak perlu melihat browser
24
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
25
+ options.add_experimental_option('useAutomationExtension', False)
26
+
27
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
28
+ driver.get(url)
29
+
30
+ try:
31
+ wait = WebDriverWait(driver, 10)
32
+ accept_button = wait.until(EC.element_to_be_clickable(
33
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
34
+ accept_button.click()
35
+ print("Cookie banner accepted.")
36
+ except TimeoutException:
37
+ print("No cookie banner found or it took too long.")
38
+
39
+ # βœ… Tunggu elemen tabel tim muncul (div wrapper)
40
+ try:
41
+ wait = WebDriverWait(driver, 20)
42
+ div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
43
+ print("βœ… Team stats div found, extracting HTML...")
44
+
45
+ # Ambil HTML hanya bagian tabel team passing
46
+ team_html = div_element.get_attribute("outerHTML")
47
+ except TimeoutException:
48
+ print("❌ The team stats table could not be found on the page. Saving debug files...")
49
+ driver.save_screenshot('debug_screenshot.png')
50
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
51
+ f.write(driver.page_source)
52
+ driver.quit()
53
+ return None
54
+
55
+ driver.quit()
56
+ print("Data downloaded. Processing with pandas...")
57
+
58
+ # Baca tabel dari potongan HTML
59
+ team_df = pd.read_html(StringIO(team_html))[0]
60
+ print(f"βœ… Found team table with shape: {team_df.shape}")
61
+
62
+ # Jika ada header dua baris, gabungkan
63
+ if isinstance(team_df.columns, pd.MultiIndex):
64
+ team_df.columns = ['_'.join(col).strip() for col in team_df.columns.values]
65
+
66
+ # Pilih kolom utama yang relevan
67
+ cols_to_use = [c for c in team_df.columns if any(x in c for x in ['Squad', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
68
+ team_df = team_df[cols_to_use]
69
+
70
+ # Normalisasi nama kolom
71
+ rename_map = {}
72
+ for c in team_df.columns:
73
+ if 'Squad' in c: rename_map[c] = 'Squad'
74
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
75
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
76
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
77
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
78
+ team_df.rename(columns=rename_map, inplace=True)
79
+
80
+ team_df = team_df[team_df['Squad'].notna()]
81
+ team_df = team_df[~team_df['Squad'].str.contains("Squad|Rk", na=False)]
82
+
83
+ return team_df
84
+
85
+
86
+ def filter_teams(df, teams):
87
+ return df[df["Squad"].isin(teams)]
88
+
89
+
90
+ def main():
91
+ df = pull_premier_league_team_passing()
92
+ if df is not None:
93
+ # Simpan ke CSV otomatis
94
+ df.to_csv("premier_league_team_passing.csv", index=False)
95
+ print("\nπŸ’Ύ Saved to premier_league_team_passing.csv")
96
+
97
+ teams = ["Wolves", "Brighton"]
98
+ df_filtered = filter_teams(df, teams)
99
+ print("\nπŸ“Š Passing Stats for Wolves & Brighton (Team Level)")
100
+ print("=" * 70)
101
+ print(df_filtered)
102
+
103
+
104
+ if __name__ == "__main__":
105
+ main()
.history/fbrefdata_example_20251005092800.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "url = "https://fbref.com/en/comps/9/teams/Premier-League-Stats"
16
+ "
17
+ print(f"Opening browser to download team passing stats from {url} ...")
18
+ time.sleep(5)
19
+
20
+ options = ChromeOptions()
21
+ options.add_argument("--start-maximized")
22
+ options.add_argument("--no-sandbox")
23
+ options.add_argument("--disable-dev-shm-usage")
24
+ # options.add_argument("--headless") # bisa diaktifkan jika tidak perlu melihat browser
25
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
26
+ options.add_experimental_option('useAutomationExtension', False)
27
+
28
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
29
+ driver.get(url)
30
+
31
+ try:
32
+ wait = WebDriverWait(driver, 10)
33
+ accept_button = wait.until(EC.element_to_be_clickable(
34
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
35
+ accept_button.click()
36
+ print("Cookie banner accepted.")
37
+ except TimeoutException:
38
+ print("No cookie banner found or it took too long.")
39
+
40
+ # βœ… Tunggu elemen tabel tim muncul (div wrapper)
41
+ try:
42
+ wait = WebDriverWait(driver, 20)
43
+ div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
44
+ print("βœ… Team stats div found, extracting HTML...")
45
+
46
+ # Ambil HTML hanya bagian tabel team passing
47
+ team_html = div_element.get_attribute("outerHTML")
48
+ except TimeoutException:
49
+ print("❌ The team stats table could not be found on the page. Saving debug files...")
50
+ driver.save_screenshot('debug_screenshot.png')
51
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
52
+ f.write(driver.page_source)
53
+ driver.quit()
54
+ return None
55
+
56
+ driver.quit()
57
+ print("Data downloaded. Processing with pandas...")
58
+
59
+ # Baca tabel dari potongan HTML
60
+ team_df = pd.read_html(StringIO(team_html))[0]
61
+ print(f"βœ… Found team table with shape: {team_df.shape}")
62
+
63
+ # Jika ada header dua baris, gabungkan
64
+ if isinstance(team_df.columns, pd.MultiIndex):
65
+ team_df.columns = ['_'.join(col).strip() for col in team_df.columns.values]
66
+
67
+ # Pilih kolom utama yang relevan
68
+ cols_to_use = [c for c in team_df.columns if any(x in c for x in ['Squad', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
69
+ team_df = team_df[cols_to_use]
70
+
71
+ # Normalisasi nama kolom
72
+ rename_map = {}
73
+ for c in team_df.columns:
74
+ if 'Squad' in c: rename_map[c] = 'Squad'
75
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
76
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
77
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
78
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
79
+ team_df.rename(columns=rename_map, inplace=True)
80
+
81
+ team_df = team_df[team_df['Squad'].notna()]
82
+ team_df = team_df[~team_df['Squad'].str.contains("Squad|Rk", na=False)]
83
+
84
+ return team_df
85
+
86
+
87
+ def filter_teams(df, teams):
88
+ return df[df["Squad"].isin(teams)]
89
+
90
+
91
+ def main():
92
+ df = pull_premier_league_team_passing()
93
+ if df is not None:
94
+ # Simpan ke CSV otomatis
95
+ df.to_csv("premier_league_team_passing.csv", index=False)
96
+ print("\nπŸ’Ύ Saved to premier_league_team_passing.csv")
97
+
98
+ teams = ["Wolves", "Brighton"]
99
+ df_filtered = filter_teams(df, teams)
100
+ print("\nπŸ“Š Passing Stats for Wolves & Brighton (Team Level)")
101
+ print("=" * 70)
102
+ print(df_filtered)
103
+
104
+
105
+ if __name__ == "__main__":
106
+ main()
.history/fbrefdata_example_20251005092803.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "url = "https://fbref.com/en/comps/9/teams/Premier-League-Stats"
16
+ print(f"Opening browser to download team passing stats from {url} ...")
17
+ time.sleep(5)
18
+
19
+ options = ChromeOptions()
20
+ options.add_argument("--start-maximized")
21
+ options.add_argument("--no-sandbox")
22
+ options.add_argument("--disable-dev-shm-usage")
23
+ # options.add_argument("--headless") # bisa diaktifkan jika tidak perlu melihat browser
24
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
25
+ options.add_experimental_option('useAutomationExtension', False)
26
+
27
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
28
+ driver.get(url)
29
+
30
+ try:
31
+ wait = WebDriverWait(driver, 10)
32
+ accept_button = wait.until(EC.element_to_be_clickable(
33
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
34
+ accept_button.click()
35
+ print("Cookie banner accepted.")
36
+ except TimeoutException:
37
+ print("No cookie banner found or it took too long.")
38
+
39
+ # βœ… Tunggu elemen tabel tim muncul (div wrapper)
40
+ try:
41
+ wait = WebDriverWait(driver, 20)
42
+ div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
43
+ print("βœ… Team stats div found, extracting HTML...")
44
+
45
+ # Ambil HTML hanya bagian tabel team passing
46
+ team_html = div_element.get_attribute("outerHTML")
47
+ except TimeoutException:
48
+ print("❌ The team stats table could not be found on the page. Saving debug files...")
49
+ driver.save_screenshot('debug_screenshot.png')
50
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
51
+ f.write(driver.page_source)
52
+ driver.quit()
53
+ return None
54
+
55
+ driver.quit()
56
+ print("Data downloaded. Processing with pandas...")
57
+
58
+ # Baca tabel dari potongan HTML
59
+ team_df = pd.read_html(StringIO(team_html))[0]
60
+ print(f"βœ… Found team table with shape: {team_df.shape}")
61
+
62
+ # Jika ada header dua baris, gabungkan
63
+ if isinstance(team_df.columns, pd.MultiIndex):
64
+ team_df.columns = ['_'.join(col).strip() for col in team_df.columns.values]
65
+
66
+ # Pilih kolom utama yang relevan
67
+ cols_to_use = [c for c in team_df.columns if any(x in c for x in ['Squad', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
68
+ team_df = team_df[cols_to_use]
69
+
70
+ # Normalisasi nama kolom
71
+ rename_map = {}
72
+ for c in team_df.columns:
73
+ if 'Squad' in c: rename_map[c] = 'Squad'
74
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
75
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
76
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
77
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
78
+ team_df.rename(columns=rename_map, inplace=True)
79
+
80
+ team_df = team_df[team_df['Squad'].notna()]
81
+ team_df = team_df[~team_df['Squad'].str.contains("Squad|Rk", na=False)]
82
+
83
+ return team_df
84
+
85
+
86
+ def filter_teams(df, teams):
87
+ return df[df["Squad"].isin(teams)]
88
+
89
+
90
+ def main():
91
+ df = pull_premier_league_team_passing()
92
+ if df is not None:
93
+ # Simpan ke CSV otomatis
94
+ df.to_csv("premier_league_team_passing.csv", index=False)
95
+ print("\nπŸ’Ύ Saved to premier_league_team_passing.csv")
96
+
97
+ teams = ["Wolves", "Brighton"]
98
+ df_filtered = filter_teams(df, teams)
99
+ print("\nπŸ“Š Passing Stats for Wolves & Brighton (Team Level)")
100
+ print("=" * 70)
101
+ print(df_filtered)
102
+
103
+
104
+ if __name__ == "__main__":
105
+ main()
.history/fbrefdata_example_20251005092809.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
16
+ print(f"Opening browser to download team passing stats from {url} ...")
17
+ time.sleep(5)
18
+
19
+ options = ChromeOptions()
20
+ options.add_argument("--start-maximized")
21
+ options.add_argument("--no-sandbox")
22
+ options.add_argument("--disable-dev-shm-usage")
23
+ # options.add_argument("--headless") # bisa diaktifkan jika tidak perlu melihat browser
24
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
25
+ options.add_experimental_option('useAutomationExtension', False)
26
+
27
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
28
+ driver.get(url)
29
+
30
+ try:
31
+ wait = WebDriverWait(driver, 10)
32
+ accept_button = wait.until(EC.element_to_be_clickable(
33
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
34
+ accept_button.click()
35
+ print("Cookie banner accepted.")
36
+ except TimeoutException:
37
+ print("No cookie banner found or it took too long.")
38
+
39
+ # βœ… Tunggu elemen tabel tim muncul (div wrapper)
40
+ try:
41
+ wait = WebDriverWait(driver, 20)
42
+ div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
43
+ print("βœ… Team stats div found, extracting HTML...")
44
+
45
+ # Ambil HTML hanya bagian tabel team passing
46
+ team_html = div_element.get_attribute("outerHTML")
47
+ except TimeoutException:
48
+ print("❌ The team stats table could not be found on the page. Saving debug files...")
49
+ driver.save_screenshot('debug_screenshot.png')
50
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
51
+ f.write(driver.page_source)
52
+ driver.quit()
53
+ return None
54
+
55
+ driver.quit()
56
+ print("Data downloaded. Processing with pandas...")
57
+
58
+ # Baca tabel dari potongan HTML
59
+ team_df = pd.read_html(StringIO(team_html))[0]
60
+ print(f"βœ… Found team table with shape: {team_df.shape}")
61
+
62
+ # Jika ada header dua baris, gabungkan
63
+ if isinstance(team_df.columns, pd.MultiIndex):
64
+ team_df.columns = ['_'.join(col).strip() for col in team_df.columns.values]
65
+
66
+ # Pilih kolom utama yang relevan
67
+ cols_to_use = [c for c in team_df.columns if any(x in c for x in ['Squad', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
68
+ team_df = team_df[cols_to_use]
69
+
70
+ # Normalisasi nama kolom
71
+ rename_map = {}
72
+ for c in team_df.columns:
73
+ if 'Squad' in c: rename_map[c] = 'Squad'
74
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
75
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
76
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
77
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
78
+ team_df.rename(columns=rename_map, inplace=True)
79
+
80
+ team_df = team_df[team_df['Squad'].notna()]
81
+ team_df = team_df[~team_df['Squad'].str.contains("Squad|Rk", na=False)]
82
+
83
+ return team_df
84
+
85
+
86
+ def filter_teams(df, teams):
87
+ return df[df["Squad"].isin(teams)]
88
+
89
+
90
+ def main():
91
+ df = pull_premier_league_team_passing()
92
+ if df is not None:
93
+ # Simpan ke CSV otomatis
94
+ df.to_csv("premier_league_team_passing.csv", index=False)
95
+ print("\nπŸ’Ύ Saved to premier_league_team_passing.csv")
96
+
97
+ teams = ["Wolves", "Brighton"]
98
+ df_filtered = filter_teams(df, teams)
99
+ print("\nπŸ“Š Passing Stats for Wolves & Brighton (Team Level)")
100
+ print("=" * 70)
101
+ print(df_filtered)
102
+
103
+
104
+ if __name__ == "__main__":
105
+ main()
.history/fbrefdata_example_20251005092817.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "https://fbref.com/en/comps/9/teams/Premier-League-Stats"
16
+
17
+ print(f"Opening browser to download team passing stats from {url} ...")
18
+ time.sleep(5)
19
+
20
+ options = ChromeOptions()
21
+ options.add_argument("--start-maximized")
22
+ options.add_argument("--no-sandbox")
23
+ options.add_argument("--disable-dev-shm-usage")
24
+ # options.add_argument("--headless") # bisa diaktifkan jika tidak perlu melihat browser
25
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
26
+ options.add_experimental_option('useAutomationExtension', False)
27
+
28
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
29
+ driver.get(url)
30
+
31
+ try:
32
+ wait = WebDriverWait(driver, 10)
33
+ accept_button = wait.until(EC.element_to_be_clickable(
34
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
35
+ accept_button.click()
36
+ print("Cookie banner accepted.")
37
+ except TimeoutException:
38
+ print("No cookie banner found or it took too long.")
39
+
40
+ # βœ… Tunggu elemen tabel tim muncul (div wrapper)
41
+ try:
42
+ wait = WebDriverWait(driver, 20)
43
+ div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
44
+ print("βœ… Team stats div found, extracting HTML...")
45
+
46
+ # Ambil HTML hanya bagian tabel team passing
47
+ team_html = div_element.get_attribute("outerHTML")
48
+ except TimeoutException:
49
+ print("❌ The team stats table could not be found on the page. Saving debug files...")
50
+ driver.save_screenshot('debug_screenshot.png')
51
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
52
+ f.write(driver.page_source)
53
+ driver.quit()
54
+ return None
55
+
56
+ driver.quit()
57
+ print("Data downloaded. Processing with pandas...")
58
+
59
+ # Baca tabel dari potongan HTML
60
+ team_df = pd.read_html(StringIO(team_html))[0]
61
+ print(f"βœ… Found team table with shape: {team_df.shape}")
62
+
63
+ # Jika ada header dua baris, gabungkan
64
+ if isinstance(team_df.columns, pd.MultiIndex):
65
+ team_df.columns = ['_'.join(col).strip() for col in team_df.columns.values]
66
+
67
+ # Pilih kolom utama yang relevan
68
+ cols_to_use = [c for c in team_df.columns if any(x in c for x in ['Squad', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
69
+ team_df = team_df[cols_to_use]
70
+
71
+ # Normalisasi nama kolom
72
+ rename_map = {}
73
+ for c in team_df.columns:
74
+ if 'Squad' in c: rename_map[c] = 'Squad'
75
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
76
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
77
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
78
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
79
+ team_df.rename(columns=rename_map, inplace=True)
80
+
81
+ team_df = team_df[team_df['Squad'].notna()]
82
+ team_df = team_df[~team_df['Squad'].str.contains("Squad|Rk", na=False)]
83
+
84
+ return team_df
85
+
86
+
87
+ def filter_teams(df, teams):
88
+ return df[df["Squad"].isin(teams)]
89
+
90
+
91
+ def main():
92
+ df = pull_premier_league_team_passing()
93
+ if df is not None:
94
+ # Simpan ke CSV otomatis
95
+ df.to_csv("premier_league_team_passing.csv", index=False)
96
+ print("\nπŸ’Ύ Saved to premier_league_team_passing.csv")
97
+
98
+ teams = ["Wolves", "Brighton"]
99
+ df_filtered = filter_teams(df, teams)
100
+ print("\nπŸ“Š Passing Stats for Wolves & Brighton (Team Level)")
101
+ print("=" * 70)
102
+ print(df_filtered)
103
+
104
+
105
+ if __name__ == "__main__":
106
+ main()
.history/fbrefdata_example_20251005092820.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "https://fbref.com/en/comps/9/teams/Premier-League-Stats"
16
+
17
+ print(f"Opening browser to download team passing stats from {url} ...")
18
+ time.sleep(5)
19
+
20
+ options = ChromeOptions()
21
+ options.add_argument("--start-maximized")
22
+ options.add_argument("--no-sandbox")
23
+ options.add_argument("--disable-dev-shm-usage")
24
+ # options.add_argument("--headless") # bisa diaktifkan jika tidak perlu melihat browser
25
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
26
+ options.add_experimental_option('useAutomationExtension', False)
27
+
28
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
29
+ driver.get(url)
30
+
31
+ try:
32
+ wait = WebDriverWait(driver, 10)
33
+ accept_button = wait.until(EC.element_to_be_clickable(
34
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
35
+ accept_button.click()
36
+ print("Cookie banner accepted.")
37
+ except TimeoutException:
38
+ print("No cookie banner found or it took too long.")
39
+
40
+ # βœ… Tunggu elemen tabel tim muncul (div wrapper)
41
+ try:
42
+ wait = WebDriverWait(driver, 20)
43
+ div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
44
+ print("βœ… Team stats div found, extracting HTML...")
45
+
46
+ # Ambil HTML hanya bagian tabel team passing
47
+ team_html = div_element.get_attribute("outerHTML")
48
+ except TimeoutException:
49
+ print("❌ The team stats table could not be found on the page. Saving debug files...")
50
+ driver.save_screenshot('debug_screenshot.png')
51
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
52
+ f.write(driver.page_source)
53
+ driver.quit()
54
+ return None
55
+
56
+ driver.quit()
57
+ print("Data downloaded. Processing with pandas...")
58
+
59
+ # Baca tabel dari potongan HTML
60
+ team_df = pd.read_html(StringIO(team_html))[0]
61
+ print(f"βœ… Found team table with shape: {team_df.shape}")
62
+
63
+ # Jika ada header dua baris, gabungkan
64
+ if isinstance(team_df.columns, pd.MultiIndex):
65
+ team_df.columns = ['_'.join(col).strip() for col in team_df.columns.values]
66
+
67
+ # Pilih kolom utama yang relevan
68
+ cols_to_use = [c for c in team_df.columns if any(x in c for x in ['Squad', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
69
+ team_df = team_df[cols_to_use]
70
+
71
+ # Normalisasi nama kolom
72
+ rename_map = {}
73
+ for c in team_df.columns:
74
+ if 'Squad' in c: rename_map[c] = 'Squad'
75
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
76
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
77
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
78
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
79
+ team_df.rename(columns=rename_map, inplace=True)
80
+
81
+ team_df = team_df[team_df['Squad'].notna()]
82
+ team_df = team_df[~team_df['Squad'].str.contains("Squad|Rk", na=False)]
83
+
84
+ return team_df
85
+
86
+
87
+ def filter_teams(df, teams):
88
+ return df[df["Squad"].isin(teams)]
89
+
90
+
91
+ def main():
92
+ df = pull_premier_league_team_passing()
93
+ if df is not None:
94
+ # Simpan ke CSV otomatis
95
+ df.to_csv("premier_league_team_passing.csv", index=False)
96
+ print("\nπŸ’Ύ Saved to premier_league_team_passing.csv")
97
+
98
+ teams = ["Wolves", "Brighton"]
99
+ df_filtered = filter_teams(df, teams)
100
+ print("\nπŸ“Š Passing Stats for Wolves & Brighton (Team Level)")
101
+ print("=" * 70)
102
+ print(df_filtered)
103
+
104
+
105
+ if __name__ == "__main__":
106
+ main()
.history/fbrefdata_example_20251005092822.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_team_passing():
15
+ url = "https://fbref.com/en/comps/9/teams/Premier-League-Stats"
16
+ print(f"Opening browser to download team passing stats from {url} ...")
17
+ time.sleep(5)
18
+
19
+ options = ChromeOptions()
20
+ options.add_argument("--start-maximized")
21
+ options.add_argument("--no-sandbox")
22
+ options.add_argument("--disable-dev-shm-usage")
23
+ # options.add_argument("--headless") # bisa diaktifkan jika tidak perlu melihat browser
24
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
25
+ options.add_experimental_option('useAutomationExtension', False)
26
+
27
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
28
+ driver.get(url)
29
+
30
+ try:
31
+ wait = WebDriverWait(driver, 10)
32
+ accept_button = wait.until(EC.element_to_be_clickable(
33
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")))
34
+ accept_button.click()
35
+ print("Cookie banner accepted.")
36
+ except TimeoutException:
37
+ print("No cookie banner found or it took too long.")
38
+
39
+ # βœ… Tunggu elemen tabel tim muncul (div wrapper)
40
+ try:
41
+ wait = WebDriverWait(driver, 20)
42
+ div_element = wait.until(EC.presence_of_element_located((By.ID, "div_stats_passing_team")))
43
+ print("βœ… Team stats div found, extracting HTML...")
44
+
45
+ # Ambil HTML hanya bagian tabel team passing
46
+ team_html = div_element.get_attribute("outerHTML")
47
+ except TimeoutException:
48
+ print("❌ The team stats table could not be found on the page. Saving debug files...")
49
+ driver.save_screenshot('debug_screenshot.png')
50
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
51
+ f.write(driver.page_source)
52
+ driver.quit()
53
+ return None
54
+
55
+ driver.quit()
56
+ print("Data downloaded. Processing with pandas...")
57
+
58
+ # Baca tabel dari potongan HTML
59
+ team_df = pd.read_html(StringIO(team_html))[0]
60
+ print(f"βœ… Found team table with shape: {team_df.shape}")
61
+
62
+ # Jika ada header dua baris, gabungkan
63
+ if isinstance(team_df.columns, pd.MultiIndex):
64
+ team_df.columns = ['_'.join(col).strip() for col in team_df.columns.values]
65
+
66
+ # Pilih kolom utama yang relevan
67
+ cols_to_use = [c for c in team_df.columns if any(x in c for x in ['Squad', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
68
+ team_df = team_df[cols_to_use]
69
+
70
+ # Normalisasi nama kolom
71
+ rename_map = {}
72
+ for c in team_df.columns:
73
+ if 'Squad' in c: rename_map[c] = 'Squad'
74
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
75
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
76
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
77
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
78
+ team_df.rename(columns=rename_map, inplace=True)
79
+
80
+ team_df = team_df[team_df['Squad'].notna()]
81
+ team_df = team_df[~team_df['Squad'].str.contains("Squad|Rk", na=False)]
82
+
83
+ return team_df
84
+
85
+
86
+ def filter_teams(df, teams):
87
+ return df[df["Squad"].isin(teams)]
88
+
89
+
90
+ def main():
91
+ df = pull_premier_league_team_passing()
92
+ if df is not None:
93
+ # Simpan ke CSV otomatis
94
+ df.to_csv("premier_league_team_passing.csv", index=False)
95
+ print("\nπŸ’Ύ Saved to premier_league_team_passing.csv")
96
+
97
+ teams = ["Wolves", "Brighton"]
98
+ df_filtered = filter_teams(df, teams)
99
+ print("\nπŸ“Š Passing Stats for Wolves & Brighton (Team Level)")
100
+ print("=" * 70)
101
+ print(df_filtered)
102
+
103
+
104
+ if __name__ == "__main__":
105
+ main()
.history/fbrefdata_example_20251005092904.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
+ def pull_premier_league_passing():
15
+ """
16
+ Ambil data passing (otomatis deteksi: tim atau pemain)
17
+ dari halaman FBref Premier League terbaru.
18
+ """
19
+ # URL utama
20
+ url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
21
+ print(f"🌐 Opening browser to download passing stats from {url} ...")
22
+
23
+ # --- Setup browser Chrome ---
24
+ options = ChromeOptions()
25
+ options.add_argument("--start-maximized")
26
+ options.add_argument("--no-sandbox")
27
+ options.add_argument("--disable-dev-shm-usage")
28
+ # options.add_argument("--headless") # aktifkan jika ingin tanpa tampilan browser
29
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
30
+ options.add_experimental_option('useAutomationExtension', False)
31
+
32
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
33
+ driver.get(url)
34
+
35
+ # --- Handle cookie banner (jika muncul) ---
36
+ try:
37
+ wait = WebDriverWait(driver, 10)
38
+ accept_button = wait.until(EC.element_to_be_clickable(
39
+ (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")
40
+ ))
41
+ accept_button.click()
42
+ print("πŸͺ Cookie banner accepted.")
43
+ except TimeoutException:
44
+ print("No cookie banner found or it took too long.")
45
+
46
+ # --- Coba deteksi tabel TIM terlebih dahulu ---
47
+ table_html = None
48
+ try:
49
+ wait = WebDriverWait(driver, 15)
50
+ div_team = wait.until(EC.presence_of_element_located((By.ID, "all_stats_passing_team")))
51
+ print("βœ… Team passing table found.")
52
+ table_html = div_team.get_attribute("outerHTML")
53
+ table_type = "team"
54
+ except TimeoutException:
55
+ print("⚠️ Team passing table not found. Trying player table...")
56
+
57
+ # --- Fallback ke tabel pemain ---
58
+ try:
59
+ div_player = wait.until(EC.presence_of_element_located((By.ID, "all_stats_passing")))
60
+ print("βœ… Player passing table found.")
61
+ table_html = div_player.get_attribute("outerHTML")
62
+ table_type = "player"
63
+ except TimeoutException:
64
+ print("❌ No passing table found at all. Saving debug files...")
65
+ driver.save_screenshot('debug_screenshot.png')
66
+ with open('debug_page.html', 'w', encoding='utf-8') as f:
67
+ f.write(driver.page_source)
68
+ driver.quit()
69
+ return None
70
+
71
+ driver.quit()
72
+ print("πŸ“„ Data downloaded. Processing with pandas...")
73
+
74
+ # --- Parse HTML table ke DataFrame ---
75
+ df = pd.read_html(StringIO(table_html))[0]
76
+ print(f"βœ… Table found with shape: {df.shape}")
77
+
78
+ # Gabungkan header dua baris (jika ada)
79
+ if isinstance(df.columns, pd.MultiIndex):
80
+ df.columns = ['_'.join(col).strip() for col in df.columns.values]
81
+
82
+ # Pilih kolom relevan
83
+ cols_to_use = [c for c in df.columns if any(x in c for x in ['Squad', 'Player', 'Cmp', 'Att', 'Cmp%', 'TotDist'])]
84
+ df = df[cols_to_use]
85
+
86
+ # Normalisasi nama kolom
87
+ rename_map = {}
88
+ for c in df.columns:
89
+ if 'Squad' in c: rename_map[c] = 'Squad'
90
+ elif 'Player' in c: rename_map[c] = 'Player'
91
+ elif 'Cmp%' in c: rename_map[c] = 'Total_Cmp%'
92
+ elif 'Cmp' in c and 'Cmp%' not in c: rename_map[c] = 'Total_Cmp'
93
+ elif 'Att' in c: rename_map[c] = 'Total_Att'
94
+ elif 'TotDist' in c: rename_map[c] = 'Total_TotDist'
95
+ df.rename(columns=rename_map, inplace=True)
96
+
97
+ # Bersihkan baris kosong / header duplikat
98
+ if 'Squad' in df.columns:
99
+ df = df[df['Squad'].notna()]
100
+ df = df[~df['Squad'].str.contains("Squad|Rk", na=False)]
101
+
102
+ print(f"βœ… Cleaned dataframe shape: {df.shape}")
103
+ return df, table_type
104
+
105
+
106
+ def filter_teams(df, teams):
107
+ """Filter baris berdasarkan nama tim"""
108
+ if "Squad" not in df.columns:
109
+ print("⚠️ 'Squad' column not found, skipping team filter.")
110
+ return df
111
+ return df[df["Squad"].isin(teams)]
112
+
113
+
114
+ def main():
115
+ df, table_type = pull_premier_league_passing()
116
+ if df is not None:
117
+ # Simpan hasil
118
+ filename = f"premier_league_{table_type}_passing.csv"
119
+ df.to_csv(filename, index=False)
120
+ print(f"\nπŸ’Ύ Saved to {filename}")
121
+
122
+ # Filter contoh tim
123
+ teams = ["Arsenal", "Wolves", "Brighton"]
124
+ df_filtered = filter_teams(df, teams)
125
+ print(f"\nπŸ“Š Passing Stats ({table_type.title()} Level) for selected teams")
126
+ print("=" * 80)
127
+ print(df_filtered.head())
128
+
129
+
130
+ if __name__ == "__main__":
131
+ main()
.history/fbrefdata_example_20251005093119.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup, Comment
3
+ import pandas as pd
4
+ import re
5
+
6
# === 1. Target URL (latest Premier League passing stats) ===
URL = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"

print(f"πŸ“‘ Mengambil data dari {URL} ...")

# === 2. Fetch the HTML page ===
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/120.0.0.0 Safari/537.36"
}
# requests applies no timeout by default; without one a stalled connection
# would block this script forever.
response = requests.get(URL, headers=headers, timeout=30)

if response.status_code != 200:
    raise RuntimeError(f"Gagal mengunduh halaman (status code {response.status_code})")

html = response.text

# === 3. Locate the passing table, which may be hidden in an HTML comment ===
soup = BeautifulSoup(html, "html.parser")

# FBref often wraps tables in <!-- ... --> comments, so search those first.
comments = soup.find_all(string=lambda text: isinstance(text, Comment))
passing_table_html = next(
    (c for c in comments if 'table' in c and 'id="stats_passing' in c),
    None,
)

# === 4. Parse the table ===
if passing_table_html is not None:
    passing_soup = BeautifulSoup(passing_table_html, "html.parser")
    table = passing_soup.find("table")
    if table is None:
        raise RuntimeError("❌ Tidak bisa mem-parse tabel dari komentar HTML.")
else:
    # Fallback: the table is not always commented out, so also look for it
    # in the live DOM before giving up.
    table = soup.find("table", id=re.compile(r"^stats_passing"))
    if table is None:
        raise RuntimeError("❌ Tabel passing tidak ditemukan. Mungkin struktur halaman berubah.")

# === 5. Convert to a DataFrame ===
# Passing literal HTML to read_html is deprecated; wrap it in a file-like object.
from io import StringIO

df = pd.read_html(StringIO(str(table)))[0]

# === 6. Clean up the columns ===
# Two-row headers arrive as tuples; join them into a single flat name.
df.columns = [' '.join(col).strip() if isinstance(col, tuple) else col for col in df.columns]
df = df.dropna(how='all')  # drop completely empty rows

# === 7. Save to CSV ===
csv_name = "premier_league_passing_2025.csv"
df.to_csv(csv_name, index=False)
print(f"βœ… Data berhasil diunduh dan disimpan ke {csv_name}")

# === 8. Show a preview ===
print("\n=== Preview Data ===")
print(df.head(10))
.history/fbrefdata_example_20251005093129.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup, Comment
3
+ import pandas as pd
4
+ import re
5
+
6
# === 1. Target URL (latest Premier League passing stats) ===
URL = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"

print(f"πŸ“‘ Mengambil data dari {URL} ...")

# === 2. Fetch the HTML page ===
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/120.0.0.0 Safari/537.36"
}
# requests applies no timeout by default; without one a stalled connection
# would block this script forever.
response = requests.get(URL, headers=headers, timeout=30)

if response.status_code != 200:
    raise RuntimeError(f"Gagal mengunduh halaman (status code {response.status_code})")

html = response.text

# === 3. Locate the passing table, which may be hidden in an HTML comment ===
soup = BeautifulSoup(html, "html.parser")

# FBref often wraps tables in <!-- ... --> comments, so search those first.
comments = soup.find_all(string=lambda text: isinstance(text, Comment))
passing_table_html = next(
    (c for c in comments if 'table' in c and 'id="stats_passing' in c),
    None,
)

# === 4. Parse the table ===
if passing_table_html is not None:
    passing_soup = BeautifulSoup(passing_table_html, "html.parser")
    table = passing_soup.find("table")
    if table is None:
        raise RuntimeError("❌ Tidak bisa mem-parse tabel dari komentar HTML.")
else:
    # Fallback: the table is not always commented out, so also look for it
    # in the live DOM before giving up.
    table = soup.find("table", id=re.compile(r"^stats_passing"))
    if table is None:
        raise RuntimeError("❌ Tabel passing tidak ditemukan. Mungkin struktur halaman berubah.")

# === 5. Convert to a DataFrame ===
# Passing literal HTML to read_html is deprecated; wrap it in a file-like object.
from io import StringIO

df = pd.read_html(StringIO(str(table)))[0]

# === 6. Clean up the columns ===
# Two-row headers arrive as tuples; join them into a single flat name.
df.columns = [' '.join(col).strip() if isinstance(col, tuple) else col for col in df.columns]
df = df.dropna(how='all')  # drop completely empty rows

# === 7. Save to CSV ===
csv_name = "premier_league_passing_2025.csv"
df.to_csv(csv_name, index=False)
print(f"βœ… Data berhasil diunduh dan disimpan ke {csv_name}")

# === 8. Show a preview ===
print("\n=== Preview Data ===")
print(df.head(10))
.history/fbrefdata_example_20251005093230.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.service import Service as ChromeService
6
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
+
13
+
14
def pull_premier_league_passing():
    """
    Download the latest FBref Premier League passing table with Selenium.

    The team-level table is tried first; if it does not appear, the
    player-level table is used instead.

    Returns:
        A ``(df, table_type)`` tuple where ``table_type`` is ``"team"`` or
        ``"player"``. When neither table is found, debug files
        (``debug_screenshot.png`` and ``debug_page.html``) are written and
        ``(None, None)`` is returned so callers can always unpack two values.
    """
    # Main URL
    url = "https://fbref.com/en/comps/9/passing/Premier-League-Stats"
    print(f"🌐 Opening browser to download passing stats from {url} ...")

    # --- Chrome browser setup ---
    options = ChromeOptions()
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    # options.add_argument("--headless")  # enable to run without a visible browser window
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
    # try/finally guarantees the browser is closed even if an unexpected
    # error occurs while loading or inspecting the page.
    try:
        driver.get(url)

        # --- Handle the cookie banner (if it shows up) ---
        try:
            accept_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(text(), 'Accept All Cookies')]")
            ))
            accept_button.click()
            print("πŸͺ Cookie banner accepted.")
        except TimeoutException:
            print("No cookie banner found or it took too long.")

        table_html, table_type = _find_passing_table(driver)
        if table_html is None:
            print("❌ No passing table found at all. Saving debug files...")
            driver.save_screenshot('debug_screenshot.png')
            with open('debug_page.html', 'w', encoding='utf-8') as f:
                f.write(driver.page_source)
            # Two values, matching the success path, so that
            # ``df, table_type = pull_premier_league_passing()`` never fails.
            return None, None
    finally:
        driver.quit()

    print("πŸ“„ Data downloaded. Processing with pandas...")

    # --- Parse the HTML table into a DataFrame ---
    df = pd.read_html(StringIO(table_html))[0]
    print(f"βœ… Table found with shape: {df.shape}")

    df = _clean_passing_table(df)

    print(f"βœ… Cleaned dataframe shape: {df.shape}")
    return df, table_type


def _find_passing_table(driver):
    """Return ``(outerHTML, table_type)`` of the team or player table, or ``(None, None)``."""
    wait = WebDriverWait(driver, 15)

    # --- Try the TEAM table first ---
    try:
        div_team = wait.until(EC.presence_of_element_located((By.ID, "all_stats_passing_team")))
    except TimeoutException:
        print("⚠️ Team passing table not found. Trying player table...")
    else:
        print("βœ… Team passing table found.")
        return div_team.get_attribute("outerHTML"), "team"

    # --- Fall back to the player table ---
    try:
        div_player = wait.until(EC.presence_of_element_located((By.ID, "all_stats_passing")))
    except TimeoutException:
        return None, None
    print("βœ… Player passing table found.")
    return div_player.get_attribute("outerHTML"), "player"


def _passing_column_target(column_name):
    """Map a flattened column name to its normalized name, or ``None`` if it is not needed."""
    if 'Squad' in column_name:
        return 'Squad'
    if 'Player' in column_name:
        return 'Player'
    # 'Cmp%' must be tested before 'Cmp' because the latter is a substring of it.
    if 'Cmp%' in column_name:
        return 'Total_Cmp%'
    if 'Cmp' in column_name:
        return 'Total_Cmp'
    if 'Att' in column_name:
        return 'Total_Att'
    if 'TotDist' in column_name:
        return 'Total_TotDist'
    return None


def _clean_passing_table(df):
    """Flatten the header, keep one column per normalized name and drop repeated header rows."""
    df = df.copy()

    # Merge a two-row header (if present) into single flat names
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = ['_'.join(map(str, col)).strip() for col in df.columns.values]

    # Select and rename the relevant columns. Substring matching also hits
    # other stat groups (any name containing 'Cmp', 'Att' or 'Cmp%'), which
    # would otherwise all be renamed to the same 'Total_*' label and create
    # duplicate columns; only the first match for each target is kept.
    # NOTE(review): this assumes the "Total" group precedes the other groups
    # in the scraped table - confirm against the live page.
    rename_map = {}
    used_targets = set()
    for column in df.columns:
        target = _passing_column_target(str(column))
        if target is None or target in used_targets:
            continue
        used_targets.add(target)
        rename_map[column] = target
    df = df[list(rename_map)].rename(columns=rename_map)

    # Remove empty rows / header rows repeated inside the table body
    if 'Squad' in df.columns:
        df = df[df['Squad'].notna()]
        df = df[~df['Squad'].str.contains("Squad|Rk", na=False)]

    return df
104
+
105
+
106
def filter_teams(df, teams):
    """Return only the rows of ``df`` whose ``Squad`` value is in ``teams``.

    If the frame has no ``Squad`` column, a warning is printed and the frame
    is returned unchanged.
    """
    has_squad_column = "Squad" in df.columns
    if has_squad_column:
        selected_rows = df["Squad"].isin(teams)
        return df[selected_rows]

    print("⚠️ 'Squad' column not found, skipping team filter.")
    return df
112
+
113
+
114
def main():
    """Download the passing table, save it to CSV and preview selected teams.

    Does nothing when the scraper reports that no table could be retrieved.
    """
    result = pull_premier_league_passing()
    # The scraper can signal failure with a bare ``None`` instead of a
    # 2-tuple; unpacking that directly would raise TypeError before the
    # ``df is not None`` check could ever run.
    if result is None:
        return
    df, table_type = result
    if df is None:
        return

    # Save the result
    filename = f"premier_league_{table_type}_passing.csv"
    df.to_csv(filename, index=False)
    print(f"\nπŸ’Ύ Saved to {filename}")

    # Example team filter
    teams = ["Arsenal", "Wolves", "Brighton"]
    df_filtered = filter_teams(df, teams)
    print(f"\nπŸ“Š Passing Stats ({table_type.title()} Level) for selected teams")
    print("=" * 80)
    print(df_filtered.head())


if __name__ == "__main__":
    main()
.history/historical_data_20251005104339.py ADDED
File without changes
.history/historical_data_20251005104343.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from selenium import webdriver
3
+ from selenium.webdriver.chrome.service import Service as ChromeService
4
+ from webdriver_manager.chrome import ChromeDriverManager
5
+ from selenium.webdriver.common.by import By
6
+ from selenium.webdriver.support.ui import WebDriverWait
7
+ from selenium.webdriver.support import expected_conditions as EC
8
+ from io import StringIO
9
+ import time
10
+ import sys
11
+
12
+ # --- FUNGSI UNTUK MENGHITUNG RATA-RATA PASSING % PER TIM ---
13
def calculate_team_passing_avg(passing_stats_file):
    """
    Read a player passing-stats CSV and average 'Total_Cmp%' per team.

    Returns a DataFrame with the columns 'Squad' and 'AvgPass%', or None
    when the file is missing, lacks the required columns, or cannot be
    processed (a message is printed in each case).
    """
    try:
        stats = pd.read_csv(passing_stats_file)

        missing_required = any(
            name not in stats.columns for name in ("Squad", "Total_Cmp%")
        )
        if missing_required:
            print(f"❌ Error: Kolom 'Squad' atau 'Total_Cmp%' tidak ditemukan di {passing_stats_file}")
            return None

        # Coerce to numbers (unparseable values become NaN), then average per team
        percentages = pd.to_numeric(stats['Total_Cmp%'], errors='coerce')
        per_team = percentages.groupby(stats['Squad']).mean()
        team_avg_pass = per_team.rename('AvgPass%').reset_index()
        print("βœ… Berhasil menghitung rata-rata passing % per tim.")
        return team_avg_pass

    except FileNotFoundError:
        print(f"❌ Error: File '{passing_stats_file}' tidak ditemukan.")
        print(" Pastikan file ini ada di folder yang sama.")
        return None
    except Exception as e:
        print(f"❌ Terjadi error saat memproses {passing_stats_file}: {e}")
        return None
38
+
39
+
40
+ # --- FUNGSI UTAMA UNTUK SCRAPING DATA PERTANDINGAN ---
41
def scrape_historical_matches(season="2023-2024"):
    """
    Scrape the Premier League scores-and-fixtures table from FBref with Selenium.

    Args:
        season: Season label used in both the page URL and the table element
            id. Defaults to the completed 2023-2024 season.
            NOTE(review): the URL / id pattern is derived from the 2023-2024
            values - confirm it for other seasons.

    Returns:
        The outer HTML of the schedule table as a string, or None when the
        table could not be found.
    """
    url = f"https://fbref.com/en/comps/9/schedule/{season}/Premier-League-Scores-and-Fixtures"
    print(f"🌐 Mengakses halaman: {url}")

    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # run in the background without opening a browser window
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36")

    driver = None
    try:
        driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
        driver.get(url)

        # Try to dismiss the cookie banner if there is one. This is best
        # effort, but ``except Exception`` (not a bare ``except``) keeps
        # Ctrl-C and SystemExit from being swallowed.
        try:
            wait = WebDriverWait(driver, 5)
            accept_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//button[text()="Accept All"]')))
            accept_button.click()
            print("βœ… Cookie banner diterima.")
            time.sleep(2)
        except Exception:
            print("ℹ️ Tidak ada cookie banner atau sudah diterima.")

        # Grab the HTML of the match-data table
        try:
            table_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, f"sched_{season}_9_1"))
            )
            html_source = table_element.get_attribute('outerHTML')
            print("βœ… Berhasil mengambil tabel data pertandingan.")
            return html_source
        except Exception as e:
            print(f"❌ Gagal menemukan tabel pertandingan: {e}")
            return None

    finally:
        # driver stays None if Chrome itself failed to start
        if driver:
            driver.quit()
85
+
86
+ # --- MAIN SCRIPT ---
87
if __name__ == "__main__":
    PASSING_STATS_FILE = "premier_league_player_passing.csv"
    OUTPUT_FILE = "historical_matches.csv"

    # 1. Compute the average passing % per team from the existing CSV
    team_pass_avg_df = calculate_team_passing_avg(PASSING_STATS_FILE)
    if team_pass_avg_df is None:
        # Non-zero status: a bare sys.exit() would report success to the shell.
        sys.exit(1)

    # 2. Scrape the historical match data
    html_table = scrape_historical_matches()
    if html_table is None:
        sys.exit(1)

    # 3. Process the scraped data
    print("βš™οΈ Memproses data pertandingan...")
    df_matches = pd.read_html(StringIO(html_table))[0]

    # Keep played matches only (rows whose score contains an en dash).
    # .copy() makes the result an independent frame, so the column
    # assignments below do not trigger SettingWithCopyWarning.
    df_matches = df_matches[['Date', 'Home', 'Score', 'Away']].dropna(subset=['Score'])
    df_matches = df_matches[df_matches['Score'].str.contains('–', na=False)].copy()

    # extract() always yields two columns, so this also works when no rows
    # are left or when the score carries extra decoration around the digits.
    scores = df_matches['Score'].str.extract(r'(\d+)\s*–\s*(\d+)')
    df_matches['HomeGoals'] = pd.to_numeric(scores[0])
    df_matches['AwayGoals'] = pd.to_numeric(scores[1])

    print("πŸ”„ Menggabungkan data pertandingan dengan data passing...")

    # Map each team name to its average passing %
    pass_map = dict(zip(team_pass_avg_df['Squad'], team_pass_avg_df['AvgPass%']))
    # Fallback for teams that cannot be matched at all; computed once.
    league_avg_pass = team_pass_avg_df['AvgPass%'].mean()

    def get_pass_perc(team_name):
        """Return the team's average passing %, falling back to the overall mean."""
        if team_name in pass_map:
            return pass_map[team_name]
        # Tolerate naming differences between the two tables by accepting a
        # substring match in either direction.
        for squad_name, perc in pass_map.items():
            if team_name in squad_name or squad_name in team_name:
                return perc
        return league_avg_pass

    df_matches['HomePass%'] = df_matches['Home'].apply(get_pass_perc)
    df_matches['AwayPass%'] = df_matches['Away'].apply(get_pass_perc)

    # Finalize the DataFrame
    final_df = df_matches[['Date', 'Home', 'Away', 'HomeGoals', 'AwayGoals', 'HomePass%', 'AwayPass%']]
    final_df = final_df.round(1)

    # 4. Save to CSV
    try:
        final_df.to_csv(OUTPUT_FILE, index=False)
        print(f"\nπŸŽ‰ SUKSES! File '{OUTPUT_FILE}' berhasil dibuat dengan {len(final_df)} data pertandingan.")
        print(" Sekarang Anda bisa menjalankan script prediksi utama Anda.")
    except Exception as e:
        print(f"❌ Gagal menyimpan file CSV: {e}")