Instantaneous1 commited on
Commit
56f6887
·
0 Parent(s):

first commit

Browse files
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model/t20features.feather.joblib filter=lfs diff=lfs merge=lfs -text
2
+ model/odifeatures.feather.joblib filter=lfs diff=lfs merge=lfs -text
.github/workflows/main.yaml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v3
14
+ with:
15
+ fetch-depth: 0
16
+ lfs: true
17
+ - name: Push to hub
18
+ env:
19
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
+ run: git push https://Instantaneous1:$HF_TOKEN@huggingface.co/spaces/Instantaneous1/cricket-prophet main
.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ temp/
2
+ data/
3
+ cricsheet/
4
+ catboost_info/
5
+ depr/
6
+ env/
7
+ __pycache__/
8
+ result/
9
+ static_test/
10
+ temp/
11
+ history/
12
+ .ipynb_checkpoints/
.streamlit/config.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [theme]
2
+ base="dark"
README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Cricket-Prophet
2
+
3
+ #cricketprophet is an AI/ML-based cricket score prediction app. It takes into account the batting team, current runs, and fall of wickets, and gives a realistic prediction of the final score using a #randomforest
4
+
5
+ Scores are fetched from the #cricbuzz site in real time
6
+
7
+ The app is online at https://cricket-prophet.streamlit.app/
8
+
9
+ It gives a better prediction than the simple projected score because it does not rely only on the current run rate, but also considers balls left, wickets left, and the batting team.
10
+
11
+ #machinelearning #cricket #sportsprediction
12
+
13
+ ## ![Cricket-Prophet](<Screenshot from 2023-10-23 09-13-41.png>)
14
+
15
+ title: Cricket Prophet
16
+ emoji: 📈
17
+ colorFrom: yellow
18
+ colorTo: purple
19
+ sdk: streamlit
20
+ sdk_version: 1.29.0
21
+ app_file: app.py
22
+ pinned: false
23
+
24
+ ---
25
+
26
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
Screenshot from 2023-10-23 09-13-41.png ADDED
__init__.py ADDED
File without changes
cricksheet.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json, os
2
+ import pandas as pd
3
+ from tqdm import tqdm
4
+ from datetime import datetime
5
+
6
+ root = "cricsheet/all_json"
7
+
8
+ # print([json.load(open(os.path.join(root, f)))['meta']['data_version'] for f in os.listdir(root) if json.load(open(os.path.join(root, f)))['meta']['data_version']=='1.1.0'])
9
+ # print(set([json.load(open(os.path.join(root, f)))['info']['match_type'] for f in os.listdir(root) if f.endswith('.json') and json.load(open(os.path.join(root, f)))['meta']['data_version']=='1.1.0']))
10
+
11
+ # formats: 'ODI', 'MDM', 'IT20', 'ODM', 'Test', 'T20'
12
+
13
+
14
class Inning(object):
    """One innings of a match: a ball-by-ball frame plus bookkeeping.

    Attributes set here: df (per-ball run/wicket DataFrame), inning
    (1 or 2), final_score (sum of runs over all balls), format
    (e.g. 'T20'). battingteam, bowlingteam, matchid and target are
    attached externally by process_matches / settarget.
    """

    def __init__(self, df, inning, format):
        self.df = df
        self.inning = inning
        self.format = format
        # The innings total is just the sum of runs over every ball.
        self.final_score = df["run"].sum()

    def settarget(self, target):
        """Record the chase target (meaningful only for inning 2)."""
        if self.inning == 1:
            # Warn, but still store the value (original behavior).
            print("first innning: don't set target")
        self.target = target
25
+
26
+
27
def process_inning(ballbyball):
    """Flatten a cricsheet innings dict into a per-ball DataFrame.

    Each over contributes at most 6 rows; any extra deliveries in an
    over (wides / no-balls) are folded into that over's sixth entry so
    the ball index stays comparable across overs.

    Returns a DataFrame indexed by ball number (1-based) with columns
    'run' and 'wicket'.
    """
    balls = []
    for over in ballbyball["overs"]:
        per_over = []
        for nth, delivery in enumerate(over["deliveries"]):
            r = delivery["runs"]["total"]
            w = len(delivery.get("wickets", []))
            if nth < 6:
                per_over.append((r, w))
            else:
                # Extra delivery: merge into the last recorded ball.
                prev_r, prev_w = per_over.pop()
                per_over.append((prev_r + r, prev_w + w))
        balls.extend(per_over)
    frame = pd.DataFrame(
        balls, columns=["run", "wicket"], index=range(1, len(balls) + 1)
    )
    frame.index.name = "balls"
    return frame
43
+
44
+
45
def process_matches(matches, format):
    """Yield Inning objects (two per match) from raw innings lists.

    Matches that do not have exactly two innings (abandoned or odd
    data) are skipped. Each Inning gets batting/bowling team names and
    a sequential matchid; the second innings also receives the first
    innings' total as its chase target.
    """
    print("processing jsons...")
    match_counter = 0
    for raw_match in tqdm(matches):
        if len(raw_match) != 2:
            continue  # skip anything that isn't a full two-innings match
        first, second = (
            Inning(process_inning(raw), n + 1, format)
            for n, raw in enumerate(raw_match)
        )
        second.settarget(first.final_score)
        first.battingteam = raw_match[0]["team"]
        second.battingteam = raw_match[1]["team"]
        # Each side bowls while the other bats.
        first.bowlingteam = raw_match[1]["team"]
        second.bowlingteam = raw_match[0]["team"]
        match_counter += 1
        first.matchid = second.matchid = match_counter
        yield first
        yield second
67
+
68
+
69
def get_all_matches(
    format,
    since=1990,
):
    """Load all matches of the given format played in or after `since`.

    Scans every .json file under `root`, keeps matches whose match_type
    contains `format` (so 'T20' also matches 'IT20') and whose first
    match date falls in a year >= since, then converts them to Inning
    objects via process_matches.
    """
    matches = []
    print("Loading jsons...")
    for fname in tqdm(os.listdir(root)):
        if not fname.endswith(".json"):
            continue
        # BUGFIX: the original used json.load(open(...)) and leaked one
        # open file handle per match file; a context manager closes it.
        with open(os.path.join(root, fname)) as fh:
            obj = json.load(fh)
        if format in obj["info"]["match_type"]:
            # Keep the original short-circuit: only parse the date once
            # the format matches.
            year = datetime.strptime(obj["info"]["dates"][0], "%Y-%m-%d").year
            if year >= since:
                matches.append(obj["innings"])
    return list(process_matches(matches, format))
85
+
86
+
87
+ # get_all_T20s()
eda.ipynb ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "62252039-37f7-467f-bde2-0a576770d4be",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd\n"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "id": "e9f9088b-aab7-4faf-a158-5b6e51d1b1bc",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "features = [\n",
21
+ " # \"batting_team\",\n",
22
+ " # \"bowling_team\",\n",
23
+ " # \"balls\",\n",
24
+ " # \"runs\",\n",
25
+ " # \"wickets\",\n",
26
+ " \"wkt_last_5_overs\",\n",
27
+ " # \"runrate_last_5_overs\",\n",
28
+ " \"current_RR\",\n",
29
+ " # \"average\",\n",
30
+ " \"balls_left\",\n",
31
+ " \"wkts_left\",\n",
32
+ " # \"required_RR\",\n",
33
+ " # \"projected_score_more\",\n",
34
+ " # \"min_score_more\",\n",
35
+ " # \"max_score_more\",\n",
36
+ " # \"projected_avg_score_more\",\n",
37
+ " \"runrate_last_5_overs-current_RR\",\n",
38
+ " \"deviation_from_projected\",\n",
39
+ "]\n"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 3,
45
+ "id": "7694e380-b6d3-4d56-af94-66f5233e6c49",
46
+ "metadata": {},
47
+ "outputs": [
48
+ {
49
+ "name": "stderr",
50
+ "output_type": "stream",
51
+ "text": [
52
+ "/media/instantinopaul/data/Code/ML/github.com/scorepredictor/env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
53
+ " from .autonotebook import tqdm as notebook_tqdm\n"
54
+ ]
55
+ }
56
+ ],
57
+ "source": [
58
+ "from ydata_profiling import ProfileReport"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": 4,
64
+ "id": "4845035c-3501-4eb0-a67b-0bbc5e289a8b",
65
+ "metadata": {},
66
+ "outputs": [
67
+ {
68
+ "name": "stderr",
69
+ "output_type": "stream",
70
+ "text": [
71
+ "/media/instantinopaul/data/Code/ML/github.com/scorepredictor/env/lib/python3.10/site-packages/ydata_profiling/utils/dataframe.py:137: SettingWithCopyWarning: \n",
72
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
73
+ "\n",
74
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
75
+ " df.rename(columns={\"index\": \"df_index\"}, inplace=True)\n",
76
+ "Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]/media/instantinopaul/data/Code/ML/github.com/scorepredictor/env/lib/python3.10/site-packages/ydata_profiling/model/typeset.py:125: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n",
77
+ " not pdt.is_categorical_dtype(series)\n",
78
+ "Summarize dataset: 100%|██████████| 51/51 [00:11<00:00, 4.50it/s, Completed] \n",
79
+ "Generate report structure: 100%|██████████| 1/1 [00:02<00:00, 2.68s/it]\n",
80
+ "Render HTML: 100%|██████████| 1/1 [00:01<00:00, 1.43s/it]\n",
81
+ "Export report to file: 100%|██████████| 1/1 [00:00<00:00, 210.06it/s]\n"
82
+ ]
83
+ }
84
+ ],
85
+ "source": [
86
+ "df=pd.read_feather('data/t20features.feather')\n",
87
+ "r=ProfileReport(df[features])\n",
88
+ "r.to_file('result/profilereportT20.html')"
89
+ ]
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "execution_count": 5,
94
+ "id": "1272e5c3-2a60-4966-b70a-43f49101a5a9",
95
+ "metadata": {
96
+ "scrolled": true
97
+ },
98
+ "outputs": [
99
+ {
100
+ "name": "stderr",
101
+ "output_type": "stream",
102
+ "text": [
103
+ "/media/instantinopaul/data/Code/ML/github.com/scorepredictor/env/lib/python3.10/site-packages/ydata_profiling/utils/dataframe.py:137: SettingWithCopyWarning: \n",
104
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
105
+ "\n",
106
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
107
+ " df.rename(columns={\"index\": \"df_index\"}, inplace=True)\n",
108
+ "Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]/media/instantinopaul/data/Code/ML/github.com/scorepredictor/env/lib/python3.10/site-packages/ydata_profiling/model/typeset.py:208: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n",
109
+ " is_valid_dtype = pdt.is_categorical_dtype(series) and not pdt.is_bool_dtype(\n",
110
+ "Summarize dataset: 100%|██████████| 51/51 [00:07<00:00, 7.05it/s, Completed] \n",
111
+ "Generate report structure: 100%|██████████| 1/1 [00:02<00:00, 2.76s/it]\n",
112
+ "Render HTML: 100%|██████████| 1/1 [00:01<00:00, 1.32s/it]\n",
113
+ "Export report to file: 100%|██████████| 1/1 [00:00<00:00, 121.46it/s]\n"
114
+ ]
115
+ }
116
+ ],
117
+ "source": [
118
+ "df=pd.read_feather('data/odifeatures.feather')\n",
119
+ "r=ProfileReport(df[features])\n",
120
+ "r.to_file('result/profilereportODI.html')"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": null,
126
+ "id": "e55fd847-05c8-47dc-aedc-9b96c23b4aa6",
127
+ "metadata": {},
128
+ "outputs": [],
129
+ "source": []
130
+ }
131
+ ],
132
+ "metadata": {
133
+ "kernelspec": {
134
+ "display_name": "cricpred",
135
+ "language": "python",
136
+ "name": "cricpred"
137
+ },
138
+ "language_info": {
139
+ "codemirror_mode": {
140
+ "name": "ipython",
141
+ "version": 3
142
+ },
143
+ "file_extension": ".py",
144
+ "mimetype": "text/x-python",
145
+ "name": "python",
146
+ "nbconvert_exporter": "python",
147
+ "pygments_lexer": "ipython3",
148
+ "version": "3.10.12"
149
+ }
150
+ },
151
+ "nbformat": 4,
152
+ "nbformat_minor": 5
153
+ }
features.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess, sys
2
+ from multiprocessing import Pool
3
+ import pandas as pd, json, os, math
4
+ import numpy as np
5
+ from tqdm import tqdm
6
+ from sklearn.model_selection import train_test_split
7
+ import matplotlib.pyplot as plt
8
+ from cricksheet import get_all_matches
9
+
10
+ # import ydata_profiling
11
+
12
+
13
+ ## Reading IPL dataset
14
+ total_wickets = 10
15
+ n_pools = 100
16
+
17
+
18
+ ## Feature selection/creation and ngram creation
19
+
20
+ features = [
21
+ "matchid",
22
+ "format",
23
+ "inning",
24
+ "batting_team",
25
+ "bowling_team",
26
+ "balls",
27
+ "runs",
28
+ "wickets",
29
+ "wkt_last_5_overs",
30
+ "runrate_last_5_overs",
31
+ "runrate_last_5_overs-current_RR",
32
+ "current_RR",
33
+ # "average",
34
+ "balls_left",
35
+ "wkts_left",
36
+ # "required_RR",
37
+ # "projected_score_more",
38
+ # "min_score_more",
39
+ # "max_score_more",
40
+ # "projected_avg_score_more",
41
+ "final_score",
42
+ "final_score_more",
43
+ "deviation_from_projected",
44
+ ]
45
+
46
+ getformat = {"ODI": 1, "T20": 2}
47
+
48
+
49
def extract_features(inning):
    """Turn one Inning into a list of per-ball training rows.

    For every ball i (1 .. len-1) of the innings, computes the match
    state known at that point (runs, wickets, run rates, balls/wickets
    left) plus the label columns (final score, runs still to come, and
    deviation from the naive run-rate projection). Tuple order matches
    the module-level `features` list.
    """
    data = []
    # NOTE: total_balls is the realized innings length, not the
    # format's nominal 120/300 balls, so balls_left reflects how long
    # the innings actually ran (see the commented block in history).
    total_balls = len(inning.df)
    df = inning.df

    for i in range(1, len(df)):
        min_RR = 0.5
        max_RR = 2.5
        runs = df.iloc[:i]["run"].sum()
        # BUGFIX: restrict to the balls bowled so far (df.iloc[:i]).
        # The original summed the last 30 balls of the WHOLE innings,
        # leaking future information into a training feature (compare
        # wkt_last_5_overs below, which was already windowed correctly).
        run_last_5_overs = df.iloc[:i]["run"].iloc[-30:].sum()
        runrate_last_5_overs = run_last_5_overs / 6

        wickets = df.iloc[:i]["wicket"].sum()
        wkt_last_5_overs = df.iloc[:i]["wicket"].iloc[-30:].sum()

        balls = len(df.iloc[:i])

        current_RR = (runs * 6) / balls  # runs per over so far
        rr_diff = runrate_last_5_overs - current_RR
        average = runs / (wickets + 1)  # +1 avoids division by zero

        balls_left = total_balls - balls
        wk_left = total_wickets - wickets

        # Only a chase (inning 2) has a meaningful required rate.
        # BUGFIX: required RR is over the REMAINING balls, not the balls
        # already bowled (feature currently unused in the output tuple).
        required_RR = (
            ((inning.target - runs) * 6) / balls_left if inning.inning == 2 else -9999
        )

        projected_score_more = current_RR * balls_left / 6
        min_score_more = min_RR * balls_left / 6
        max_score_more = max_RR * balls_left / 6
        projected_avg_score_more = average * wk_left / 6

        final_score_more = inning.final_score - runs
        format = getformat[inning.format]

        # Label: how far the real remaining score diverges from the
        # naive current-run-rate projection.
        deviation_from_projected = final_score_more - projected_score_more
        data.append(
            (
                inning.matchid,
                format,
                inning.inning,
                inning.battingteam,
                inning.bowlingteam,
                balls,
                runs,
                wickets,
                wkt_last_5_overs,
                round(runrate_last_5_overs, 2),
                round(rr_diff, 2),
                round(current_RR, 2),
                # average,
                balls_left,
                wk_left,
                # required_RR,
                # projected_score_more,
                # min_score_more,
                # max_score_more,
                # projected_avg_score_more,
                inning.final_score,
                final_score_more,
                round(deviation_from_projected),
            )
        )
    return data
119
+
120
+
121
def save_features(innings, fname):
    """Extract features from every innings in parallel and save to disk.

    Writes a feather file at `fname` and a CSV copy alongside it.
    """
    print("Feature engineering and ngram creation...")

    n_innings = len(innings)
    print(f"{n_innings=}")
    # BUGFIX: the original Pool was never closed/joined; the context
    # manager guarantees worker cleanup even if extract_features raises.
    with Pool(processes=n_pools) as pool:
        Xy = pool.map(extract_features, innings)

    # Flatten the per-innings row lists into one list of tuples.
    Xy = [xi for Xi in Xy for xi in Xi]
    print(f"{len(Xy)=}")
    featuresdf = pd.DataFrame(Xy, columns=features)
    featuresdf.to_feather(fname)
    featuresdf.to_csv(fname + ".csv")
135
+
136
+
137
if __name__ == "__main__":
    # Build and persist training features for both limited-over formats.
    # Matches before 2021 are excluded to keep the data recent.
    print("Loading t20 data...")
    innings = get_all_matches(format="T20", since=2021)
    print("Saving t20 data")
    save_features(innings, "data/t20features.feather")

    print("Loading odi data...")
    innings = get_all_matches(format="ODI", since=2021)
    print("Saving odi data")
    save_features(innings, "data/odifeatures.feather")
model.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.preprocessing import LabelEncoder, MinMaxScaler
5
+ import math
6
+ import matplotlib.pyplot as plt, joblib
7
+
8
+
9
+ # from sklearn.linear_model import LinearRegression
10
+ from sklearn.ensemble import RandomForestRegressor
11
+
12
+ # from sklearn.tree import DecisionTreeRegressor
13
+
14
+ # from catboost import CatBoostRegressor
15
+ import warnings, random
16
+ from sklearn.metrics import mean_absolute_error as mae
17
+ from sklearn.metrics import mean_squared_error as mse
18
+
19
+ # from sklearn import tree
20
+ # from sklearn.svm import SVR
21
+ # from sklearn.ensemble import VotingRegressor
22
+ import os
23
+
24
+ warnings.filterwarnings("ignore")
25
+ features = [
26
+ "batting_team",
27
+ # "bowling_team",
28
+ # "balls",
29
+ # "runs",
30
+ # "wickets",
31
+ "wkt_last_5_overs",
32
+ # "runrate_last_5_overs",
33
+ "current_RR",
34
+ # "average",
35
+ "balls_left",
36
+ "wkts_left",
37
+ # "required_RR",
38
+ # "projected_score_more",
39
+ # "min_score_more",
40
+ # "max_score_more",
41
+ # "projected_avg_score_more",
42
+ "runrate_last_5_overs-current_RR",
43
+ ]
44
+ target = "deviation_from_projected"
45
+
46
+
47
+ # evaluate
48
def evaluate(model, featuresdf, x_test, fname):
    """Plot mean absolute prediction error vs. ball number for the test rows.

    Predicts `deviation_from_projected` for the test-set index, plots the
    per-ball mean absolute error to result/<fname>.png, and dumps a tiny
    random sample of predictions to result/<fname>_sample.csv for manual
    inspection.
    """
    predictdf = featuresdf.loc[x_test.index].copy()
    predictdf["h_deviation_from_projected"] = model.predict(
        featuresdf.loc[x_test.index][features]
    )
    predictdf["error"] = (
        predictdf["h_deviation_from_projected"] - predictdf["deviation_from_projected"]
    )
    predictdf["abs_error"] = predictdf["error"].abs()
    plt.plot(predictdf.groupby("balls").aggregate({"abs_error": "mean"}))
    # BUGFIX: legend() with a bare string iterates its characters as
    # separate labels; the label must be wrapped in a list.
    plt.legend(["Abs deviation"])

    plt.title(type(model).__name__)
    plt.xlabel("Balls on which prediction was made")
    plt.ylabel("Mean Abs Prediction error")
    plt.savefig("result/" + fname + ".png")
    plt.clf()
    # Tiny random sample is enough for eyeballing individual predictions.
    predictdf.sample(frac=0.0001).to_csv("result/" + fname + "_sample.csv")
73
+
74
+
75
def train_test_split_matchid(df, matchids, split=0.2):
    """Split features/target by match id so one match never spans train and test.

    Returns (x_train, x_test, y_train, y_test) using the module-level
    `features` column list and `target` column name.
    """
    unique_match_ids = set(matchids)
    print(f"{len(unique_match_ids)=}")
    # BUGFIX: random.sample() rejects sets on Python 3.11+ (deprecated
    # since 3.9); sample from a sorted list instead — sorting also makes
    # the split reproducible under a fixed random seed.
    testids = random.sample(sorted(unique_match_ids), int(len(unique_match_ids) * split))
    trainids = list(unique_match_ids.difference(testids))
    train_mask = df.matchid.isin(trainids)
    test_mask = df.matchid.isin(testids)
    return (
        df[features][train_mask],
        df[features][test_mask],
        df[target][train_mask],
        df[target][test_mask],
    )
86
+
87
+
88
def encode_teams(series):
    """Fit a LabelEncoder on all team names and persist its classes to model/team.npy."""
    le = LabelEncoder()
    le.fit(series)
    # Only the classes array is needed to rebuild the encoder later.
    np.save("model/team.npy", le.classes_)
92
+
93
+
94
def transform_teams(series):
    """Map team names to integer codes using the saved encoder classes."""
    le = LabelEncoder()
    # Rebuild the encoder from the classes persisted by encode_teams.
    le.classes_ = np.load("model/team.npy", allow_pickle=True)
    encoded = le.transform(np.array(series).reshape(-1, 1))
    return encoded.reshape(-1)
98
+
99
+
100
def plot_feature_importance(f, imp, fname):
    """Save a normalized feature-importance bar chart to result/<fname>featureimp.png.

    f: iterable of feature names; imp: matching importance values.
    """
    importance = (
        pd.DataFrame(
            zip(*[f, imp]),
            columns=["feature", "importance"],
        )
        .sort_values("importance", ascending=False)
        .set_index("feature")
    )
    # Normalize so the importances sum to 1.
    importance["importance"] = importance["importance"] / importance["importance"].sum()
    fig, ax = plt.subplots()
    importance.plot.bar(ax=ax)
    # BUGFIX: bar labels must follow the plotted (sorted) order; the
    # original passed `f` in its unsorted order, mislabeling the bars.
    ax.bar_label(
        ax.containers[0], labels=importance.index, rotation=90, label_type="center"
    )
    # Names are drawn inside the bars, so suppress the x-axis ticks.
    ax.set_xticks([])
    ax.set_title("Feature importances for predicted score " + fname)
    ax.set_ylabel("Significance")
    ax.set_xlabel("Features")
    plt.savefig("result/" + fname + "featureimp.png")
    plt.clf()
119
+
120
+
121
def train(fname, max_depth=-1):
    """Train a RandomForest on the second-innings rows of a feature file.

    Evaluates on a per-match holdout split, writes diagnostics under
    result/, then refits on the full dataset and dumps the model to
    model/<basename>.joblib. Returns the fitted model.

    max_depth: tree depth cap; -1 (the default) keeps the historical cap of 8.
    """
    print("training on", fname, "...")
    featuresdf = pd.read_feather(fname)
    # Only the chase (2nd innings) is modeled.
    featuresdf = featuresdf[featuresdf["inning"] == 2]
    # Fit the team encoder on every team seen, then encode both columns.
    encode_teams(
        featuresdf["batting_team"].to_list() + featuresdf["bowling_team"].to_list()
    )
    featuresdf["batting_team"] = transform_teams(featuresdf["batting_team"])
    featuresdf["bowling_team"] = transform_teams(featuresdf["bowling_team"])
    # Split by match id so no match leaks across train/test.
    x_train, x_test, y_train, y_test = train_test_split_matchid(
        featuresdf, featuresdf["matchid"], 0.2
    )
    print(f"{len(x_train)=} {len(x_test)=}")

    # BUGFIX: the max_depth argument was previously ignored (always 8);
    # -1 preserves the historical default for existing callers.
    model = RandomForestRegressor(max_depth=8 if max_depth == -1 else max_depth)
    model.fit(x_train, y_train)

    # Feature importances for the random forest (std across trees).
    plot_feature_importance(
        features,
        np.std([tree.feature_importances_ for tree in model.estimators_], axis=0),
        os.path.basename(fname),
    )
    print("Depth:", [e.tree_.max_depth for e in model.estimators_])

    # Report RMSE on train and holdout.
    print(
        f"{mse(model.predict(x_train), y_train, squared=False)=}, {mse(model.predict(x_test), y_test, squared=False)=}"
    )
    evaluate(model, featuresdf, x_test, os.path.basename(fname))
    # Refit on everything before shipping the model.
    model.fit(featuresdf[features], featuresdf[target])

    joblib.dump(model, f"model/{os.path.basename(fname)}.joblib")
    return model
170
+
171
+
172
if __name__ == "__main__":
    # Train and persist one model per format (paths produced by features.py).
    train("data/t20features.feather")
    train("data/odifeatures.feather")
model/odifeatures.feather.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:901b2b44a7095ce014a98f2dc5989d06691bb1c39ff9ea6e0a3496c3eb44331d
3
+ size 3497985
model/t20features.feather.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10fc5bd06a3cf347b0eede72942802189138cea8e607a889330f59565fc87db8
3
+ size 3706353
model/team.npy ADDED
Binary file (2.34 kB). View file
 
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ chromium
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scikit-learn
2
+ pandas
3
+ numpy
4
+ streamlit-echarts
5
+ streamlit
6
+ tqdm
7
+ matplotlib
8
+ beautifulsoup4
9
+ selenium
10
+ webdriver-manager
11
+ chromedriver_autoinstaller
12
+ seleniumbase
13
+ streamlit-analytics
14
+ ydata-profiling
scrape.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import re
4
+ from urllib.parse import urljoin
5
+ import numpy as np
6
+ from sklearn.preprocessing import LabelEncoder
7
+ import traceback
8
+ from selenium import webdriver
9
+ from selenium.webdriver.chrome.service import Service
10
+
11
+ import chromedriver_autoinstaller
12
+ from selenium.common import exceptions
13
+
14
+
15
+ chromedriver_autoinstaller.install()
16
+
17
+
18
+ options = webdriver.ChromeOptions()
19
+ options.add_argument("--headless")
20
+ options.add_argument("--disable-dev-shm-usage")
21
+ options.add_argument("--no-sandbox")
22
+
23
+
24
def selnium(url):
    """Fetch `url` with headless Chrome and dump the DOM to temp/temp.html.

    Returns True on success, False on any failure — errors are printed,
    never raised, so callers can degrade gracefully.
    """
    driver = None
    try:
        driver = webdriver.Chrome(options=options)
        driver.get(url)
        # Persist the rendered page for the BeautifulSoup parsers.
        with open("temp/temp.html", "w+") as f:
            f.write(driver.page_source)
        return True
    except BaseException:
        # BUGFIX: the original printed e.message, which does not exist
        # on Python 3 exceptions and raised a secondary AttributeError.
        print(traceback.format_exc())
        return False
    finally:
        # BUGFIX: always release the browser — the original leaked a
        # Chrome process whenever get()/the file write failed.
        if driver is not None:
            try:
                driver.quit()
            except BaseException:
                pass
40
+
41
+
42
def get_batting_team(soup, status, inning, teams_this_match):
    """Work out which side is batting from the cricbuzz status line.

    Returns (batting_team, bowling_team, batting_team_enc,
    bowling_team_enc); the *_enc values are label-encoded ints, or None
    when a team name is unknown to the saved encoder.
    """
    batting_team = ""
    lowered_status = status.lower()
    if inning == 2:
        # In a chase the status reads "<team> need N runs ...".
        batting_team = status.split("need")[0].strip()
        # Snap the raw prefix to a canonical team name when possible.
        for team in teams_this_match:
            if team.lower() in batting_team.lower():
                batting_team = team
    else:
        # First innings: infer from the toss decision in the status.
        for idx, team in enumerate(teams_this_match):
            if team.lower() not in lowered_status:
                continue
            if "opt to bowl" in lowered_status:
                # The mentioned team chose to bowl, so the other one bats
                # (~idx maps 0 -> -1 and 1 -> -2, i.e. the other of two).
                batting_team = teams_this_match[int(~idx)]
            elif "opt to bat" in lowered_status:
                batting_team = team
            else:
                print("Could not get batting team)")
    bowling_team = list(set(teams_this_match).difference([batting_team]))[0]
    print(f"{batting_team=}, {bowling_team=}")
    batting_team_enc, bowling_team_enc = None, None
    le = LabelEncoder()
    le.classes_ = np.load("model/team.npy", allow_pickle=True)
    if batting_team in le.classes_:
        batting_team_enc = le.transform([batting_team])[0]
    if bowling_team in le.classes_:
        bowling_team_enc = le.transform([bowling_team])[0]
    return batting_team, bowling_team, batting_team_enc, bowling_team_enc
73
+
74
+
75
def scrape(url):
    """Scrape a cricbuzz live-score page into a flat state tuple.

    On success returns an 18-tuple: (matchState, score, run_last_5_overs,
    wkt_last_5_overs, runs, wkts, overs, req_rr, req, crr, format, title,
    status, batting_team, bowling_team, batting_team_enc,
    bowling_team_enc, inning). When the match is not in progress, the
    numeric/team slots are None. On any failure returns a 1-tuple with an
    error message, so callers can test len() to detect errors.

    NOTE(review): parsing depends on cricbuzz's current markup (CSS class
    names, inline JS variables, non-breaking-space labels) — fragile by
    nature.
    """
    try:
        if selnium(url) is False:
            return ("Selenium scrape error",)
        soup = BeautifulSoup(open("temp/temp.html", "r").read(), "html.parser")
        # print("Debug>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>.", soup.text)
        # Match state is exposed as an inline JS variable in a <script> tag.
        matchState = re.findall(
            'var matchState ="([\da-zA-Z]*)"',
            "\n".join(map(lambda x: x.text, soup.find_all("script"))),
        )[0].lower()
        print(f"{matchState=}")
        title = soup.find_all("title")[0].text
        # Format ('ODI'/'T20'/...) comes from another inline JS variable.
        format = re.findall(
            'var matchFormat = "([\da-zA-Z]*)"',
            "\n".join(map(lambda x: x.text, soup.find_all("script"))),
        )[0]
        print(f"{format=}")
        if format not in {"ODI", "T20"}:
            raise BaseException("Not ODI or T20")
        # The status banner lives in a state-specific div class.
        status = (
            soup.find_all("div", {"class": "cb-text-inprogress"})[0].text
            if matchState == "inprogress"
            else soup.find_all("div", {"class": "cb-text-complete"})[0].text
            if matchState == "complete"
            else soup.find_all("div", {"class": "cb-text-inningsbreak"})[0].text
            if matchState == "inningsbreak"
            else ""
        )
        score = (
            soup.find_all("div", {"class": "cb-min-bat-rw"})[0].text
            if matchState in ["complete", "inprogress", "inningbreak"]
            else ""
        )
        # Not live: return the 18-slot tuple with numeric/team slots empty.
        if matchState != "inprogress":
            return (
                matchState,
                score,
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                format,
                title,
                status,
                None,
                None,
                None,
                None,
                None,
            )
        # "<Team A> vs <Team B>, ..." from the nav-tab tooltip.
        teams_this_match = re.match(
            r"(.*) vs (.*)",
            soup.find_all("a", {"class": "cb-nav-tab"})[0]["title"].split(",")[0],
        ).groups()
        print(f"{teams_this_match=}")

        # Last "runs/wkts (overs)" occurrence on the page is the live score.
        data = re.findall("(\d+)/(\d+) \(([\.\d]+)\)", soup.text)
        runs, wkts, overs = map(float, data[-1])
        print(f"{runs=}, {wkts=}, {overs=}")

        if overs >= 5:
            last_5_ovs = (
                soup.find_all("span", string="Last 5 overs")[0].findNext("span").text
            )
            run_last_5_overs, wkt_last_5_overs = map(
                float, re.match("(\d+) runs, (\d+) wkts", last_5_ovs).groups()
            )
        else:
            # Fewer than 5 overs bowled: whole innings IS the window.
            run_last_5_overs, wkt_last_5_overs = runs, wkts
        print(f"{run_last_5_overs=}, {wkt_last_5_overs=}")

        # Required run rate; -9999 sentinel means "not chasing / unknown".
        req_rr = -9999
        if soup.find_all("span", string="\xa0\xa0REQ:\xa0"):
            reqdata = (
                soup.find_all("span", string="\xa0\xa0REQ:\xa0")[0]
                .findNext("span")
                .text
            )
            if reqdata.strip() != "":
                req_rr = list(map(float, re.match("([\d\.]+)", reqdata).groups()))[0]
            else:
                print("REQ_RR not parsed")

        # Current run rate; same -9999 sentinel convention.
        crr = -9999
        if soup.find_all("span", string="\xa0\xa0CRR:\xa0"):
            crrdata = (
                soup.find_all("span", string="\xa0\xa0CRR:\xa0")[0]
                .findNext("span")
                .text
            )
            if crrdata.strip() != "":
                crr = list(map(float, re.match("([\d\.]+)", crrdata).groups()))[0]
            else:
                print("CRR not parsed")

        print(f"{crr=}, {req_rr=}")

        # A positive required rate implies the chase (2nd innings).
        inning = 2 if req_rr > 0 else 1
        (
            batting_team,
            bowling_team,
            batting_team_enc,
            bowling_team_enc,
        ) = get_batting_team(soup, status, inning, teams_this_match)

        # Runs still needed to win (chase only).
        req = -9999
        if inning == 2:
            req = int(re.match(r".*need (\d+) runs", status).groups()[0])
            print(f"{req=}")
        else:
            print("Not chasing so target not set")

        return (
            matchState,
            score,
            run_last_5_overs,
            wkt_last_5_overs,
            runs,
            wkts,
            overs,
            req_rr,
            req,
            crr,
            format,
            title,
            status,
            batting_team,
            bowling_team,
            batting_team_enc,
            bowling_team_enc,
            inning,
        )
    except BaseException as e:
        # Any parse/navigation failure degrades to a 1-tuple error message.
        print(traceback.format_exc())
        return (str(e),)
214
+
215
+
216
def get_live_matches(url):
    """Return {match label: absolute url} for matches listed in the nav menu.

    Returns None when the page could not be fetched.
    """
    if selnium(url) is False:
        return None
    soup = BeautifulSoup(open("temp/temp.html", "r").read(), "html.parser")
    nav_links = soup.find_all("a", {"class": "cb-mat-mnu-itm cb-ovr-flo"})
    # The generic "live scores" link is not an actual match entry.
    live_score_links = soup.find_all("a", {"id": "live-scores-link"})
    return {
        link.text: urljoin(url, link.get("href"))
        for link in nav_links
        if link not in live_score_links
    }
226
+
227
+
228
if __name__ == "__main__":
    # Ad-hoc smoke test against a live cricbuzz scorecard URL.
    url = "https://cricbuzz.com/live-cricket-scores/79055/wa-vs-saus-3rd-match-australia-domestic-one-day-cup-2023-24"
    print(scrape(url))
    # print(get_live_matches("https://cricbuzz.com"))
serve.py ADDED
@@ -0,0 +1,472 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library
import datetime
import functools
import math
import os
import pathlib
import time

# Third party
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from streamlit_echarts import st_echarts

# Local
from scrape import scrape, get_live_matches

13
+ for folder in ["data", "model", "history", "result", "temp"]:
14
+ pathlib.Path(folder).mkdir(parents=True, exist_ok=True)
15
+
16
+ # ## Test on realdata
17
+
18
+ # In[16]:
19
+
20
+ from model import features
21
+ import streamlit_analytics
22
+
23
+ # features = [
24
+ # "batting_team",
25
+ # "balls",
26
+ # "runs",
27
+ # "wickets",
28
+ # "wkt_last_5_overs",
29
+ # "runrate_last_5_overs",
30
+ # "current_RR",
31
+ # "average",
32
+ # "balls_left",
33
+ # "wkts_left",
34
+ # "required_RR",
35
+ # "projected_score_more",
36
+ # "min_score_more",
37
+ # "max_score_more",
38
+ # "projected_avg_score_more",
39
+ # ]
40
+
41
+ all_teams_enc = list(range(len(np.load("model/team.npy", allow_pickle=True))))
42
+
43
+
44
def overtoball(over):
    """Convert a cricket overs figure (e.g. "12.3") to a total ball count.

    Parameters:
        over: overs as a number or string; the digit after the decimal
            point is the number of balls bowled in the current over.

    Returns:
        int: total balls bowled (full overs * 6 + balls in current over).
    """
    over = str(over)
    full = int(over.split(".")[0]) * 6
    # Bug fix: an integral overs value like "19" has no "." so the old
    # split(".")[-1] reused the whole string and wrongly added min(19, 6)
    # extra balls. Treat a missing fractional part as 0 balls.
    part = min(int(over.split(".")[1]), 6) if "." in over else 0
    print(f"{over=}", "balls=", full + part)
    return full + part
50
+
51
+
52
def save_history(fname, row, total_balls):
    """Append the latest prediction row to this match's history CSV and
    plot the innings progression.

    Parameters:
        fname: per-match CSV filename inside the history/ directory.
        row: single-row DataFrame with the current features and predictions.
        total_balls: innings length in balls (120 for T20, 300 for ODI).

    Returns:
        matplotlib.figure.Figure: the runs-so-far curve plus one-segment
        "Predicted" and "Projected" extrapolations to the innings end.
    """
    # Append to (or create with a header) the rolling per-match history file.
    row.to_csv(
        os.path.join("history", fname),
        mode="a" if os.path.isfile(os.path.join("history", fname)) else "w+",
        header=not os.path.isfile(os.path.join("history", fname)),
    )
    fig, ax = plt.subplots()
    historydf = pd.read_csv(os.path.join("history", fname))
    # X axis is balls bowled, recovered from balls_left at each snapshot.
    balls = (total_balls - historydf["balls_left"]).to_list()
    runs = historydf["runs"].astype(int).to_list()
    ax.plot(balls, runs, label="So Far")
    balls.append(total_balls)
    # Extend with a single segment from the latest score to each estimate.
    pred_runs = runs + [historydf["predicted"].astype(int).iloc[-1]]
    ax.plot(balls[-2:], pred_runs[-2:], label="Predicted")
    proj_runs = runs + [historydf["projected"].astype(int).iloc[-1]]
    ax.plot(balls[-2:], proj_runs[-2:], label="Projected")
    # Annotate the current score and both end-of-innings estimates.
    ax.annotate(str(runs[-1]), xy=(balls[-2], runs[-1]))
    ax.annotate(str(pred_runs[-1]), xy=(balls[-1], pred_runs[-1]))
    ax.annotate(str(proj_runs[-1]), xy=(balls[-1], proj_runs[-1]))
    plt.xlim([0, total_balls])
    plt.ylim([0, max(pred_runs[-1], proj_runs[-1]) + 100])
    ax.set_xlabel("Balls")
    ax.set_ylabel("Runs")
    ax.legend()
    return fig
77
+
78
+
79
def load_model(format):
    """Load the trained regressor for the given match format.

    Parameters:
        format: "T20" or "ODI".

    Returns:
        The joblib-deserialized model object.

    Raises:
        ValueError: for any format other than "T20" / "ODI".  (The original
        code concatenated "model/" + None, raising an opaque TypeError.)
    """
    model_files = {
        "T20": "t20features.feather.joblib",
        "ODI": "odifeatures.feather.joblib",
    }
    if format not in model_files:
        raise ValueError(f"Unsupported match format: {format!r}")
    return joblib.load("model/" + model_files[format])
90
+
91
+
92
def simulator(args, format):
    """Run the format-specific model on a single hand-built feature row.

    Parameters:
        args: mapping of feature name -> value (insertion order defines
            the column order of the one-row frame).
        format: "T20" or "ODI", passed through to load_model().

    Returns:
        The model's prediction array for the single row.
    """
    frame = pd.DataFrame([list(args.values())], columns=list(args.keys()))
    return load_model(format).predict(frame)
97
+
98
+
99
def predict(url):
    """Scrape a live match page and run the score model on the current state.

    Parameters:
        url: cricbuzz live-score page URL.

    Returns:
        - [error_string] when scraping failed,
        - (matchState, None, score, format, title, status, None, None,
          None, None) when the match is not in progress,
        - (matchState, predicted, score, format, title, status, inning,
          batting_team, batting_team_win, fig) otherwise.
    """
    # Per-match history file keyed by the alphanumeric characters of the URL.
    fname = "".join(list(filter(str.isalnum, url))) + ".csv"
    ret = scrape(url)
    print(ret)
    if len(ret) == 1:
        # scrape() returns a 1-tuple holding the error text on failure.
        err = ret[0]
        return [err]
    else:
        (
            matchState,
            score,
            run_last_5_overs,
            wkt_last_5_overs,
            runs,
            wkts,
            overs,
            req_rr,
            req,
            current_rr,
            format,
            title,
            status,
            batting_team,
            bowling_team,
            batting_team_enc,
            bowling_team_enc,
            inning,
        ) = ret
    if matchState != "inprogress":
        return matchState, None, score, format, title, status, None, None, None, None

    total_balls = 120 if format == "T20" else 300 if format == "ODI" else None
    balls = overtoball(overs)
    # Before 30 balls have been bowled, divide by the balls actually faced.
    # NOTE(review): balls == 0 would divide by zero here — confirm scrape
    # never reports 0.0 overs for an in-progress match.
    rr_last_5_overs = (int(run_last_5_overs) * 6) / min(30, balls)
    # current_rr = (runs * 6) / balls
    avg = runs / (wkts + 1)
    req_rr = req_rr
    wkts_left = 10 - wkts
    # A chase can end early once the target is reached, so estimate balls
    # remaining from the required rate instead of the format maximum.
    balls_left = (total_balls - balls) if inning == 1 else math.ceil(req * 6 / req_rr)
    min_score_avg, max_score_avg = (
        math.ceil(balls_left * 0.5),
        math.ceil(balls_left * 3),
    )
    rr_diff = rr_last_5_overs - current_rr
    # Feature row in the layout the trained model expects (see model.features).
    inputs = {
        "batting_team": batting_team_enc,
        "balls": balls,
        "runs": runs,
        "wickets": wkts,
        "wkt_last_5_overs": wkt_last_5_overs,
        "runrate_last_5_overs": rr_last_5_overs,
        "current_RR": current_rr,
        "runrate_last_5_overs-current_RR": rr_diff,
        "average": avg,
        "balls_left": int(balls_left),
        "wkts_left": int(wkts_left),
        "required_RR": -9999,
        "projected_score_more": math.ceil(balls_left * ((runs) / (balls))),
        "min_score_more": math.ceil(balls_left * 0.5),
        "max_score_more": math.ceil(balls_left * 3),
        "projected_avg_score_more": math.ceil((10 - wkts) * runs / (1 + wkts)),
    }
    inputdf = pd.DataFrame(inputs, index=[0])
    if batting_team_enc is None:
        # Unknown team: predict once per known team and average the results.
        inputdf = inputdf.drop(columns=["batting_team"])
        inputdf = pd.concat([inputdf] * len(all_teams_enc))
        inputdf["batting_team"] = all_teams_enc
    inputdf = inputdf[features]
    model = load_model(format)
    h = model.predict(inputdf)
    print(f"{h=}")
    # The model output is added on top of the current-run-rate projection
    # (same convention as the Simulator branch in __main__) — presumably
    # the model was trained to predict a residual; verify against model.py.
    projected_score_more = balls_left * current_rr / 6
    projected = math.ceil(projected_score_more + runs)
    predicted_score_more = math.ceil(h.mean() + projected_score_more)
    # predicted_score_more = min(max(min_score_avg, predicted_score_more), max_score_avg)
    predicted = runs + predicted_score_more

    print(f"{runs=}, {projected=}, {predicted=}")
    inputdf["timestamp"] = datetime.datetime.now()
    inputdf["runs"] = runs
    if inning == 2:
        target = req + runs
        print(f"{target=}")
        inputdf["target"] = target
        # Positive margin => batting side projected to reach the target.
        batting_team_win = int(predicted - target)
    else:
        batting_team_win = None
        inputdf["target"] = -9999
    inputdf["predicted"] = int(predicted)
    inputdf["projected"] = int(projected)
    print(inputdf.to_string())
    fig = save_history(fname, inputdf, total_balls)

    return (
        matchState,
        predicted,
        score,
        format,
        title,
        status,
        inning,
        batting_team,
        batting_team_win,
        fig,
    )
204
+
205
+
206
def getoption(predicted, maxscore):
    """Build the echarts gauge option for a predicted score.

    Parameters:
        predicted: predicted final score (pointer position and detail text).
        maxscore: gauge ceiling (300 for T20, 500 for ODI).

    Returns:
        dict: an echarts option with a single half-circle gauge series.
    """
    # Title color follows the same quartile bands as the gauge arc.
    if predicted > maxscore * 0.75:
        tier_color = "#0000FF"
    elif predicted > maxscore * 0.5:
        tier_color = "#00FF00"
    elif predicted > maxscore * 0.25:
        tier_color = "#FDDD60"
    else:
        tier_color = "#FF403F"

    gauge = {
        "type": "gauge",
        # Half circle: sweep from 180 degrees down to 0.
        "startAngle": 180,
        "endAngle": 0,
        "min": 0,
        "max": maxscore,
        "center": ["50%", "50%"],
        "splitNumber": 4,
        "axisLine": {
            "lineStyle": {
                "width": 6,
                # Red / yellow / green / blue quartile bands.
                "color": [
                    [0.25, "#FF403F"],
                    [0.5, "#FDDD60"],
                    [0.75, "#00FF00"],
                    [1, "#0000FF"],
                ],
            }
        },
        "pointer": {
            "icon": "path://M12.8,0.7l12,40.1H0.7L12.8,0.7z",
            "length": "12%",
            "width": 30,
            "offsetCenter": [0, "-60%"],
            "itemStyle": {"color": "auto"},
        },
        "axisTick": {
            "length": 10,
            "lineStyle": {"color": "auto", "width": 2},
        },
        "splitLine": {
            "length": 15,
            "lineStyle": {"color": "auto", "width": 5},
        },
        "axisLabel": {
            "fontSize": 12,
            "distance": -60,
        },
        "title": {
            "offsetCenter": [0, "-20%"],
            "fontSize": 20,
            "color": tier_color,
        },
        "detail": {
            "fontSize": 15,
            "offsetCenter": [0, "0%"],
            "valueAnimation": True,
            "color": "auto",
            "formatter": "Predicted Score: {value}",
        },
        "data": [{"value": round(predicted)}],
    }
    return {"series": [gauge]}
283
+
284
+
285
def timestamp(func):
    """Decorator that logs wall-clock start/finish banners around *func*.

    Returns the wrapped function's result unchanged.  Loud banners make the
    entries easy to spot in streamed server logs.
    """

    @functools.wraps(func)  # preserve the wrapped function's identity
    def caller(*args, **kwargs):
        print(
            "\n---->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Initiated: ",
            datetime.datetime.now(),
            "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<----",
        )
        # Fix: forward keyword arguments too (the original dropped them,
        # so decorated functions could not be called with kwargs).
        ret = func(*args, **kwargs)
        print(
            "\n---->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Completed: ",
            datetime.datetime.now(),
            "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<----",
        )
        return ret

    return caller
301
+
302
+
303
@timestamp
def render(url):
    """Fetch, predict and format one refresh of the UI for a match URL.

    Returns:
        (markdown_text, echarts_option_or_None, figure_or_None).
        NOTE(review): the early error path returns the markdown *list*
        instead of a joined string — callers pass it straight to st.text,
        which tolerates it, but the types are inconsistent.
    """
    markdown = []
    option = None
    print("fetching from", url)
    ret = predict(url.strip())
    if len(ret) == 1:
        err = ret[0]
        markdown.append("Error fetching url...")
        return markdown, None, None
    (
        matchState,
        predicted,
        score,
        format,
        title,
        status,
        inning,
        batting_team,
        batting_team_win,
        fig,
    ) = ret

    if matchState:
        markdown.append("Live score credits: cricbuzz.com")
    if title:
        # Page titles look like "<score line> | <match name> - Cricbuzz.com";
        # strip the branding and show the match name first.
        if "|" in title:
            l1 = (
                title.split("|")[1]
                .replace("Cricbuzz.com", "")
                .replace("Cricbuzz", "")
            )
            if l1.strip():
                markdown.append(l1.strip())
            l2 = (
                title.split("|")[0]
                .replace("Cricbuzz.com", "")
                .replace("Cricbuzz", "")
            )
            if l2.strip():
                markdown.append(l2.strip())
        else:
            markdown.append(
                title.replace("Cricbuzz.com", "").replace("Cricbuzz", "")
            )
    # One-line summary: status, score line and match state.
    nutshell = ""
    if status:
        nutshell += status + "; "
    if score:
        nutshell += score + "; "
    if matchState:
        nutshell += matchState + "; "
    if nutshell:
        markdown.append(nutshell)
    # if matchState and matchState != "inprogress":
    #     markdown.append(matchState)
    if predicted:
        if inning == 2:
            # batting_team_win is predicted score minus target (2nd innings).
            if batting_team_win >= 0:
                markdown.append(f"{batting_team} may win")
            else:
                markdown.append(
                    f"{batting_team} may lose by {-int(batting_team_win)} runs"
                )
        # Gauge ceiling is a generous per-format maximum score.
        maxscore = 300 if format == "T20" else 500 if format == "ODI" else None
        option = getoption(predicted, maxscore)
    if matchState is None:
        markdown.append("Error fetching url...")

    return "\n".join(markdown), option, fig
373
+
374
+
375
+ if __name__ == "__main__":
376
+ with streamlit_analytics.track(unsafe_password="credict123"):
377
+ st.set_page_config(page_title="Cricket Prophet")
378
+ st.title("Cricket Prophet")
379
+ st.write("**An ML-driven Cricket Score Predictor**")
380
+
381
+ live_matches = get_live_matches("https://cricbuzz.com")
382
+ if live_matches:
383
+ option = st.selectbox(
384
+ "Choose a live match here",
385
+ list(live_matches.keys()) + ["Custom URL", "Simulator"],
386
+ )
387
+ if option == "Simulator":
388
+ format = st.selectbox("Format", ["T20", "ODI"])
389
+ args = {}
390
+ args["batting_team"] = 1
391
+ args["wkt_last_5_overs"] = st.number_input(
392
+ "wkt_last_5_overs", value=0.0, step=0.01, format="%f"
393
+ )
394
+ args["current_RR"] = st.number_input(
395
+ "current_RR", value=0.0, step=0.01, format="%f"
396
+ )
397
+ args["balls_left"] = st.number_input(
398
+ "balls_left", value=0.0, step=0.01, format="%f"
399
+ )
400
+ args["wkts_left"] = st.number_input(
401
+ "wkts_left", value=0.0, step=0.01, format="%f"
402
+ )
403
+ args["runrate_last_5_overs-current_RR"] = (
404
+ st.number_input(
405
+ "runrate_last_5_overs", value=0.0, step=0.01, format="%f"
406
+ )
407
+ - args["current_RR"]
408
+ )
409
+ balls = 300 if format == "ODI" else 120
410
+ st.text(
411
+ str(int((balls * args["current_RR"] / 6) + simulator(args, format)))
412
+ )
413
+ else:
414
+ if option == "Custom URL":
415
+ url = st.text_input("Enter cricbuzz match link")
416
+ else:
417
+ url = live_matches.get(option)
418
+
419
+ col1, col2 = st.columns([3.5, 0.6])
420
+
421
+ with col1:
422
+ live = st.button("Live", help="Livestream")
423
+ with col2:
424
+ fetch = st.button("Fetch", help="Refresh")
425
+
426
+ col3, _ = st.columns([1, 4])
427
+ with col3:
428
+ interval = st.number_input(
429
+ label="Sync Interval (Seconds)", step=1, min_value=1, value=100
430
+ )
431
+
432
+ placeholder = st.empty()
433
+
434
+ if fetch:
435
+ if url:
436
+ markdown, option, fig = render(url)
437
+ placeholder.empty()
438
+ with placeholder.container():
439
+ st.text(markdown)
440
+ st.text(f"Last updated at {time.strftime('%H:%M %p')}")
441
+ if option:
442
+ st_echarts(
443
+ option,
444
+ width="450px",
445
+ height="350px",
446
+ key="gauge" + str(datetime.datetime.now()),
447
+ )
448
+ if fig:
449
+ st.pyplot(fig)
450
+
451
+ if live:
452
+ if url:
453
+ while True:
454
+ markdown, option, fig = render(url)
455
+ placeholder.empty()
456
+ with placeholder.container():
457
+ st.text(markdown)
458
+ st.text(f"Last updated at {time.strftime('%H:%M %p')}")
459
+ if option:
460
+ st_echarts(
461
+ option,
462
+ width="450px",
463
+ height="350px",
464
+ key="gauge" + str(datetime.datetime.now()),
465
+ )
466
+ if fig:
467
+ st.pyplot(fig)
468
+ else:
469
+ break
470
+ time.sleep(interval)
471
+ else:
472
+ st.text("Error fetching matches")
server.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ #!/bin/zsh
2
+ source env/bin/activate && streamlit run serve.py
trainandserve.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ #!/bin/zsh
2
+ mkdir -p data history model result temp
3
+ source env/bin/activate && pip install -r requirements.txt && python features.py && python model.py && streamlit run serve.py