Spaces:
Runtime error
Runtime error
Commit
·
56f6887
0
Parent(s):
first commit
Browse files- .gitattributes +2 -0
- .github/workflows/main.yaml +20 -0
- .gitignore +12 -0
- .streamlit/config.toml +2 -0
- README.md +26 -0
- Screenshot from 2023-10-23 09-13-41.png +0 -0
- __init__.py +0 -0
- cricksheet.py +87 -0
- eda.ipynb +153 -0
- features.py +146 -0
- model.py +174 -0
- model/odifeatures.feather.joblib +3 -0
- model/t20features.feather.joblib +3 -0
- model/team.npy +0 -0
- packages.txt +1 -0
- requirements.txt +14 -0
- scrape.py +231 -0
- serve.py +472 -0
- server.sh +2 -0
- trainandserve.sh +3 -0
.gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model/t20features.feather.joblib filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
model/odifeatures.feather.joblib filter=lfs diff=lfs merge=lfs -text
|
.github/workflows/main.yaml
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Sync to Hugging Face hub
|
| 2 |
+
on:
|
| 3 |
+
push:
|
| 4 |
+
branches: [main]
|
| 5 |
+
|
| 6 |
+
# to run this workflow manually from the Actions tab
|
| 7 |
+
workflow_dispatch:
|
| 8 |
+
|
| 9 |
+
jobs:
|
| 10 |
+
sync-to-hub:
|
| 11 |
+
runs-on: ubuntu-latest
|
| 12 |
+
steps:
|
| 13 |
+
- uses: actions/checkout@v3
|
| 14 |
+
with:
|
| 15 |
+
fetch-depth: 0
|
| 16 |
+
lfs: true
|
| 17 |
+
- name: Push to hub
|
| 18 |
+
env:
|
| 19 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 20 |
+
run: git push https://Instantaneous1:$HF_TOKEN@huggingface.co/spaces/Instantaneous1/cricket-prophet main
|
.gitignore
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
temp/
|
| 2 |
+
data/
|
| 3 |
+
cricsheet/
|
| 4 |
+
catbosst_info/
|
| 5 |
+
depr/
|
| 6 |
+
env/
|
| 7 |
+
__pycache__/
|
| 8 |
+
result/
|
| 9 |
+
static_test/
|
| 10 |
+
temp/
|
| 11 |
+
history/
|
| 12 |
+
.ipynb_checkpoints/
|
.streamlit/config.toml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[theme]
|
| 2 |
+
base="dark"
|
README.md
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Cricket-Prophet
|
| 2 |
+
|
| 3 |
+
#cricketprophet is an AI/ML-based cricket score prediction app. It takes into account the batting team, current runs, and fall of wickets, and gives a realistic prediction of the final score using a #randomforest
|
| 4 |
+
|
| 5 |
+
Scores are fetched from #cricbuzz site in realtime
|
| 6 |
+
|
| 7 |
+
The app is online at https://cricket-prophet.streamlit.app/
|
| 8 |
+
|
| 9 |
+
It is a better prediction than the projected score as it doesn't only rely on current run rate, but also balls left, wkts left and batting team.
|
| 10 |
+
|
| 11 |
+
#machinelearning #cricket #sportsprediction
|
| 12 |
+
|
| 13 |
+
## 
|
| 14 |
+
|
| 15 |
+
title: Cricket Prophet
|
| 16 |
+
emoji: 📈
|
| 17 |
+
colorFrom: yellow
|
| 18 |
+
colorTo: purple
|
| 19 |
+
sdk: streamlit
|
| 20 |
+
sdk_version: 1.29.0
|
| 21 |
+
app_file: app.py
|
| 22 |
+
pinned: false
|
| 23 |
+
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
Screenshot from 2023-10-23 09-13-41.png
ADDED
|
__init__.py
ADDED
|
File without changes
|
cricksheet.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json, os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from tqdm import tqdm
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
root = "cricsheet/all_json"
|
| 7 |
+
|
| 8 |
+
# print([json.load(open(os.path.join(root, f)))['meta']['data_version'] for f in os.listdir(root) if json.load(open(os.path.join(root, f)))['meta']['data_version']=='1.1.0'])
|
| 9 |
+
# print(set([json.load(open(os.path.join(root, f)))['info']['match_type'] for f in os.listdir(root) if f.endswith('.json') and json.load(open(os.path.join(root, f)))['meta']['data_version']=='1.1.0']))
|
| 10 |
+
|
| 11 |
+
# formats: 'ODI', 'MDM', 'IT20', 'ODM', 'Test', 'T20'
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class Inning(object):
    """One innings of a match: a ball-by-ball DataFrame plus metadata."""

    def __init__(self, df, inning, format):
        # Ball-by-ball frame with "run" and "wicket" columns.
        self.df = df
        # 1-based innings number (1 = batting first, 2 = chasing).
        self.inning = inning
        # Match format label, e.g. "T20" or "ODI".
        self.format = format
        # Total runs scored across the whole innings.
        self.final_score = df["run"].sum()

    def settarget(self, target):
        """Record the chase target (normally the first innings' score).

        Warns when called on a first innings, but still stores the value
        (matching the historical behaviour).
        """
        if self.inning == 1:
            print("first innning: don't set target")
        self.target = target
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def process_inning(ballbyball):
    """Flatten an innings' overs into a per-ball (run, wicket) DataFrame.

    Deliveries beyond the sixth of an over (extras re-bowled, etc.) are
    folded into the sixth entry, so every over contributes at most six
    rows. The returned frame is indexed 1..n with index name "balls".
    """
    balls = []
    for over in ballbyball["overs"]:
        per_over = []
        for position, delivery in enumerate(over["deliveries"]):
            runs_scored = delivery["runs"]["total"]
            wickets_fallen = len(delivery.get("wickets", []))
            if position >= 6:
                # Merge any extra delivery into the previous (6th) ball.
                prev_runs, prev_wkts = per_over.pop()
                runs_scored += prev_runs
                wickets_fallen += prev_wkts
            per_over.append((runs_scored, wickets_fallen))
        balls.extend(per_over)
    frame = pd.DataFrame(
        balls, columns=["run", "wicket"], index=range(1, len(balls) + 1)
    )
    frame.index.name = "balls"
    return frame
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def process_matches(matches, format):
    """Yield two Inning objects per completed (two-innings) match.

    Matches that do not have exactly two innings (abandoned, forfeited)
    are skipped. Each pair shares a sequential matchid; the chasing
    innings gets the first innings' final score as its target, and the
    batting/bowling team attributes are cross-assigned from the raw data.
    """
    print("processing jsons...")
    match_counter = 0
    for match in tqdm(matches):
        if len(match) != 2:
            continue
        first, second = (
            Inning(process_inning(raw), position + 1, format)
            for position, raw in enumerate(match)
        )
        second.settarget(first.final_score)
        # Team of innings 1 bats first and bowls second, and vice versa.
        first.battingteam = second.bowlingteam = match[0]["team"]
        second.battingteam = first.bowlingteam = match[1]["team"]
        match_counter += 1
        first.matchid = second.matchid = match_counter
        yield first
        yield second
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def get_all_matches(
    format,
    since=1990,
):
    """Load every cricsheet JSON under `root` matching `format` and year.

    Parameters:
        format: substring matched against the JSON's info.match_type
                (note: "T20" therefore also matches "IT20" — presumably
                intentional; TODO confirm).
        since:  keep only matches whose first listed date falls in this
                year or later.

    Returns:
        list of Inning objects produced by process_matches.
    """
    matches = []
    print("Loading jsons...")
    for fname in tqdm(os.listdir(root)):
        if not fname.endswith(".json"):
            continue
        # BUGFIX: use a context manager so each file handle is closed
        # promptly; the original `json.load(open(...))` leaked one open
        # handle per JSON file.
        with open(os.path.join(root, fname)) as fh:
            obj = json.load(fh)
        if (
            format in obj["info"]["match_type"]
            and datetime.strptime(obj["info"]["dates"][0], "%Y-%m-%d").year
            >= since
        ):
            matches.append(obj["innings"])
    return list(process_matches(matches, format))
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# get_all_T20s()
|
eda.ipynb
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"id": "62252039-37f7-467f-bde2-0a576770d4be",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"import pandas as pd\n"
|
| 11 |
+
]
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "code",
|
| 15 |
+
"execution_count": 2,
|
| 16 |
+
"id": "e9f9088b-aab7-4faf-a158-5b6e51d1b1bc",
|
| 17 |
+
"metadata": {},
|
| 18 |
+
"outputs": [],
|
| 19 |
+
"source": [
|
| 20 |
+
"features = [\n",
|
| 21 |
+
" # \"batting_team\",\n",
|
| 22 |
+
" # \"bowling_team\",\n",
|
| 23 |
+
" # \"balls\",\n",
|
| 24 |
+
" # \"runs\",\n",
|
| 25 |
+
" # \"wickets\",\n",
|
| 26 |
+
" \"wkt_last_5_overs\",\n",
|
| 27 |
+
" # \"runrate_last_5_overs\",\n",
|
| 28 |
+
" \"current_RR\",\n",
|
| 29 |
+
" # \"average\",\n",
|
| 30 |
+
" \"balls_left\",\n",
|
| 31 |
+
" \"wkts_left\",\n",
|
| 32 |
+
" # \"required_RR\",\n",
|
| 33 |
+
" # \"projected_score_more\",\n",
|
| 34 |
+
" # \"min_score_more\",\n",
|
| 35 |
+
" # \"max_score_more\",\n",
|
| 36 |
+
" # \"projected_avg_score_more\",\n",
|
| 37 |
+
" \"runrate_last_5_overs-current_RR\",\n",
|
| 38 |
+
" \"deviation_from_projected\",\n",
|
| 39 |
+
"]\n"
|
| 40 |
+
]
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"cell_type": "code",
|
| 44 |
+
"execution_count": 3,
|
| 45 |
+
"id": "7694e380-b6d3-4d56-af94-66f5233e6c49",
|
| 46 |
+
"metadata": {},
|
| 47 |
+
"outputs": [
|
| 48 |
+
{
|
| 49 |
+
"name": "stderr",
|
| 50 |
+
"output_type": "stream",
|
| 51 |
+
"text": [
|
| 52 |
+
"/media/instantinopaul/data/Code/ML/github.com/scorepredictor/env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 53 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
| 54 |
+
]
|
| 55 |
+
}
|
| 56 |
+
],
|
| 57 |
+
"source": [
|
| 58 |
+
"from ydata_profiling import ProfileReport"
|
| 59 |
+
]
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"cell_type": "code",
|
| 63 |
+
"execution_count": 4,
|
| 64 |
+
"id": "4845035c-3501-4eb0-a67b-0bbc5e289a8b",
|
| 65 |
+
"metadata": {},
|
| 66 |
+
"outputs": [
|
| 67 |
+
{
|
| 68 |
+
"name": "stderr",
|
| 69 |
+
"output_type": "stream",
|
| 70 |
+
"text": [
|
| 71 |
+
"/media/instantinopaul/data/Code/ML/github.com/scorepredictor/env/lib/python3.10/site-packages/ydata_profiling/utils/dataframe.py:137: SettingWithCopyWarning: \n",
|
| 72 |
+
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
| 73 |
+
"\n",
|
| 74 |
+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
| 75 |
+
" df.rename(columns={\"index\": \"df_index\"}, inplace=True)\n",
|
| 76 |
+
"Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]/media/instantinopaul/data/Code/ML/github.com/scorepredictor/env/lib/python3.10/site-packages/ydata_profiling/model/typeset.py:125: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n",
|
| 77 |
+
" not pdt.is_categorical_dtype(series)\n",
|
| 78 |
+
"Summarize dataset: 100%|██████████| 51/51 [00:11<00:00, 4.50it/s, Completed] \n",
|
| 79 |
+
"Generate report structure: 100%|██████████| 1/1 [00:02<00:00, 2.68s/it]\n",
|
| 80 |
+
"Render HTML: 100%|██████████| 1/1 [00:01<00:00, 1.43s/it]\n",
|
| 81 |
+
"Export report to file: 100%|██████████| 1/1 [00:00<00:00, 210.06it/s]\n"
|
| 82 |
+
]
|
| 83 |
+
}
|
| 84 |
+
],
|
| 85 |
+
"source": [
|
| 86 |
+
"df=pd.read_feather('data/t20features.feather')\n",
|
| 87 |
+
"r=ProfileReport(df[features])\n",
|
| 88 |
+
"r.to_file('result/profilereportT20.html')"
|
| 89 |
+
]
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"cell_type": "code",
|
| 93 |
+
"execution_count": 5,
|
| 94 |
+
"id": "1272e5c3-2a60-4966-b70a-43f49101a5a9",
|
| 95 |
+
"metadata": {
|
| 96 |
+
"scrolled": true
|
| 97 |
+
},
|
| 98 |
+
"outputs": [
|
| 99 |
+
{
|
| 100 |
+
"name": "stderr",
|
| 101 |
+
"output_type": "stream",
|
| 102 |
+
"text": [
|
| 103 |
+
"/media/instantinopaul/data/Code/ML/github.com/scorepredictor/env/lib/python3.10/site-packages/ydata_profiling/utils/dataframe.py:137: SettingWithCopyWarning: \n",
|
| 104 |
+
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
| 105 |
+
"\n",
|
| 106 |
+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
| 107 |
+
" df.rename(columns={\"index\": \"df_index\"}, inplace=True)\n",
|
| 108 |
+
"Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]/media/instantinopaul/data/Code/ML/github.com/scorepredictor/env/lib/python3.10/site-packages/ydata_profiling/model/typeset.py:208: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n",
|
| 109 |
+
" is_valid_dtype = pdt.is_categorical_dtype(series) and not pdt.is_bool_dtype(\n",
|
| 110 |
+
"Summarize dataset: 100%|██████████| 51/51 [00:07<00:00, 7.05it/s, Completed] \n",
|
| 111 |
+
"Generate report structure: 100%|██████████| 1/1 [00:02<00:00, 2.76s/it]\n",
|
| 112 |
+
"Render HTML: 100%|██████████| 1/1 [00:01<00:00, 1.32s/it]\n",
|
| 113 |
+
"Export report to file: 100%|██████████| 1/1 [00:00<00:00, 121.46it/s]\n"
|
| 114 |
+
]
|
| 115 |
+
}
|
| 116 |
+
],
|
| 117 |
+
"source": [
|
| 118 |
+
"df=pd.read_feather('data/odifeatures.feather')\n",
|
| 119 |
+
"r=ProfileReport(df[features])\n",
|
| 120 |
+
"r.to_file('result/profilereportODI.html')"
|
| 121 |
+
]
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"cell_type": "code",
|
| 125 |
+
"execution_count": null,
|
| 126 |
+
"id": "e55fd847-05c8-47dc-aedc-9b96c23b4aa6",
|
| 127 |
+
"metadata": {},
|
| 128 |
+
"outputs": [],
|
| 129 |
+
"source": []
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"metadata": {
|
| 133 |
+
"kernelspec": {
|
| 134 |
+
"display_name": "cricpred",
|
| 135 |
+
"language": "python",
|
| 136 |
+
"name": "cricpred"
|
| 137 |
+
},
|
| 138 |
+
"language_info": {
|
| 139 |
+
"codemirror_mode": {
|
| 140 |
+
"name": "ipython",
|
| 141 |
+
"version": 3
|
| 142 |
+
},
|
| 143 |
+
"file_extension": ".py",
|
| 144 |
+
"mimetype": "text/x-python",
|
| 145 |
+
"name": "python",
|
| 146 |
+
"nbconvert_exporter": "python",
|
| 147 |
+
"pygments_lexer": "ipython3",
|
| 148 |
+
"version": "3.10.12"
|
| 149 |
+
}
|
| 150 |
+
},
|
| 151 |
+
"nbformat": 4,
|
| 152 |
+
"nbformat_minor": 5
|
| 153 |
+
}
|
features.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import subprocess, sys
|
| 2 |
+
from multiprocessing import Pool
|
| 3 |
+
import pandas as pd, json, os, math
|
| 4 |
+
import numpy as np
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
from sklearn.model_selection import train_test_split
|
| 7 |
+
import matplotlib.pyplot as plt
|
| 8 |
+
from cricksheet import get_all_matches
|
| 9 |
+
|
| 10 |
+
# import ydata_profiling
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
## Reading IPL dataset
|
| 14 |
+
total_wickets = 10
|
| 15 |
+
n_pools = 100
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
## Feature selection/creation and ngram creation
|
| 19 |
+
|
| 20 |
+
features = [
|
| 21 |
+
"matchid",
|
| 22 |
+
"format",
|
| 23 |
+
"inning",
|
| 24 |
+
"batting_team",
|
| 25 |
+
"bowling_team",
|
| 26 |
+
"balls",
|
| 27 |
+
"runs",
|
| 28 |
+
"wickets",
|
| 29 |
+
"wkt_last_5_overs",
|
| 30 |
+
"runrate_last_5_overs",
|
| 31 |
+
"runrate_last_5_overs-current_RR",
|
| 32 |
+
"current_RR",
|
| 33 |
+
# "average",
|
| 34 |
+
"balls_left",
|
| 35 |
+
"wkts_left",
|
| 36 |
+
# "required_RR",
|
| 37 |
+
# "projected_score_more",
|
| 38 |
+
# "min_score_more",
|
| 39 |
+
# "max_score_more",
|
| 40 |
+
# "projected_avg_score_more",
|
| 41 |
+
"final_score",
|
| 42 |
+
"final_score_more",
|
| 43 |
+
"deviation_from_projected",
|
| 44 |
+
]
|
| 45 |
+
|
| 46 |
+
getformat = {"ODI": 1, "T20": 2}
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def extract_features(inning):
    """Turn one Inning into a list of per-ball feature tuples.

    For each ball i (from the second delivery onward) the tuple describes
    the state of the innings after the first i balls, in the order of the
    module-level `features` list.
    """
    data = []
    # Total deliveries actually bowled in this innings (not the nominal
    # 120/300 for the format — shortened innings keep their real length).
    total_balls = len(inning.df)
    df = inning.df

    for i in range(1, len(df)):
        # Assumed per-ball run-rate bounds for the (currently unused)
        # min/max projections.
        min_RR = 0.5
        max_RR = 2.5
        played = df.iloc[:i]
        runs = played["run"].sum()
        # BUGFIX: the original summed the last 30 balls of the WHOLE
        # innings (df["run"].iloc[-30:]) instead of the last 30 balls
        # bowled so far — unlike the wicket feature below, which was
        # already windowed on the balls played.
        run_last_5_overs = played["run"].iloc[-30:].sum()
        # NOTE(review): 30 balls are 5 overs, so a per-over rate would be
        # run_last_5_overs / 5; the historical divisor 6 is kept because
        # the shipped models were trained with it. TODO confirm intent.
        runrate_last_5_overs = run_last_5_overs / 6

        wickets = played["wicket"].sum()
        wkt_last_5_overs = played["wicket"].iloc[-30:].sum()

        balls = len(played)

        current_RR = (runs * 6) / balls
        rr_diff = runrate_last_5_overs - current_RR
        # +1 avoids division by zero before the first wicket falls.
        average = runs / (wickets + 1)

        balls_left = total_balls - balls
        wk_left = total_wickets - wickets

        # BUGFIX: required run rate is over the balls REMAINING, not the
        # balls already bowled. This feature is currently commented out of
        # the output tuple, so saved features are unchanged.
        required_RR = (
            ((inning.target - runs) * 6) / balls_left
            if inning.inning == 2 and balls_left
            else -9999
        )

        projected_score_more = current_RR * balls_left / 6
        min_score_more = min_RR * balls_left / 6
        max_score_more = max_RR * balls_left / 6
        projected_avg_score_more = average * wk_left / 6

        final_score_more = inning.final_score - runs
        format = getformat[inning.format]

        deviation_from_projected = final_score_more - projected_score_more
        data.append(
            (
                inning.matchid,
                format,
                inning.inning,
                inning.battingteam,
                inning.bowlingteam,
                balls,
                runs,
                wickets,
                wkt_last_5_overs,
                round(runrate_last_5_overs, 2),
                round(rr_diff, 2),
                round(current_RR, 2),
                # average,
                balls_left,
                wk_left,
                # required_RR,
                # projected_score_more,
                # min_score_more,
                # max_score_more,
                # projected_avg_score_more,
                inning.final_score,
                final_score_more,
                round(deviation_from_projected),
            )
        )
    return data
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def save_features(innings, fname):
    """Extract features from every innings in parallel and save to disk.

    Writes the feature table to `fname` (feather) and a CSV alongside it.
    """
    print("Feature engineering and ngram creation...")

    n_innings = len(innings)
    print(f"{n_innings=}")
    # BUGFIX: use the pool as a context manager so worker processes are
    # terminated even if a worker raises (the original leaked the pool).
    with Pool(processes=n_pools) as pool:
        Xy = pool.map(extract_features, innings)

    # Flatten the per-innings lists into one flat list of feature tuples.
    Xy = [xi for Xi in Xy for xi in Xi]
    print(f"{len(Xy)=}")
    featuresdf = pd.DataFrame(Xy, columns=features)
    featuresdf.to_feather(fname)
    featuresdf.to_csv(fname + ".csv")
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
if __name__ == "__main__":
    # Build and persist the feature tables for both formats.
    # since=2021 restricts training data to recent matches.
    print("Loading t20 data...")
    innings = get_all_matches(format="T20", since=2021)
    print("Saving t20 data")
    save_features(innings, "data/t20features.feather")

    print("Loading odi data...")
    innings = get_all_matches(format="ODI", since=2021)
    print("Saving odi data")
    save_features(innings, "data/odifeatures.feather")
|
model.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sklearn.model_selection import train_test_split
|
| 4 |
+
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
|
| 5 |
+
import math
|
| 6 |
+
import matplotlib.pyplot as plt, joblib
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# from sklearn.linear_model import LinearRegression
|
| 10 |
+
from sklearn.ensemble import RandomForestRegressor
|
| 11 |
+
|
| 12 |
+
# from sklearn.tree import DecisionTreeRegressor
|
| 13 |
+
|
| 14 |
+
# from catboost import CatBoostRegressor
|
| 15 |
+
import warnings, random
|
| 16 |
+
from sklearn.metrics import mean_absolute_error as mae
|
| 17 |
+
from sklearn.metrics import mean_squared_error as mse
|
| 18 |
+
|
| 19 |
+
# from sklearn import tree
|
| 20 |
+
# from sklearn.svm import SVR
|
| 21 |
+
# from sklearn.ensemble import VotingRegressor
|
| 22 |
+
import os
|
| 23 |
+
|
| 24 |
+
warnings.filterwarnings("ignore")
|
| 25 |
+
features = [
|
| 26 |
+
"batting_team",
|
| 27 |
+
# "bowling_team",
|
| 28 |
+
# "balls",
|
| 29 |
+
# "runs",
|
| 30 |
+
# "wickets",
|
| 31 |
+
"wkt_last_5_overs",
|
| 32 |
+
# "runrate_last_5_overs",
|
| 33 |
+
"current_RR",
|
| 34 |
+
# "average",
|
| 35 |
+
"balls_left",
|
| 36 |
+
"wkts_left",
|
| 37 |
+
# "required_RR",
|
| 38 |
+
# "projected_score_more",
|
| 39 |
+
# "min_score_more",
|
| 40 |
+
# "max_score_more",
|
| 41 |
+
# "projected_avg_score_more",
|
| 42 |
+
"runrate_last_5_overs-current_RR",
|
| 43 |
+
]
|
| 44 |
+
target = "deviation_from_projected"
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# evaluate
|
| 48 |
+
def evaluate(model, featuresdf, x_test, fname):
    """Plot mean absolute prediction error vs. balls bowled on the holdout.

    Saves the plot to result/<fname>.png and a small random sample of the
    prediction frame to result/<fname>_sample.csv.
    """
    predictdf = featuresdf.loc[x_test.index].copy()
    predictdf["h_deviation_from_projected"] = model.predict(
        featuresdf.loc[x_test.index][features]
    )
    predictdf["error"] = (
        predictdf["h_deviation_from_projected"] - predictdf["deviation_from_projected"]
    )
    predictdf["abs_error"] = predictdf["error"].abs()
    plt.plot(predictdf.groupby("balls").aggregate({"abs_error": "mean"}))
    # BUGFIX: plt.legend("Abs deviation") iterates the string character by
    # character (one legend entry per letter); pass a list of labels.
    plt.legend(["Abs deviation"])

    plt.title(type(model).__name__)
    plt.xlabel("Balls on which prediction was made")
    plt.ylabel("Mean Abs Prediction error")
    plt.savefig("result/" + fname + ".png")
    plt.clf()
    predictdf.sample(frac=0.0001).to_csv("result/" + fname + "_sample.csv")
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def train_test_split_matchid(df, matchids, split=0.2):
    """Split features/target by match so one match never straddles sets.

    Uses the module-level `features` and `target` column lists.

    Returns:
        (x_train, x_test, y_train, y_test)
    """
    unique_match_ids = set(matchids)
    print(f"{len(unique_match_ids)=}")
    # BUGFIX: random.sample() on a set is deprecated since Python 3.9 and
    # raises TypeError on 3.11+; draw from a sorted list instead (sorting
    # also makes the draw reproducible under a fixed seed).
    testids = random.sample(sorted(unique_match_ids), int(len(unique_match_ids) * split))
    trainids = list(unique_match_ids.difference(testids))
    in_train = df.matchid.isin(trainids)
    in_test = df.matchid.isin(testids)
    return (
        df[features][in_train],
        df[features][in_test],
        df[target][in_train],
        df[target][in_test],
    )
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def encode_teams(series):
    """Fit a LabelEncoder on all team names and persist its classes.

    The classes array is written to model/team.npy so later runs (and the
    serving code) can rebuild the identical encoding.
    """
    label_encoder = LabelEncoder()
    label_encoder.fit(series)
    np.save("model/team.npy", label_encoder.classes_)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def transform_teams(series):
    """Map team names to the integer codes persisted by encode_teams."""
    encoder = LabelEncoder()
    encoder.classes_ = np.load("model/team.npy", allow_pickle=True)
    # BUGFIX: LabelEncoder.transform expects a 1-D array; the original
    # passed an (n, 1) column vector, which sklearn only tolerates with a
    # DataConversionWarning. Flatten once instead of reshape/reshape.
    return encoder.transform(np.asarray(series).ravel())
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def plot_feature_importance(f, imp, fname):
    """Bar-plot normalized feature importances.

    Saves the figure to result/<fname>featureimp.png.

    Parameters:
        f:     feature names (same order as `imp`).
        imp:   raw importance values.
        fname: output file stem.
    """
    importance = (
        pd.DataFrame(
            zip(*[f, imp]),
            columns=["feature", "importance"],
        )
        .sort_values("importance", ascending=False)
        .set_index("feature")
    )
    # Normalize so the bars sum to 1.
    importance["importance"] = importance["importance"] / importance["importance"].sum()
    fig, ax = plt.subplots()
    importance.plot.bar(ax=ax)
    # BUGFIX: bars are sorted by importance, but the original labelled them
    # with `f` (the UNSORTED feature order), mislabelling every bar; use
    # the sorted index instead.
    ax.bar_label(ax.containers[0], labels=importance.index, rotation=90, label_type="center")
    ax.set_xticks([])
    ax.set_title("Feature importances for predicted score " + fname)
    ax.set_ylabel("Significance")
    ax.set_xlabel("Features")
    plt.savefig("result/" + fname + "featureimp.png")
    plt.clf()
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def train(fname, max_depth=-1):
    """Train a RandomForest on second-innings features loaded from `fname`.

    Evaluates on a match-level holdout, then refits on all data and dumps
    the model to model/<basename>.joblib.

    Parameters:
        fname:     feather file produced by features.py.
        max_depth: forest depth; -1 (the default) keeps the historical
                   depth of 8.

    Returns:
        the fitted RandomForestRegressor.
    """
    print("training on", fname, "...")
    featuresdf = pd.read_feather(fname)
    # Only train on chases (second innings).
    featuresdf = featuresdf[featuresdf["inning"] == 2]
    encode_teams(
        featuresdf["batting_team"].to_list() + featuresdf["bowling_team"].to_list()
    )
    featuresdf["batting_team"] = transform_teams(featuresdf["batting_team"])
    featuresdf["bowling_team"] = transform_teams(featuresdf["bowling_team"])
    x_train, x_test, y_train, y_test = train_test_split_matchid(
        featuresdf, featuresdf["matchid"], 0.2
    )
    print(f"{len(x_train)=} {len(x_test)=}")

    # BUGFIX: the max_depth parameter was accepted but silently ignored
    # (depth was hard-coded to 8); honor it while keeping the old default.
    model = RandomForestRegressor(max_depth=8 if max_depth == -1 else max_depth)
    model.fit(x_train, y_train)

    # Random-forest feature importances: spread (std) across trees.
    plot_feature_importance(
        features,
        np.std([tree.feature_importances_ for tree in model.estimators_], axis=0),
        os.path.basename(fname),
    )
    print("Depth:", [e.tree_.max_depth for e in model.estimators_])

    # RMSE on train and holdout (squared=False).
    print(
        f"{mse(model.predict(x_train), y_train, squared=False)=}, {mse(model.predict(x_test), y_test, squared=False)=}"
    )
    evaluate(model, featuresdf, x_test, os.path.basename(fname))
    # Refit on ALL data before shipping; the holdout was only for the
    # metrics printed above.
    model.fit(featuresdf[features], featuresdf[target])

    joblib.dump(model, f"model/{os.path.basename(fname)}.joblib")
    return model
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
if __name__ == "__main__":
    # Train and persist one model per format.
    train("data/t20features.feather")
    train("data/odifeatures.feather")
|
model/odifeatures.feather.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:901b2b44a7095ce014a98f2dc5989d06691bb1c39ff9ea6e0a3496c3eb44331d
|
| 3 |
+
size 3497985
|
model/t20features.feather.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10fc5bd06a3cf347b0eede72942802189138cea8e607a889330f59565fc87db8
|
| 3 |
+
size 3706353
|
model/team.npy
ADDED
|
Binary file (2.34 kB). View file
|
|
|
packages.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
chromium
|
requirements.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
scikit-learn
|
| 2 |
+
pandas
|
| 3 |
+
numpy
|
| 4 |
+
streamlit-echarts
|
| 5 |
+
streamlit
|
| 6 |
+
tqdm
|
| 7 |
+
matplotlib
|
| 8 |
+
beautifulsoup4
|
| 9 |
+
selenium
|
| 10 |
+
webdriver-manager
|
| 11 |
+
chromedriver_autoinstaller
|
| 12 |
+
seleniumbase
|
| 13 |
+
streamlit-analytics
|
| 14 |
+
ydata-profiling
|
scrape.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from bs4 import BeautifulSoup
|
| 3 |
+
import re
|
| 4 |
+
from urllib.parse import urljoin
|
| 5 |
+
import numpy as np
|
| 6 |
+
from sklearn.preprocessing import LabelEncoder
|
| 7 |
+
import traceback
|
| 8 |
+
from selenium import webdriver
|
| 9 |
+
from selenium.webdriver.chrome.service import Service
|
| 10 |
+
|
| 11 |
+
import chromedriver_autoinstaller
|
| 12 |
+
from selenium.common import exceptions
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
chromedriver_autoinstaller.install()
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
options = webdriver.ChromeOptions()
|
| 19 |
+
options.add_argument("--headless")
|
| 20 |
+
options.add_argument("--disable-dev-shm-usage")
|
| 21 |
+
options.add_argument("--no-sandbox")
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def selnium(url):
    """Fetch *url* with headless Chrome and dump the rendered page source.

    The rendered HTML is written to ``temp/temp.html`` so that callers
    (scrape() / get_live_matches()) can parse it with BeautifulSoup.

    Args:
        url: Page to load in the headless browser.

    Returns:
        bool: True on success, False if the browser session failed.
    """
    driver = None
    try:
        driver = webdriver.Chrome(options=options)
        driver.get(url)
        # Persist the fully rendered DOM for offline parsing.
        with open("temp/temp.html", "w+") as f:
            f.write(driver.page_source)
        return True
    except exceptions.InvalidSessionIdException:
        # Browser session died (crashed / was killed) mid-fetch.
        # BUG FIX: the original printed e.message, which does not exist on
        # Python 3 exceptions and raised AttributeError inside the handler.
        print(traceback.format_exc())
        return False
    except BaseException:
        # Broad catch is intentional: any scraping failure is reported to the
        # caller as False instead of crashing the Streamlit app.
        print(traceback.format_exc())
        return False
    finally:
        # Always release the browser, even when driver.get() raised;
        # the original leaked the Chrome process on any exception.
        if driver is not None:
            try:
                driver.quit()
            except BaseException:
                pass
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def get_batting_team(soup, status, inning, teams_this_match):
    """Identify the batting/bowling teams and label-encode both.

    Args:
        soup: Parsed page (currently unused; kept for interface stability).
        status: Cricbuzz status line, e.g. "India need 52 runs" (chasing)
            or "Australia opt to bat" (first innings, toss decision).
        inning: 1 or 2; in the 2nd innings the chasing team is named
            before the word "need" in *status*.
        teams_this_match: The two team names playing this match.

    Returns:
        tuple: (batting_team, bowling_team, batting_team_enc,
        bowling_team_enc). The *_enc values are ints from the trained
        LabelEncoder, or None when a team is absent from its classes.
    """
    batting_team = ""
    if inning == 2:
        # Chasing side is named before "need" in the status string.
        batting_team = status.split("need")[0].strip()
        # Normalize the extracted fragment to the canonical team name.
        for team in teams_this_match:
            if team.lower() in batting_team.lower():
                batting_team = team
    else:
        # First innings: infer the batting side from the toss decision.
        for idx, team in enumerate(teams_this_match):
            if team.lower() in status.lower():
                if "opt to bowl" in status.lower():
                    # ~idx maps 0 -> -1 and 1 -> -2, i.e. "the other team"
                    # for a two-element tuple.
                    batting_team = teams_this_match[int(~idx)]
                elif "opt to bat" in status.lower():
                    batting_team = team
                else:
                    # BUG FIX: message had a stray ')' in the original.
                    print("Could not get batting team")
    bowling_team = list(set(teams_this_match).difference([batting_team]))[0]
    print(f"{batting_team=}, {bowling_team=}")
    batting_team_enc, bowling_team_enc = None, None
    # Rehydrate the encoder trained in model.py from its saved classes.
    le = LabelEncoder()
    le.classes_ = np.load("model/team.npy", allow_pickle=True)
    if batting_team in le.classes_:
        batting_team_enc = le.transform([batting_team])[0]
    if bowling_team in le.classes_:
        bowling_team_enc = le.transform([bowling_team])[0]
    return batting_team, bowling_team, batting_team_enc, bowling_team_enc
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def scrape(url):
    """Scrape a Cricbuzz live-score page into model-ready match features.

    Fetches *url* with headless Chrome (selnium()), then parses the rendered
    HTML for the match state, score, run rates and team information.

    Returns:
        A 1-tuple ``(error_message,)`` on any failure.
        Otherwise an 18-tuple:
        (matchState, score, run_last_5_overs, wkt_last_5_overs, runs, wkts,
         overs, req_rr, req, crr, format, title, status, batting_team,
         bowling_team, batting_team_enc, bowling_team_enc, inning),
        with the numeric slots set to None when the match is not in progress.
    """
    try:
        if selnium(url) is False:
            return ("Selenium scrape error",)
        with open("temp/temp.html", "r") as f:
            soup = BeautifulSoup(f.read(), "html.parser")
        # Cricbuzz embeds match metadata in inline <script> variables;
        # join all script bodies once instead of rebuilding per regex.
        scripts_text = "\n".join(map(lambda x: x.text, soup.find_all("script")))
        matchState = re.findall(
            r'var matchState ="([\da-zA-Z]*)"', scripts_text
        )[0].lower()
        print(f"{matchState=}")
        title = soup.find_all("title")[0].text
        format = re.findall(
            r'var matchFormat = "([\da-zA-Z]*)"', scripts_text
        )[0]
        print(f"{format=}")
        if format not in {"ODI", "T20"}:
            raise BaseException("Not ODI or T20")
        # The status banner's CSS class depends on the match state.
        if matchState == "inprogress":
            status = soup.find_all("div", {"class": "cb-text-inprogress"})[0].text
        elif matchState == "complete":
            status = soup.find_all("div", {"class": "cb-text-complete"})[0].text
        elif matchState == "inningsbreak":
            status = soup.find_all("div", {"class": "cb-text-inningsbreak"})[0].text
        else:
            status = ""
        # BUG FIX: the original membership test said "inningbreak" (missing
        # an "s"), so the score was never extracted during an innings break.
        score = (
            soup.find_all("div", {"class": "cb-min-bat-rw"})[0].text
            if matchState in ["complete", "inprogress", "inningsbreak"]
            else ""
        )
        if matchState != "inprogress":
            # Nothing to predict; keep the 18-slot shape with None padding.
            return (
                matchState,
                score,
                None, None, None, None, None, None, None, None,
                format,
                title,
                status,
                None, None, None, None, None,
            )
        teams_this_match = re.match(
            r"(.*) vs (.*)",
            soup.find_all("a", {"class": "cb-nav-tab"})[0]["title"].split(",")[0],
        ).groups()
        print(f"{teams_this_match=}")

        # The last "runs/wkts (overs)" figure on the page is the live innings.
        data = re.findall(r"(\d+)/(\d+) \(([\.\d]+)\)", soup.text)
        runs, wkts, overs = map(float, data[-1])
        print(f"{runs=}, {wkts=}, {overs=}")

        if overs >= 5:
            last_5_ovs = (
                soup.find_all("span", string="Last 5 overs")[0].findNext("span").text
            )
            run_last_5_overs, wkt_last_5_overs = map(
                float, re.match(r"(\d+) runs, (\d+) wkts", last_5_ovs).groups()
            )
        else:
            # Too early in the innings: the whole innings *is* the last 5 overs.
            run_last_5_overs, wkt_last_5_overs = runs, wkts
        print(f"{run_last_5_overs=}, {wkt_last_5_overs=}")

        # -9999 is the project-wide sentinel for "not applicable / unparsed".
        req_rr = -9999
        if soup.find_all("span", string="\xa0\xa0REQ:\xa0"):
            reqdata = (
                soup.find_all("span", string="\xa0\xa0REQ:\xa0")[0]
                .findNext("span")
                .text
            )
            if reqdata.strip() != "":
                req_rr = list(map(float, re.match(r"([\d\.]+)", reqdata).groups()))[0]
            else:
                print("REQ_RR not parsed")

        crr = -9999
        if soup.find_all("span", string="\xa0\xa0CRR:\xa0"):
            crrdata = (
                soup.find_all("span", string="\xa0\xa0CRR:\xa0")[0]
                .findNext("span")
                .text
            )
            if crrdata.strip() != "":
                crr = list(map(float, re.match(r"([\d\.]+)", crrdata).groups()))[0]
            else:
                print("CRR not parsed")

        print(f"{crr=}, {req_rr=}")

        # A required run-rate only exists while chasing, i.e. 2nd innings.
        inning = 2 if req_rr > 0 else 1
        (
            batting_team,
            bowling_team,
            batting_team_enc,
            bowling_team_enc,
        ) = get_batting_team(soup, status, inning, teams_this_match)

        req = -9999
        if inning == 2:
            req = int(re.match(r".*need (\d+) runs", status).groups()[0])
            print(f"{req=}")
        else:
            print("Not chasing so target not set")

        return (
            matchState,
            score,
            run_last_5_overs,
            wkt_last_5_overs,
            runs,
            wkts,
            overs,
            req_rr,
            req,
            crr,
            format,
            title,
            status,
            batting_team,
            bowling_team,
            batting_team_enc,
            bowling_team_enc,
            inning,
        )
    except BaseException as e:
        print(traceback.format_exc())
        return (str(e),)
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def get_live_matches(url):
    """Return a mapping of live-match titles to absolute match URLs.

    Scrapes the Cricbuzz homepage navigation menu for live matches.

    Args:
        url: Cricbuzz base URL (e.g. "https://cricbuzz.com").

    Returns:
        dict[str, str] of {match title: absolute url}, or None when the
        page could not be fetched.
    """
    if selnium(url) is False:
        return None
    # BUG FIX: the original left the file handle open; use a context manager.
    with open("temp/temp.html", "r") as f:
        soup = BeautifulSoup(f.read(), "html.parser")
    matches = soup.find_all("a", {"class": "cb-mat-mnu-itm cb-ovr-flo"})
    # Exclude the generic "live scores" navigation link from the menu items.
    return {
        m.text: urljoin(url, m.get("href"))
        for m in matches
        if m not in soup.find_all("a", {"id": "live-scores-link"})
    }
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
if __name__ == "__main__":
    # Manual smoke test: scrape one known live-score page and dump the tuple.
    sample_url = (
        "https://cricbuzz.com/live-cricket-scores/79055/"
        "wa-vs-saus-3rd-match-australia-domestic-one-day-cup-2023-24"
    )
    print(scrape(sample_url))
    # print(get_live_matches("https://cricbuzz.com"))
|
serve.py
ADDED
|
@@ -0,0 +1,472 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from scrape import scrape, get_live_matches
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import streamlit as st
|
| 4 |
+
from streamlit_echarts import st_echarts
|
| 5 |
+
import joblib
|
| 6 |
+
import numpy as np
|
| 7 |
+
import math, os
|
| 8 |
+
import datetime, time
|
| 9 |
+
import matplotlib.pyplot as plt
|
| 10 |
+
|
| 11 |
+
import pathlib

# Create every working directory up front so later file writes (scraped
# HTML, history CSVs, models, results) never fail on a missing folder.
for folder in ["data", "model", "history", "result", "temp"]:
    pathlib.Path(folder).mkdir(parents=True, exist_ok=True)

# ## Test on realdata

# In[16]:

# `features` is the canonical model-input column order defined in model.py;
# the commented list below documents its expected contents for reference.
from model import features
import streamlit_analytics

# features = [
#     "batting_team",
#     "balls",
#     "runs",
#     "wickets",
#     "wkt_last_5_overs",
#     "runrate_last_5_overs",
#     "current_RR",
#     "average",
#     "balls_left",
#     "wkts_left",
#     "required_RR",
#     "projected_score_more",
#     "min_score_more",
#     "max_score_more",
#     "projected_avg_score_more",
# ]

# Encoded ids (0..n-1) of every team the model was trained on; predict()
# averages over all of these when the current batting team is unknown.
all_teams_enc = list(range(len(np.load("model/team.npy", allow_pickle=True))))
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def overtoball(over):
    """Convert a cricket overs figure (e.g. 12.3) to a ball count (e.g. 75).

    The digits after the decimal point are *balls* (0-5), not a fraction:
    12.3 means 12 overs and 3 balls. The ball part is clamped to 6 to guard
    against malformed input.

    Args:
        over: Overs as float, int or str, e.g. 12.3, "49.5" or 12.

    Returns:
        int: Total deliveries bowled.
    """
    text = str(over)
    whole, dot, part = text.partition(".")
    full = int(whole) * 6
    # BUG FIX: for inputs with no decimal point (e.g. the int 12) the
    # original reused the whole number as the ball part (min(12, 6) -> 6,
    # giving 78 instead of 72). No dot means zero extra balls.
    extra = min(int(part), 6) if dot else 0
    print(f"{over=}", "balls=", full + extra)
    return full + extra
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def save_history(fname, row, total_balls):
    """Append the latest prediction row to history/<fname> and plot progress.

    Args:
        fname: CSV file name (derived from the match URL by predict()).
        row: Single-row DataFrame with at least the columns
            'balls_left', 'runs', 'predicted' and 'projected'.
        total_balls: Innings length in balls (120 for T20, 300 for ODI).

    Returns:
        matplotlib.figure.Figure: runs-so-far curve plus predicted and
        projected final-score extrapolations.
    """
    # Append when the history file already exists; otherwise create it and
    # write the header only on creation.
    # NOTE(review): isfile() is checked twice and could race with another
    # writer; acceptable for a single-user Streamlit app.
    row.to_csv(
        os.path.join("history", fname),
        mode="a" if os.path.isfile(os.path.join("history", fname)) else "w+",
        header=not os.path.isfile(os.path.join("history", fname)),
    )
    fig, ax = plt.subplots()
    historydf = pd.read_csv(os.path.join("history", fname))
    # X axis: balls bowled so far, derived from balls remaining.
    balls = (total_balls - historydf["balls_left"]).to_list()
    runs = historydf["runs"].astype(int).to_list()
    ax.plot(balls, runs, label="So Far")
    # Extend the x axis to the end of the innings for the extrapolations.
    balls.append(total_balls)
    # Model's predicted final score, drawn from the last real point.
    pred_runs = runs + [historydf["predicted"].astype(int).iloc[-1]]
    ax.plot(balls[-2:], pred_runs[-2:], label="Predicted")
    # Naive current-run-rate projection, for comparison.
    proj_runs = runs + [historydf["projected"].astype(int).iloc[-1]]
    ax.plot(balls[-2:], proj_runs[-2:], label="Projected")
    # Label the current score and both endpoints.
    ax.annotate(str(runs[-1]), xy=(balls[-2], runs[-1]))
    ax.annotate(str(pred_runs[-1]), xy=(balls[-1], pred_runs[-1]))
    ax.annotate(str(proj_runs[-1]), xy=(balls[-1], proj_runs[-1]))
    plt.xlim([0, total_balls])
    plt.ylim([0, max(pred_runs[-1], proj_runs[-1]) + 100])
    ax.set_xlabel("Balls")
    ax.set_ylabel("Runs")
    ax.legend()
    return fig
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def load_model(format):
    """Load the trained regressor for the given match format.

    Args:
        format: "T20" or "ODI".

    Returns:
        The joblib-deserialized model from the model/ directory.

    Raises:
        ValueError: if *format* is not "T20" or "ODI".
        (The original built "model/" + None for unknown formats, which
        surfaced as an opaque TypeError.)
    """
    model_files = {
        "T20": "t20features.feather.joblib",
        "ODI": "odifeatures.feather.joblib",
    }
    if format not in model_files:
        raise ValueError(f"Unsupported match format: {format!r}")
    return joblib.load("model/" + model_files[format])
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def simulator(args, format):
    """Run the trained model for *format* on a single hand-built feature row.

    Args:
        args: Mapping of feature name -> value (insertion order is the
            column order).
        format: "T20" or "ODI"; selects which trained model to load.

    Returns:
        The model's prediction array for the single row.
    """
    feature_row = pd.DataFrame([args.values()], columns=args.keys())
    regressor = load_model(format)
    return regressor.predict(feature_row)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def predict(url):
    """Scrape a live match and predict the batting side's final score.

    Args:
        url: Cricbuzz live-score page URL.

    Returns:
        [error_message] (a 1-element list) when scraping failed;
        a 10-tuple (matchState, predicted, score, format, title, status,
        inning, batting_team, batting_team_win, fig) otherwise, where
        `predicted` and `fig` are None when the match is not in progress.
    """
    # History file name: URL stripped to alphanumerics, one CSV per match.
    fname = "".join(list(filter(str.isalnum, url))) + ".csv"
    ret = scrape(url)
    print(ret)
    if len(ret) == 1:
        # scrape() signals failure with a 1-tuple containing the error text.
        err = ret[0]
        return [err]
    else:
        (
            matchState,
            score,
            run_last_5_overs,
            wkt_last_5_overs,
            runs,
            wkts,
            overs,
            req_rr,
            req,
            current_rr,
            format,
            title,
            status,
            batting_team,
            bowling_team,
            batting_team_enc,
            bowling_team_enc,
            inning,
        ) = ret
    if matchState != "inprogress":
        # Nothing to predict; return display fields only.
        return matchState, None, score, format, title, status, None, None, None, None

    total_balls = 120 if format == "T20" else 300 if format == "ODI" else None
    balls = overtoball(overs)
    # Last-5-overs run rate; capped at the balls actually bowled early on.
    # NOTE(review): divides by balls — would raise ZeroDivisionError at 0.0
    # overs; confirm scrape() never reports an in-progress match at 0 balls.
    rr_last_5_overs = (int(run_last_5_overs) * 6) / min(30, balls)
    # current_rr = (runs * 6) / balls
    # Runs per wicket, with +1 to avoid division by zero at 0 wickets.
    avg = runs / (wkts + 1)
    # NOTE(review): no-op assignment kept from the original; safe to remove.
    req_rr = req_rr
    wkts_left = 10 - wkts
    # 2nd innings: balls left until the target is reached at the required RR.
    balls_left = (total_balls - balls) if inning == 1 else math.ceil(req * 6 / req_rr)
    # Loose plausibility bounds (0.5 and 3 runs per ball).
    min_score_avg, max_score_avg = (
        math.ceil(balls_left * 0.5),
        math.ceil(balls_left * 3),
    )
    # Momentum signal: recent run rate vs overall run rate.
    rr_diff = rr_last_5_overs - current_rr
    # Feature row in the schema the model was trained on (see model.py).
    inputs = {
        "batting_team": batting_team_enc,
        "balls": balls,
        "runs": runs,
        "wickets": wkts,
        "wkt_last_5_overs": wkt_last_5_overs,
        "runrate_last_5_overs": rr_last_5_overs,
        "current_RR": current_rr,
        "runrate_last_5_overs-current_RR": rr_diff,
        "average": avg,
        "balls_left": int(balls_left),
        "wkts_left": int(wkts_left),
        "required_RR": -9999,
        "projected_score_more": math.ceil(balls_left * ((runs) / (balls))),
        "min_score_more": math.ceil(balls_left * 0.5),
        "max_score_more": math.ceil(balls_left * 3),
        "projected_avg_score_more": math.ceil((10 - wkts) * runs / (1 + wkts)),
    }
    inputdf = pd.DataFrame(inputs, index=[0])
    if batting_team_enc is None:
        # Unknown team: replicate the row for every trained team id so the
        # model prediction can be averaged across all of them.
        inputdf = inputdf.drop(columns=["batting_team"])
        inputdf = pd.concat([inputdf] * len(all_teams_enc))
        inputdf["batting_team"] = all_teams_enc
    # Enforce the training-time column order.
    inputdf = inputdf[features]
    model = load_model(format)
    h = model.predict(inputdf)
    print(f"{h=}")
    # Naive extrapolation at the current run rate, for comparison.
    projected_score_more = balls_left * current_rr / 6
    projected = math.ceil(projected_score_more + runs)
    # Model output is learned as an offset on top of the naive projection.
    predicted_score_more = math.ceil(h.mean() + projected_score_more)
    # predicted_score_more = min(max(min_score_avg, predicted_score_more), max_score_avg)
    predicted = runs + predicted_score_more

    print(f"{runs=}, {projected=}, {predicted=}")
    inputdf["timestamp"] = datetime.datetime.now()
    inputdf["runs"] = runs
    if inning == 2:
        target = req + runs
        print(f"{target=}")
        inputdf["target"] = target
        # Positive margin -> chasing side predicted to win.
        batting_team_win = int(predicted - target)
    else:
        batting_team_win = None
        inputdf["target"] = -9999
    inputdf["predicted"] = int(predicted)
    inputdf["projected"] = int(projected)
    print(inputdf.to_string())
    fig = save_history(fname, inputdf, total_balls)

    return (
        matchState,
        predicted,
        score,
        format,
        title,
        status,
        inning,
        batting_team,
        batting_team_win,
        fig,
    )
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def getoption(predicted, maxscore):
    """Build the ECharts gauge configuration for a predicted score.

    Args:
        predicted: Predicted final score to display on the gauge.
        maxscore: Gauge maximum (300 for T20, 500 for ODI).

    Returns:
        dict: ECharts option with a single half-circle gauge series.
    """
    # Quartile colour bands, from worst (red) to best (blue).
    band_colors = [
        [0.25, "#FF403F"],
        [0.5, "#FDDD60"],
        [0.75, "#00FF00"],
        [1, "#0000FF"],
    ]
    # Title colour matches the band the prediction falls into.
    if predicted > maxscore * 0.75:
        title_color = "#0000FF"
    elif predicted > maxscore * 0.5:
        title_color = "#00FF00"
    elif predicted > maxscore * 0.25:
        title_color = "#FDDD60"
    else:
        title_color = "#FF403F"
    gauge = {
        "type": "gauge",
        # Half circle: sweep from 180 degrees down to 0.
        "startAngle": 180,
        "endAngle": 0,
        "min": 0,
        "max": maxscore,
        "center": ["50%", "50%"],
        "splitNumber": 4,
        "axisLine": {"lineStyle": {"width": 6, "color": band_colors}},
        "pointer": {
            "icon": "path://M12.8,0.7l12,40.1H0.7L12.8,0.7z",
            "length": "12%",
            "width": 30,
            "offsetCenter": [0, "-60%"],
            "itemStyle": {"color": "auto"},
        },
        "axisTick": {
            "length": 10,
            "lineStyle": {"color": "auto", "width": 2},
        },
        "splitLine": {
            "length": 15,
            "lineStyle": {"color": "auto", "width": 5},
        },
        "axisLabel": {
            "fontSize": 12,
            "distance": -60,
        },
        "title": {
            "offsetCenter": [0, "-20%"],
            "fontSize": 20,
            "color": title_color,
        },
        "detail": {
            "fontSize": 15,
            "offsetCenter": [0, "0%"],
            "valueAnimation": True,
            "color": "auto",
            "formatter": "Predicted Score: {value}",
        },
        "data": [
            {
                "value": round(predicted),
            }
        ],
    }
    return {"series": [gauge]}
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def timestamp(func):
    """Decorator that logs wall-clock start/end times around *func*.

    BUG FIX: the original wrapper accepted only positional arguments
    (keyword calls raised TypeError) and discarded the wrapped function's
    metadata; both are fixed via **kwargs and functools.wraps.

    Args:
        func: Callable to wrap.

    Returns:
        The wrapped callable with identical signature and return value.
    """
    from functools import wraps

    @wraps(func)
    def caller(*args, **kwargs):
        print(
            "\n---->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Initiated: ",
            datetime.datetime.now(),
            "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<----",
        )
        ret = func(*args, **kwargs)
        print(
            "\n---->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Completed: ",
            datetime.datetime.now(),
            "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<----",
        )
        return ret

    return caller
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
@timestamp
def render(url):
    """Fetch, predict and format one match for the Streamlit UI.

    Args:
        url: Cricbuzz live-score page URL.

    Returns:
        (markdown, option, fig): display text, ECharts gauge option (or
        None when there is no prediction), and the history matplotlib
        figure (or None).
        NOTE(review): on scrape failure the first element is a *list*
        rather than the joined string returned on success.
    """
    markdown = []
    option = None
    print("fetching from", url)
    ret = predict(url.strip())
    if len(ret) == 1:
        # predict() signals a scrape failure with a 1-element list.
        err = ret[0]
        markdown.append("Error fetching url...")
        return markdown, None, None
    (
        matchState,
        predicted,
        score,
        format,
        title,
        status,
        inning,
        batting_team,
        batting_team_win,
        fig,
    ) = ret

    if matchState:
        markdown.append("Live score credits: cricbuzz.com")
    if title:
        # Page titles look like "<match> | Cricbuzz.com"; show the series
        # part first, then the match part, with branding stripped.
        if "|" in title:
            l1 = (
                title.split("|")[1]
                .replace("Cricbuzz.com", "")
                .replace("Cricbuzz", "")
            )
            if l1.strip():
                markdown.append(l1.strip())
            l2 = (
                title.split("|")[0]
                .replace("Cricbuzz.com", "")
                .replace("Cricbuzz", "")
            )
            if l2.strip():
                markdown.append(l2.strip())
        else:
            markdown.append(
                title.replace("Cricbuzz.com", "").replace("Cricbuzz", "")
            )
    # One-line summary: status; score; match state.
    nutshell = ""
    if status:
        nutshell += status + "; "
    if score:
        nutshell += score + "; "
    if matchState:
        nutshell += matchState + "; "
    if nutshell:
        markdown.append(nutshell)
    # if matchState and matchState != "inprogress":
    #     markdown.append(matchState)
    if predicted:
        if inning == 2:
            # Positive margin means the chasing side is predicted to win.
            if batting_team_win >= 0:
                markdown.append(f"{batting_team} may win")
            else:
                markdown.append(
                    f"{batting_team} may lose by {-int(batting_team_win)} runs"
                )
        maxscore = 300 if format == "T20" else 500 if format == "ODI" else None
        option = getoption(predicted, maxscore)
    if matchState is None:
        markdown.append("Error fetching url...")

    return "\n".join(markdown), option, fig
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
if __name__ == "__main__":
    # Streamlit entry point; streamlit_analytics records page usage.
    with streamlit_analytics.track(unsafe_password="credict123"):
        st.set_page_config(page_title="Cricket Prophet")
        st.title("Cricket Prophet")
        st.write("**An ML-driven Cricket Score Predictor**")

        live_matches = get_live_matches("https://cricbuzz.com")
        if live_matches:
            # NOTE(review): `option` is later reused for the ECharts config
            # returned by render(), shadowing this selectbox value.
            option = st.selectbox(
                "Choose a live match here",
                list(live_matches.keys()) + ["Custom URL", "Simulator"],
            )
            if option == "Simulator":
                # Manual what-if mode: the user types the feature values.
                format = st.selectbox("Format", ["T20", "ODI"])
                args = {}
                # Fixed placeholder team id for simulation.
                args["batting_team"] = 1
                args["wkt_last_5_overs"] = st.number_input(
                    "wkt_last_5_overs", value=0.0, step=0.01, format="%f"
                )
                args["current_RR"] = st.number_input(
                    "current_RR", value=0.0, step=0.01, format="%f"
                )
                args["balls_left"] = st.number_input(
                    "balls_left", value=0.0, step=0.01, format="%f"
                )
                args["wkts_left"] = st.number_input(
                    "wkts_left", value=0.0, step=0.01, format="%f"
                )
                # The model feature is the *difference* between recent and
                # overall run rate, computed from the two inputs.
                args["runrate_last_5_overs-current_RR"] = (
                    st.number_input(
                        "runrate_last_5_overs", value=0.0, step=0.01, format="%f"
                    )
                    - args["current_RR"]
                )
                balls = 300 if format == "ODI" else 120
                # Final score = naive full-innings projection + model offset.
                st.text(
                    str(int((balls * args["current_RR"] / 6) + simulator(args, format)))
                )
            else:
                if option == "Custom URL":
                    url = st.text_input("Enter cricbuzz match link")
                else:
                    url = live_matches.get(option)

                col1, col2 = st.columns([3.5, 0.6])

                with col1:
                    live = st.button("Live", help="Livestream")
                with col2:
                    fetch = st.button("Fetch", help="Refresh")

                col3, _ = st.columns([1, 4])
                with col3:
                    # Polling period for the "Live" auto-refresh loop.
                    interval = st.number_input(
                        label="Sync Interval (Seconds)", step=1, min_value=1, value=100
                    )

                placeholder = st.empty()

                # One-shot refresh.
                if fetch:
                    if url:
                        markdown, option, fig = render(url)
                        placeholder.empty()
                        with placeholder.container():
                            st.text(markdown)
                            st.text(f"Last updated at {time.strftime('%H:%M %p')}")
                            if option:
                                st_echarts(
                                    option,
                                    width="450px",
                                    height="350px",
                                    key="gauge" + str(datetime.datetime.now()),
                                )
                            if fig:
                                st.pyplot(fig)

                # Livestream: re-render every `interval` seconds until the
                # match stops producing a history figure.
                # NOTE(review): indentation reconstructed from the diff; the
                # `else: break` is read as the exit path of `if fig:` —
                # confirm against the original file.
                if live:
                    if url:
                        while True:
                            markdown, option, fig = render(url)
                            placeholder.empty()
                            with placeholder.container():
                                st.text(markdown)
                                st.text(f"Last updated at {time.strftime('%H:%M %p')}")
                                if option:
                                    st_echarts(
                                        option,
                                        width="450px",
                                        height="350px",
                                        key="gauge" + str(datetime.datetime.now()),
                                    )
                                if fig:
                                    st.pyplot(fig)
                                else:
                                    break
                            time.sleep(interval)
        else:
            st.text("Error fetching matches")
|
server.sh
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/zsh
# Launch the Streamlit app using the project's local virtualenv.
source env/bin/activate && streamlit run serve.py
|
trainandserve.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/zsh
# Prepare working directories, then run the full pipeline:
# install deps -> feature extraction -> model training -> Streamlit serving.
mkdir -p data history model result temp
source env/bin/activate && pip install -r requirements.txt && python features.py && python model.py && streamlit run serve.py
|