Spaces:
Runtime error
Runtime error
| import json, os | |
| import pandas as pd | |
| from tqdm import tqdm | |
| from datetime import datetime | |
| root = "cricsheet/all_json" | |
| # print([json.load(open(os.path.join(root, f)))['meta']['data_version'] for f in os.listdir(root) if json.load(open(os.path.join(root, f)))['meta']['data_version']=='1.1.0']) | |
| # print(set([json.load(open(os.path.join(root, f)))['info']['match_type'] for f in os.listdir(root) if f.endswith('.json') and json.load(open(os.path.join(root, f)))['meta']['data_version']=='1.1.0'])) | |
| # formats: 'ODI', 'MDM', 'IT20', 'ODM', 'Test', 'T20' | |
class Inning:
    """One innings of a match: its ball-by-ball frame plus match metadata.

    Parameters
    ----------
    df:
        Ball-by-ball table for the innings. It must have a ``"run"`` column,
        because the final score is computed as the sum of that column.
    inning:
        Innings number within the match (1 for the first innings).
    format:
        Match-format label stored unchanged on the instance.
    """

    def __init__(self, df, inning, format):
        self.df = df
        self.inning = inning
        self.final_score = df["run"].sum()
        self.format = format
        # These attributes are filled in later by the code that pairs up the
        # innings of a match. Declaring them here gives every instance the
        # same shape, so reading e.g. ``.target`` on a first innings yields
        # ``None`` instead of raising AttributeError.
        self.target = None
        self.battingteam = None
        self.bowlingteam = None
        self.matchid = None

    def settarget(self, target):
        """Store ``target`` on the innings.

        A warning is printed when this is called on a first innings, but the
        value is stored regardless.
        """
        if self.inning == 1:
            print("first innning: don't set target")
        self.target = target
def process_inning(ballbyball):
    """Flatten one innings into a per-ball DataFrame of runs and wickets.

    ``ballbyball`` is indexed as ``ballbyball["overs"]``; each over provides a
    ``"deliveries"`` list, and each delivery provides ``["runs"]["total"]``
    and, optionally, a ``"wickets"`` list whose length is used as the wicket
    count for that delivery.

    Every over contributes at most six rows: any delivery past the sixth is
    folded into the over's last row (runs and wickets are added to it).
    NOTE(review): presumably this absorbs extra deliveries such as wides and
    no-balls - confirm against the data format.

    Returns a DataFrame with columns ``"run"`` and ``"wicket"`` and a 1-based
    index named ``"balls"``.
    """
    rows = []
    for over in ballbyball["overs"]:
        per_ball = []
        for position, delivery in enumerate(over["deliveries"]):
            runs = delivery["runs"]["total"]
            wickets = len(delivery.get("wickets", []))
            if position >= 6:
                # Seventh and later deliveries are merged into the previous row.
                prev_runs, prev_wickets = per_ball.pop()
                runs += prev_runs
                wickets += prev_wickets
            per_ball.append((runs, wickets))
        rows += per_ball

    ball_numbers = range(1, len(rows) + 1)
    frame = pd.DataFrame(rows, columns=["run", "wicket"], index=ball_numbers)
    frame.index.name = "balls"
    return frame
def process_matches(matches, format):
    """Yield paired ``Inning`` objects for every two-innings match.

    ``matches`` is an iterable in which each element is the sequence of
    innings of one match. Elements whose length is not exactly two are
    skipped. For each remaining match the generator:

    * builds an ``Inning`` for each of the two innings (numbered 1 and 2),
    * sets the second innings' target to the first innings' final score,
    * records batting and bowling team names taken from each innings'
      ``"team"`` entry,
    * assigns both innings the same ``matchid``, a counter starting at 1 that
      advances only for matches that are kept,

    and then yields the first innings followed by the second.
    """
    print("processing jsons...")
    match_counter = 0
    for innings in tqdm(matches):
        if len(innings) != 2:
            continue

        first = Inning(process_inning(innings[0]), 1, format)
        second = Inning(process_inning(innings[1]), 2, format)
        second.settarget(first.final_score)

        # Read both names before assigning anything to the innings objects.
        first_team, second_team = innings[0]["team"], innings[1]["team"]
        first.battingteam = first_team
        second.battingteam = second_team
        first.bowlingteam = second_team
        second.bowlingteam = first_team

        match_counter += 1
        first.matchid = match_counter
        second.matchid = match_counter

        yield first
        yield second
def get_all_matches(
    format,
    since=1990,
):
    """Load matches of one format from ``root`` and return their innings.

    Every ``*.json`` file directly inside the module-level ``root`` directory
    is parsed. A file is kept when

    * ``format`` is contained in its ``info.match_type`` value (a containment
      test, so for string values ``"T20"`` also matches ``"IT20"``), and
    * the year of the first entry of ``info.dates`` (parsed as ``YYYY-MM-DD``)
      is greater than or equal to ``since``.

    The ``innings`` of every kept file are handed to ``process_matches`` and
    the result is returned as a list.
    """
    matches = []
    print("Loading jsons...")
    for filename in tqdm(os.listdir(root)):
        if not filename.endswith(".json"):
            continue

        # Use a context manager so each handle is closed immediately instead
        # of being left to the garbage collector, and decode as UTF-8 rather
        # than the platform default encoding.
        with open(os.path.join(root, filename), encoding="utf-8") as handle:
            obj = json.load(handle)

        info = obj["info"]
        # Check the format first so dates are only parsed for candidate files.
        if format not in info["match_type"]:
            continue
        first_day = datetime.strptime(info["dates"][0], "%Y-%m-%d")
        if first_day.year >= since:
            matches.append(obj["innings"])
    return list(process_matches(matches, format))
| # Example usage: get_all_matches("T20") | |