import json
from collections import defaultdict

# Only predictions for matches numbered at or above this value are considered.
# NOTE(review): the original inline comments said "42" while the code compared
# against 50; the code's value is kept here -- confirm which one is intended.
MIN_MATCH_NUMBER = 50


def load_hf_dataset(dataset_name: str, split: str = 'train'):
    """Load a Hugging Face dataset and return one of its splits.

    Args:
        dataset_name: Repository id passed to ``datasets.load_dataset``.
        split: Name of the split to return.

    Returns:
        The object stored under ``split`` in the loaded dataset.
    """
    # Imported lazily so the pure helper functions in this module can be
    # imported (and tested) without the third-party `datasets` package.
    from datasets import load_dataset

    dataset = load_dataset(dataset_name)
    return dataset[split]


def _winners_by_match(outcomes) -> dict:
    """Index outcome rows as ``{match_id: winning_team}``.

    When several rows share a ``match_id`` the first one wins, which matches
    taking element ``[0]`` of a filtered dataset.
    """
    winners = {}
    for row in outcomes:
        winners.setdefault(row['match_id'], row['winning_team'])
    return winners


def _match_number(match_id):
    """Return the integer after the last ``_`` in ``match_id``.

    Returns ``None`` when ``match_id`` is not a string or its suffix is not
    numeric, so callers can skip malformed rows instead of crashing.
    """
    try:
        return int(match_id.split('_')[-1])
    except (AttributeError, ValueError):
        return None


def calculate_win_accuracy(predictions: dict, outcomes):
    """Compute each user's fraction of correct predictions.

    Args:
        predictions: ``{user: {match_id: predicted_winner}}``.
        outcomes: Iterable of rows with ``'match_id'`` and ``'winning_team'``
            keys (for example a Hugging Face dataset split or a list of dicts).

    Returns:
        A ``defaultdict(float)`` mapping each user to the share of their
        predictions that matched the recorded winner, rounded to two
        decimals. Predictions for matches without a recorded outcome are
        ignored; a user with no scorable prediction gets ``0.0``.
    """
    # Build the lookup once instead of scanning `outcomes` per prediction.
    winners = _winners_by_match(outcomes)

    win_accuracy = defaultdict(float)
    for user, user_predictions in predictions.items():
        scored = [
            predicted_winner == winners[match_id]
            for match_id, predicted_winner in user_predictions.items()
            if match_id in winners
        ]
        win_accuracy[user] = round(sum(scored) / len(scored), 2) if scored else 0.0
    return win_accuracy


def get_last_5_predictions(predictions: dict, outcomes, users):
    """Report whether each user's five most recent predictions were correct.

    "Most recent" means the last five keys of the user's prediction dict in
    insertion order.

    Args:
        predictions: ``{user: {match_id: predicted_winner}}``.
        outcomes: Iterable of rows with ``'match_id'`` and ``'winning_team'``
            keys.
        users: User names that must appear in the result even when they have
            no predictions.

    Returns:
        ``{user: [r0, r1, r2, r3, r4]}`` where each entry is ``True`` or
        ``False`` for a scored prediction and ``None`` when the prediction is
        missing or the match has no recorded outcome. Results are filled from
        index 0, so users with fewer than five predictions have trailing
        ``None`` entries.
    """
    winners = _winners_by_match(outcomes)
    last_5_predictions = {user: [None] * 5 for user in users}

    for user, user_predictions in predictions.items():
        # A user may have predictions without being listed in `users`;
        # create their slots on demand rather than raising KeyError.
        slots = last_5_predictions.setdefault(user, [None] * 5)
        last_5_matches = list(user_predictions)[-5:]

        for i, match_id in enumerate(last_5_matches):
            predicted_winner = user_predictions[match_id]
            if predicted_winner is None or match_id not in winners:
                slots[i] = None
            else:
                slots[i] = predicted_winner == winners[match_id]

    return last_5_predictions


def group_predictions_by_user(prediction_rows, min_match_number: int = MIN_MATCH_NUMBER) -> dict:
    """Group prediction rows per user, keeping only sufficiently late matches.

    Args:
        prediction_rows: Iterable of dict rows with ``'user_name'``,
            ``'match_id'`` and ``'predicted_winner'`` keys. The match number
            is the integer after the last ``_`` of ``match_id``.
        min_match_number: Rows whose match number is below this are dropped.

    Returns:
        ``{user_name: {match_id: predicted_winner}}``. Rows whose
        ``match_id`` is missing or has no numeric suffix are skipped.
    """
    grouped = {}
    for row in prediction_rows:
        match_id = row.get("match_id")
        predicted_winner = row.get("predicted_winner")

        match_number = _match_number(match_id)
        if match_number is None or match_number < min_match_number:
            continue

        grouped.setdefault(row["user_name"], {})[match_id] = predicted_winner
    return grouped


def main():
    """Print the first leaderboard row with a ``last_5`` list per user."""
    predictions = load_hf_dataset("Jay-Rajput/DIS_IPL_Preds")
    outcomes = load_hf_dataset("Jay-Rajput/DIS_IPL_Outcomes")
    users_points_dataset = load_hf_dataset("Jay-Rajput/DIS_IPL_Leads")

    # Materialise the leaderboard rows as plain dicts. Each row maps a user
    # name to that user's points dict, e.g. (sample from an earlier snapshot):
    #     {"Arpit": {"0": 45181}, "Ganesh": {"0": 10251}, ...}
    users_points = list(users_points_dataset)

    filtered_predictions = group_predictions_by_user(predictions)

    users = [user for user_data in users_points for user in user_data]
    last_5_predictions = get_last_5_predictions(filtered_predictions, outcomes, users)

    for user_data in users_points:
        for user, points in user_data.items():
            # Win accuracy is currently not written to the output. To
            # re-enable it, compute it once before this loop and store it:
            # win_accuracy = calculate_win_accuracy(filtered_predictions, outcomes)
            # win_acc = win_accuracy.get(user, 0.0)
            # points['win_accuracy'] = round(win_acc * 100, 2)  # percentage
            points['last_5'] = last_5_predictions.get(user, [])

    print(json.dumps(users_points[0], indent=4))

    # Saving to disk is currently disabled:
    # with open("leaders/users.json", "w") as json_file:
    #     json.dump(users_points, json_file, indent=4)


if __name__ == "__main__":
    main()