import json
from collections import defaultdict

from datasets import load_dataset


def load_hf_dataset(dataset_name, split='train'):
    """Load ``dataset_name`` with ``datasets.load_dataset`` and return one split.

    Args:
        dataset_name: Identifier passed straight to ``load_dataset``.
        split: Key of the split to pick from the loaded result
            (defaults to ``'train'``).

    Returns:
        Whatever ``load_dataset(dataset_name)`` holds under ``split``.
    """
    return load_dataset(dataset_name)[split]

def calculate_win_accuracy(predictions, outcomes):
    """Compute, per user, the fraction of predictions that named the winner.

    Args:
        predictions: Mapping of user name -> ``{match_id: predicted_winner}``.
        outcomes: Iterable of row mappings, each providing ``'match_id'`` and
            ``'winning_team'`` keys (e.g. a Hugging Face ``Dataset`` or a
            plain list of dicts).

    Returns:
        A ``defaultdict(float)`` with one entry per user in ``predictions``.
        The value is ``correct / scored`` rounded to two decimals, where only
        predictions whose ``match_id`` has a recorded outcome are scored.
        Users with no scored prediction get ``0.0``.
    """
    # Index the outcomes once. The previous implementation re-filtered the
    # whole outcomes dataset for every single prediction.
    winners = {}
    for row in outcomes:
        # setdefault keeps the first row seen for a match_id, which is the
        # row the old ``filter(...)[0]`` lookup would have used.
        winners.setdefault(row['match_id'], row['winning_team'])

    win_accuracy = defaultdict(float)
    for user, user_predictions in predictions.items():
        # One boolean per prediction that has a known outcome.
        scored = [
            predicted_winner == winners[match_id]
            for match_id, predicted_winner in user_predictions.items()
            if match_id in winners
        ]
        if scored:
            win_accuracy[user] = round(sum(scored) / len(scored), 2)
        else:
            # Nothing to score: avoid dividing by zero.
            win_accuracy[user] = 0.0
    return win_accuracy


def get_last_5_predictions(predictions, outcomes, users):
    """Report whether each user's five most recent predictions were correct.

    "Most recent" means the last five entries of each user's prediction
    mapping in its existing iteration order; no sorting by match is done.

    Args:
        predictions: Mapping of user name -> ``{match_id: predicted_winner}``.
        outcomes: Iterable of row mappings, each providing ``'match_id'`` and
            ``'winning_team'`` keys (e.g. a Hugging Face ``Dataset`` or a
            plain list of dicts).
        users: Iterable of user names that must appear in the result even if
            they made no predictions.

    Returns:
        Dict mapping user name -> list of exactly five slots. Slot ``i`` is
        ``True``/``False`` for a correct/incorrect prediction, or ``None``
        when there is no prediction, the prediction is ``None``, or the match
        has no recorded outcome. Users found only in ``predictions`` are
        included as well instead of raising ``KeyError``.
    """
    # Index the outcomes once. The previous implementation re-filtered the
    # whole outcomes dataset for every single prediction.
    winners = {}
    for row in outcomes:
        # setdefault keeps the first row seen for a match_id, which is the
        # row the old ``filter(...)[0]`` lookup would have used.
        winners.setdefault(row['match_id'], row['winning_team'])

    last_5_predictions = {user: [None] * 5 for user in users}
    for user, user_predictions in predictions.items():
        # A predictor missing from ``users`` previously crashed the lookup;
        # give them a fresh row of empty slots instead.
        slots = last_5_predictions.setdefault(user, [None] * 5)
        recent = list(user_predictions.items())[-5:]
        for i, (match_id, predicted_winner) in enumerate(recent):
            if predicted_winner is None or match_id not in winners:
                # No prediction or no outcome yet: leave the slot as None.
                continue
            slots[i] = predicted_winner == winners[match_id]
    return last_5_predictions


def main():
    """Attach recent prediction results to the leaderboard and print it.

    Loads the predictions, outcomes and leaderboard datasets, keeps only
    predictions for matches whose number is 50 or higher, adds a ``last_5``
    list to every user's points entry and prints the first leaderboard row
    as indented JSON.
    """
    # Predictions for matches numbered below this value are ignored.
    first_match_number = 50

    # All three datasets come from Hugging Face Dataset repos.
    predictions = load_hf_dataset("Jay-Rajput/DIS_IPL_Preds")
    outcomes = load_hf_dataset("Jay-Rajput/DIS_IPL_Outcomes")
    users_points_dataset = load_hf_dataset("Jay-Rajput/DIS_IPL_Leads")

    # Materialise the leaderboard rows as a list of dictionaries.
    # Presumably each row maps user name -> points dict, e.g.
    #     {"Arpit": {"0": 45181}, "Ganesh": {"0": 10251}, ...}
    # (shape taken from an earlier hard-coded sample; verify against the dataset).
    users_points = list(users_points_dataset)

    # Group predictions as {user: {match_id: predicted_winner}}, keeping only
    # matches from `first_match_number` onwards.
    filtered_predictions = {}
    for row in predictions:
        match_id = row.get("match_id")
        predicted_winner = row.get("predicted_winner")
        # The match number is the part of match_id after the last underscore.
        if int(match_id.split('_')[-1]) < first_match_number:
            continue
        filtered_predictions.setdefault(row["user_name"], {})[match_id] = predicted_winner

    # Winning accuracy per user; only needed by the disabled code further down.
    win_accuracy = calculate_win_accuracy(filtered_predictions, outcomes)

    # Every user name that appears in any leaderboard row.
    users = [user for user_data in users_points for user in user_data]

    # Correct / incorrect / unknown flags for each user's last five predictions.
    last_5_predictions = get_last_5_predictions(filtered_predictions, outcomes, users)

    # Extend each user's points entry in place.
    for user_data in users_points:
        for user, points in user_data.items():
            # Disabled: also store the winning accuracy as a percentage.
            # win_acc = win_accuracy.get(user, 0.0)
            # points['win_accuracy'] = round(win_acc * 100, 2)
            points['last_5'] = last_5_predictions.get(user, [])

    # Show the first (updated) leaderboard row.
    print(json.dumps(users_points[0], indent=4))

    # Disabled: persist the updated leaderboard to a JSON file.
    # with open("leaders/users.json", "w") as json_file:
    #     json.dump(users_points, json_file, indent=4)

# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()