import pandas as pd import numpy as np from sklearn.metrics import accuracy_score def find_best_model_by_metric(models_dict, metric_name, is_smaller=True): if is_smaller: best_value = float( "inf" ) # Initialize with a very high value for loss (lower is better) else: best_value = float( "-inf" ) # Initialize with a very high value for loss (higher is better) best_seq_len = None print(f"Searching for best model based on validation '{metric_name}' loss:\n") for k, v in models_dict.items(): # current_value = v['best_score']['validation'][metric_name] current_value = v[metric_name] print(f"Model (SEQ_LEN={k}): Validation {metric_name} = {current_value}") if is_smaller: if current_value < best_value: best_value = current_value best_seq_len = k else: if current_value > best_value: best_value = current_value best_seq_len = k return best_seq_len, best_value, models_dict[best_seq_len] # def create_majority_pred(train, transition): # majority_pred = transition.idxmax(axis=1) # global_majority = train["target"].mode()[0] # preds = [] # for s in train["state"]: # if s in majority_pred.index: # preds.append(int(majority_pred.loc[s])) # else: # preds.append(int(global_majority)) # acc = accuracy_score( # train["target"], # preds # ) # # return {"markov_acc": acc, "random_guess_acc":1/q} # return acc, np.array(preds) # def evaluate_markov_chain(train, test, transition, q): # train_res, _ = create_majority_pred(train, transition) # test_res, _ = create_majority_pred(test, transition) # return { # "train_acc": train_res, # "test_acc": test_res, # "random_guess_acc": 1/q, # } # def get_accuracy_detail(data, preds): # label_acc = [] # # preds = np.array(preds) # for label in sorted(np.unique(data["ret_bin"])): # mask = (data["target"] == label) # acc = (preds[mask] == data["target"].loc[mask]).mean() # label_acc.append({ # "label": label, # "count": mask.sum(), # "accuracy": acc # }) # label_acc = pd.DataFrame(label_acc) # return label_acc # # return label_acc["accuracy"].sum()