# LCA-PORVID's picture
# Upload 34 files
# ebdb5af verified
from pt_variety_identifier.src.results import Results as BaseResults
import logging
import os
from joblib import dump
import pandas as pd
class Results(BaseResults):
    """Results tracker that also persists the best estimator per train domain.

    Adds a ``process`` step on top of ``BaseResults``. The attributes and
    helpers used here (``filepath``, ``best_f1_scores``,
    ``best_final_results``, ``best_intermediate_results`` and
    ``other_results``) are not defined in this class, so they are expected to
    come from ``BaseResults``; what they do with the data is not visible from
    this file.
    """

    def __init__(self, filepath, DOMAINS) -> None:
        """Forward ``filepath`` and ``DOMAINS`` unchanged to ``BaseResults``."""
        super().__init__(filepath, DOMAINS)

    def process(self, cross_domain_f1, train_domain, test_results, grid_results, balance, pos_prob, ner_prob) -> None:
        """Record the outcome of one search run for ``train_domain``.

        When ``cross_domain_f1`` is strictly greater than the score stored in
        ``self.best_f1_scores[train_domain]["cross_domain_f1"]``, the stored
        entry is overwritten, ``best_final_results()`` is called, the best
        estimator is dumped to ``<filepath>/models/<train_domain>.joblib`` and
        a summary is passed to ``best_intermediate_results``. The full
        cross-validation table is passed to ``other_results`` on every call.

        Args:
            cross_domain_f1: Score compared against the stored best.
            train_domain: Key into ``self.best_f1_scores``; also used as the
                model file name.
            test_results: Test metrics, stored and forwarded as-is.
            grid_results: Object exposing ``best_params_``,
                ``best_estimator_``, ``best_score_`` and ``cv_results_``
                (presumably a fitted scikit-learn search object; confirm
                against the caller).
            balance: Run setting, stored and forwarded as-is.
            pos_prob: Run setting, stored and forwarded as-is.
            ner_prob: Run setting, stored and forwarded as-is.
        """
        # NOTE(review): the source this was restored from had lost its
        # indentation. Placing ``best_intermediate_results`` inside the ``if``
        # and ``other_results`` outside it is a reconstruction - confirm.
        run_info = {
            "domain": train_domain,
            "balance": balance,
            "pos_prob": pos_prob,
            "ner_prob": ner_prob,
        }

        best = self.best_f1_scores[train_domain]
        if cross_domain_f1 > best["cross_domain_f1"]:
            logging.info("New best f1 score for %s", train_domain)

            best["cross_domain_f1"] = cross_domain_f1
            best["test_results"] = test_results
            best["params"] = grid_results.best_params_
            best["balance"] = balance
            best["pos_prob"] = pos_prob
            best["ner_prob"] = ner_prob

            logging.info("Saving best cross_domain_f1 scores to file")
            self.best_final_results()

            # Create the target directory first so that open() cannot fail
            # with FileNotFoundError on a fresh output location.
            models_dir = os.path.join(self.filepath, "models")
            os.makedirs(models_dir, exist_ok=True)
            model_path = os.path.join(models_dir, f"{train_domain}.joblib")
            with open(model_path, "wb") as f:
                dump(grid_results.best_estimator_, f)

            self.best_intermediate_results({
                **run_info,
                "train": {
                    "best_score": grid_results.best_score_,
                },
                "test": {
                    'all': test_results,
                    'cross_domain_f1': cross_domain_f1,
                },
                "best_params": grid_results.best_params_,
            })

        self.other_results({
            **run_info,
            "train": {
                # cv_results_ is serialised to a JSON string via a DataFrame.
                "cv_results": pd.DataFrame(grid_results.cv_results_).to_json(),
            },
            "test": test_results,
        })