Upload 4 files
Browse files
scripts/evaluate_negative_rejection.py
CHANGED
|
@@ -15,10 +15,10 @@ def evaluate_negative_rejection(config):
|
|
| 15 |
noise_rate = config['noise_rate']
|
| 16 |
passage_num = config['passage_num']
|
| 17 |
|
| 18 |
-
if
|
| 19 |
-
model = GroqClient(plm=
|
| 20 |
else:
|
| 21 |
-
logging.warning(f"Skipping unknown model: {
|
| 22 |
return
|
| 23 |
|
| 24 |
# File paths
|
|
|
|
| 15 |
noise_rate = config['noise_rate']
|
| 16 |
passage_num = config['passage_num']
|
| 17 |
|
| 18 |
+
if modelname in config['models']:
|
| 19 |
+
model = GroqClient(plm=modelname)
|
| 20 |
else:
|
| 21 |
+
logging.warning(f"Skipping unknown model: {modelname}")
|
| 22 |
return
|
| 23 |
|
| 24 |
# File paths
|
scripts/evaluate_noise_robustness.py
CHANGED
|
@@ -13,9 +13,10 @@ def evaluate_noise_robustness(config):
|
|
| 13 |
result_path = config['result_path'] + 'Noise Robustness/'
|
| 14 |
noise_rate = config['noise_rate']
|
| 15 |
passage_num = config['passage_num']
|
|
|
|
| 16 |
|
| 17 |
# Iterate over each model specified in the config
|
| 18 |
-
filename = os.path.join(result_path, f'prediction_{
|
| 19 |
ensure_directory_exists(filename)
|
| 20 |
|
| 21 |
# Load existing results if file exists
|
|
@@ -56,7 +57,7 @@ def evaluate_noise_robustness(config):
|
|
| 56 |
logging.info(f"score: {scores}")
|
| 57 |
logging.info(f"Noise Robustness Accuracy: {accuracy:.2%}")
|
| 58 |
|
| 59 |
-
score_filename = os.path.join(result_path, f'scores_{
|
| 60 |
with open(score_filename, 'w') as f:
|
| 61 |
json.dump(scores, f, ensure_ascii=False, indent=4)
|
| 62 |
|
|
|
|
| 13 |
result_path = config['result_path'] + 'Noise Robustness/'
|
| 14 |
noise_rate = config['noise_rate']
|
| 15 |
passage_num = config['passage_num']
|
| 16 |
+
model_name = config['model_name']
|
| 17 |
|
| 18 |
# Iterate over each model specified in the config
|
| 19 |
+
filename = os.path.join(result_path, f'prediction_{model_name}_noise_{noise_rate}_passage_{passage_num}.json')
|
| 20 |
ensure_directory_exists(filename)
|
| 21 |
|
| 22 |
# Load existing results if file exists
|
|
|
|
| 57 |
logging.info(f"score: {scores}")
|
| 58 |
logging.info(f"Noise Robustness Accuracy: {accuracy:.2%}")
|
| 59 |
|
| 60 |
+
score_filename = os.path.join(result_path, f'scores_{model_name}_noise_{noise_rate}_passage_{passage_num}.json')
|
| 61 |
with open(score_filename, 'w') as f:
|
| 62 |
json.dump(scores, f, ensure_ascii=False, indent=4)
|
| 63 |
|
scripts/get_factual_evaluation.py
CHANGED
|
@@ -13,9 +13,10 @@ def get_factual_evaluation(config):
|
|
| 13 |
result_path = config['result_path'] + 'Counterfactual Robustness/'
|
| 14 |
noise_rate = config['noise_rate']
|
| 15 |
passage_num = config['passage_num']
|
|
|
|
| 16 |
|
| 17 |
# Iterate over each model specified in the config
|
| 18 |
-
filename = os.path.join(result_path, f'prediction_{
|
| 19 |
ensure_directory_exists(filename)
|
| 20 |
|
| 21 |
# Load existing results if file exists
|
|
@@ -61,7 +62,7 @@ def get_factual_evaluation(config):
|
|
| 61 |
scores['correct_tt'] = correct_tt
|
| 62 |
|
| 63 |
#logging.info(f"score: {scores}")
|
| 64 |
-
score_filename = os.path.join(result_path, f'scores_{
|
| 65 |
with open(score_filename, 'w') as f:
|
| 66 |
json.dump(scores, f, ensure_ascii=False, indent=4)
|
| 67 |
|
|
|
|
| 13 |
result_path = config['result_path'] + 'Counterfactual Robustness/'
|
| 14 |
noise_rate = config['noise_rate']
|
| 15 |
passage_num = config['passage_num']
|
| 16 |
+
model_name = config['model_name']
|
| 17 |
|
| 18 |
# Iterate over each model specified in the config
|
| 19 |
+
filename = os.path.join(result_path, f'prediction_{model_name}_noise_{noise_rate}_passage_{passage_num}.json')
|
| 20 |
ensure_directory_exists(filename)
|
| 21 |
|
| 22 |
# Load existing results if file exists
|
|
|
|
| 62 |
scores['correct_tt'] = correct_tt
|
| 63 |
|
| 64 |
#logging.info(f"score: {scores}")
|
| 65 |
+
score_filename = os.path.join(result_path, f'scores_{model_name}_noise_{noise_rate}_passage_{passage_num}.json')
|
| 66 |
with open(score_filename, 'w') as f:
|
| 67 |
json.dump(scores, f, ensure_ascii=False, indent=4)
|
| 68 |
|
scripts/get_prediction_result.py
CHANGED
|
@@ -12,16 +12,18 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
|
|
| 12 |
def get_prediction_result(config, data_file_name):
|
| 13 |
results = []
|
| 14 |
dataset = load_dataset(data_file_name)
|
|
|
|
|
|
|
| 15 |
# Create GroqClient instance for supported models
|
| 16 |
-
if
|
| 17 |
-
model = GroqClient(plm=
|
| 18 |
else:
|
| 19 |
-
logging.warning(f"Skipping unknown model: {
|
| 20 |
return
|
| 21 |
|
| 22 |
# Iterate through dataset and process queries
|
| 23 |
for idx, instance in enumerate(dataset[:config['num_queries']], start=0):
|
| 24 |
-
logging.info(f"Executing Query {idx + 1} for Model: {
|
| 25 |
|
| 26 |
query, ans, docs = process_data(instance, config['noise_rate'], config['passage_num'], data_file_name)
|
| 27 |
|
|
|
|
| 12 |
def get_prediction_result(config, data_file_name):
|
| 13 |
results = []
|
| 14 |
dataset = load_dataset(data_file_name)
|
| 15 |
+
modelname = config['model_name']
|
| 16 |
+
|
| 17 |
# Create GroqClient instance for supported models
|
| 18 |
+
if modelname in config['models']:
|
| 19 |
+
model = GroqClient(plm=modelname)
|
| 20 |
else:
|
| 21 |
+
logging.warning(f"Skipping unknown model: {modelname}")
|
| 22 |
return
|
| 23 |
|
| 24 |
# Iterate through dataset and process queries
|
| 25 |
for idx, instance in enumerate(dataset[:config['num_queries']], start=0):
|
| 26 |
+
logging.info(f"Executing Query {idx + 1} for Model: {modelname}")
|
| 27 |
|
| 28 |
query, ans, docs = process_data(instance, config['noise_rate'], config['passage_num'], data_file_name)
|
| 29 |
|