github-actions[bot] committed
Commit a806362 · 0 Parent(s)

Clean Push to Hugging Face
README.md ADDED
@@ -0,0 +1,70 @@
+ ---
+ title: DATE-LM Leaderboard
+ emoji: 🏆
+ colorFrom: purple
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 5.23.1
+ app_file: app.py
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+ # DATE-LM Data Attribution Leaderboards
+
+ This repo contains the leaderboard code associated with the DATE-LM paper. The leaderboards
+ are hosted in [this Hugging Face Space](https://huggingface.co/spaces/DataAttributionEval/DATE-LM-Leaderboard).
+
+ The leaderboards are split into two broad categories, Training Data Selection and Applications,
+ each containing three leaderboards, as indicated below.
+
+ | Category                           | Leaderboards                                         |
+ | ---------------------------------- | ---------------------------------------------------- |
+ | Training Data Selection            | Pre-Training (10K), Pre-Training (30K), Fine-Tuning  |
+ | Applications (Toxicity / Bias)     | Homogeneous, Heterogeneous                           |
+ | Applications (Factual Attribution) | Factual Attribution                                  |
+
+ Details on the tasks corresponding to each leaderboard, as well as their code pipelines,
+ can be found in the DATE-LM paper and [GitHub repo](https://github.com/DataAttributionEval/DATE-LM).
+
+ ## Submission
+
+ To submit to a leaderboard, use the form in the "Submit Scores" tab of the Hugging Face Space. Submitting opens a pull request in this repo, which must be merged by a member of the team before the entry is displayed in the Space.
+
+ A submission includes:
+
+ - Influence scores file
+ - Paper/Code/Contact link
+ - Method name and category
+ - Metrics (dependent on the leaderboard chosen)
+ - and more
+
+ ## Ranking
+
+ Each leaderboard's ranking is based on its metric values, with details specified in the description of each leaderboard. To summarize, the leaderboards are ranked using the following schemes (a minimal code sketch of these schemes follows the table):
+
+ | Leaderboard                            | Ranking Metric                                     |
+ | -------------------------------------- | -------------------------------------------------- |
+ | Pre-Training (10K), Pre-Training (30K) | highest score in the **avg** column                |
+ | Fine-Tuning                            | average of **MMLU**, **GSM8K**, and **BBH** scores |
+ | Applications (Toxicity / Bias)         | highest score in the **AUPRC** column              |
+ | Applications (Factual Attribution)     | average of **Recall@50** and **MRR** scores        |
+
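+ The sort keys mirror `add_ranking_column` in `app.py`; here is a minimal standalone sketch (the example rows are hypothetical):
+
+ ```python
+ # Sort key per leaderboard id; higher is better in every scheme.
+ KEY_FNS = {
+     "pretrain": lambda x: x["avg"],
+     "finetune": lambda x: (x["MMLU"] + x["GSM8K"] + x["BBH"]) / 3,
+     "toxicity": lambda x: x["AUPRC"],
+     "factual": lambda x: (x["Recall@50"] + x["MRR"]) / 2,
+ }
+
+ def rank(entries, board_id):
+     """Sort entries best-first and attach a 1-based Rank field."""
+     ranked = sorted(entries, key=KEY_FNS[board_id], reverse=True)
+     for i, entry in enumerate(ranked):
+         entry["Rank"] = i + 1
+     return ranked
+
+ # Hypothetical factual-attribution rows:
+ rows = [{"Method": "A", "Recall@50": 0.50, "MRR": 0.80},
+         {"Method": "B", "Recall@50": 0.60, "MRR": 0.90}]
+ print([r["Method"] for r in rank(rows, "factual")])  # ['B', 'A']
+ ```
+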
+ ## Repo Files
+
+ Overview of repo files:
+
+ - app file: `app.py`
+ - mappings files: `filePaths.py`, `nameMapping.py`
+ - GitHub PR creation file: `pr.py`
+ - submissions storage: `submissions` folder
+   - Note: each submission has its own dedicated folder containing `metadata.json` and the influence scores
+ - leaderboards data: `data` folder
+ - GitHub workflow files:
+   - add a submission into the leaderboard JSON upon merge: `merge-data.yml`, `scripts/merge_data.py` (see the sketch after this list)
+   - sync the repo with the Hugging Face Space: `push-to-hf.yml`
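+
+ As a rough illustration of the merge step, a minimal sketch of what `scripts/merge_data.py` might do (the actual script may differ; `merge_submission` and its arguments are hypothetical):
+
+ ```python
+ import json
+ from pathlib import Path
+
+ def merge_submission(submission_dir: str, leaderboard_json: str) -> None:
+     """Append a merged submission's metadata to the matching leaderboard file."""
+     entry = json.loads(Path(submission_dir, "metadata.json").read_text())
+     board_path = Path(leaderboard_json)
+     board = json.loads(board_path.read_text())
+     board.append(entry)  # "Rank" is recomputed at display time by app.py
+     board_path.write_text(json.dumps(board, indent=2))
+
+ # e.g. merge_submission("submissions/my-method", "data/Applications/factual.json")
+ ```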
app.py ADDED
@@ -0,0 +1,400 @@
+ import gradio as gr
+ from gradio_leaderboard import Leaderboard
+ import pandas as pd
+ import json
+ import os
+
+ import filePaths
+ import nameMapping
+ import pr
+
+ ##################### Leaderboard Paths + Variables #####################
+
+ pathLst = filePaths.PATHLIST
+ pretrain_10K, pretrain_30K, finetune = pathLst[0], pathLst[1], pathLst[2]
+ toxicity_homogeneous, toxicity_heterogeneous, factual = pathLst[3], pathLst[4], pathLst[5]
+
+ leaderboard_names = nameMapping.LEADERBOARD_NAMES
+ trainingNamesSet = nameMapping.TRAINING_LEADERBOARDS
+
+ ########################## Data Loading ###########################
+
+ def load_leaderboard_data(file_path):
+     """
+     Load leaderboard data from a JSON file.
+     """
+     if os.path.exists(file_path):
+         with open(file_path, 'r') as f:
+             return json.load(f)
+     return []
+
+ def add_ranking_column(data, id):
+     """
+     Add a ranking column dynamically for display, based on the selected metric aggregation.
+     """
+     if id == 'toxicity':    # Toxicity: AUPRC
+         key_fn = lambda x: x["AUPRC"]
+     elif id == 'factual':   # Factual: avg of Recall@50 and MRR
+         key_fn = lambda x: (x["Recall@50"] + x["MRR"]) / 2
+     elif id == 'pretrain':  # Pretrain: avg
+         key_fn = lambda x: x["avg"]
+     else:                   # Finetune: avg of MMLU, GSM8K, BBH
+         key_fn = lambda x: (x["MMLU"] + x["GSM8K"] + x["BBH"]) / 3
+
+     sorted_data = sorted(data, key=key_fn, reverse=True)
+     for index, entry in enumerate(sorted_data):
+         entry["Rank"] = index + 1
+     return sorted_data
+
+ def load_data(filePath, id):
+     """
+     Load initial leaderboard data as a ranked DataFrame.
+     """
+     return pd.DataFrame(add_ranking_column(load_leaderboard_data(filePath), id))
+
+ pretrain_10K_data = load_data(pretrain_10K, "pretrain")
+ pretrain_30K_data = load_data(pretrain_30K, "pretrain")
+ finetune_data = load_data(finetune, "finetune")
+ homogeneous_data = load_data(toxicity_homogeneous, "toxicity")
+ heterogeneous_data = load_data(toxicity_heterogeneous, "toxicity")
+ factual_data = load_data(factual, "factual")
+
+ ########################## Leaderboard Columns + Helpers ###########################
+
+ def get_leaderboard_columns(leaderboard_name):
+     """
+     Return the expected columns for a leaderboard.
+     """
+     leaderboardNameMap = nameMapping.DROPDOWN_NAME_MAPPING
+     if leaderboard_name in leaderboardNameMap["toxicity"]:
+         return nameMapping.TOXICITY_COLS
+     elif leaderboard_name in leaderboardNameMap["factual"]:
+         return nameMapping.FACTUAL_COLS
+     elif leaderboard_name in leaderboardNameMap["finetune"]:
+         return nameMapping.FINETUNE_COLS
+     else:  # pretrain
+         return nameMapping.PRETRAIN_COLS
+
+ def get_model_sizes(leaderboard_name):
+     """
+     Return the model sizes for the Applications leaderboards.
+     """
+     nameFileMapping = {"Homogeneous": toxicity_homogeneous,
+                        "Heterogeneous": toxicity_heterogeneous,
+                        "Factual Attribution": factual}
+     leaderboardJson = load_leaderboard_data(nameFileMapping[leaderboard_name])
+     modelSizes = set()
+
+     for row in leaderboardJson:
+         modelSizes.add(row["Model Size"])
+
+     return ['All'] + list(modelSizes)
+
+ ################### Submission Helper Functions #############################
+
+ def update_fields(leaderboard):
+     """
+     Determine the visibility of each additional-metrics group in the submission area.
+     """
+     nameMap = nameMapping.DROPDOWN_NAME_MAPPING
+     return {
+         pretrain_group: gr.update(visible=(leaderboard in nameMap['pretrain'])),
+         finetune_group: gr.update(visible=(leaderboard in nameMap['finetune'])),
+         toxicity_group: gr.update(visible=(leaderboard in nameMap['toxicity'])),
+         factual_group: gr.update(visible=(leaderboard in nameMap['factual']))
+     }
+
+ def validate_inputs(*inputFields):
+     """
+     Validate the submission form fields, raising gr.Error on any problem.
+     """
+     (leaderboard_dropdown, method_name, method_dropdown, model_name, model_size, paper_link, scores,
+      pre_avg, pre_sciq, pre_arc_easy, pre_arc_chall, pre_logiqa,
+      pre_boolq, pre_hellaswag, pre_piqa, pre_wino, pre_open,
+      fine_mmlu, fine_gsm, fine_bbh,
+      tox_toxicChat, tox_xsTest, tox_jbb, tox_auprc,
+      fac_recall, fac_mrr) = inputFields
+
+     if not all([leaderboard_dropdown, model_name, method_name, method_dropdown, model_size]):
+         raise gr.Error("All fields must be filled out and with the correct type.")
+
+     if not paper_link:
+         raise gr.Error("Please fill out the Paper/Code/Contact Link info.")
+
+     if not scores:
+         raise gr.Error("Please upload the data attribution scores as a .pt file.")
+
+     # Check that the metrics for the selected leaderboard are non-empty
+     nameMap = nameMapping.DROPDOWN_NAME_MAPPING
+     if leaderboard_dropdown in nameMap['pretrain']:
+         metricsList = [pre_avg, pre_sciq, pre_arc_easy, pre_arc_chall, pre_logiqa, pre_boolq, pre_hellaswag, pre_piqa, pre_wino, pre_open]
+     elif leaderboard_dropdown in nameMap['finetune']:
+         metricsList = [fine_mmlu, fine_gsm, fine_bbh]
+     elif leaderboard_dropdown in nameMap['toxicity']:
+         metricsList = [tox_toxicChat, tox_xsTest, tox_jbb, tox_auprc]
+     elif leaderboard_dropdown in nameMap['factual']:
+         metricsList = [fac_recall, fac_mrr]
+     else:
+         raise gr.Error("Unknown leaderboard selected.")
+
+     if not all(metricsList):
+         raise gr.Error("Metrics must be filled out.")
+     if not all(metric > 0 for metric in metricsList):
+         raise gr.Error("Metrics must be positive.")
+
+
+ ######## Dynamically Update Ranking when Filtering on Model Size ###############
+
+ def update_rankings(filtered_df, id):
+     """
+     Re-sort a filtered DataFrame by the leaderboard's ranking metric and renumber ranks.
+     """
+     df_with_rank = filtered_df.copy()  # create a copy to avoid modifying the original
+
+     if id == 'toxicity':   # Toxicity: AUPRC
+         df_with_rank = df_with_rank.sort_values(by="AUPRC", ascending=False)
+     elif id == 'factual':  # Factual: avg of Recall@50 and MRR
+         average_scores = df_with_rank[["Recall@50", "MRR"]].mean(axis=1)
+         sorted_index = average_scores.sort_values(ascending=False).index
+         df_with_rank = df_with_rank.loc[sorted_index]
+
+     df_with_rank["Rank"] = range(1, len(df_with_rank) + 1)  # add rank column
+
+     return df_with_rank
+
+ def filter_and_rank(df, filter_value, id):
+     """
+     Filter by model size (or keep all rows) and re-rank the result.
+     """
+     if filter_value == "All":
+         filtered_df = df
+     else:
+         filtered_df = df[df["Model Size"] == filter_value]
+     return update_rankings(filtered_df, id)
+
+ def rerank_leaderboard(filter_value, dfPath, idNum):
+     """
+     Reload a leaderboard from disk, then filter and re-rank it.
+     """
+     df = load_data(dfPath, idNum)
+     filtered_ranked_df = filter_and_rank(df, filter_value, idNum)
+     return filtered_ranked_df
+
+ #################### Leaderboards Code ##############################
+
+ with gr.Blocks(css="""
+ body, .gradio-container {
+     font-family: 'roboto';
+ }
+ """) as demo:
+     gr.Markdown("""
+     # Data Attribution Methods Leaderboards
+     """)
+     gr.Markdown("""
+     Survey and ranking of data attribution methods on data selection and
+     downstream application tasks for the DATE-LM evaluation paper.
+
+     **Leaderboard Submission**:
+     - To submit your team's scores, click on the "Submit Scores" tab.
+
+     **Data Attribution Method Categories**:
+     - Gradient (e.g. GradDot, GradSim, LESS, DataInf, EKFAC)
+     - Similarity (e.g. RepSim)
+     - Modeling (e.g. MATES)
+     - Lexical (e.g. BM25)
+     - Baseline (e.g. GradSafe, OpenAI Moderation, LLM Classifiers)
+     - Other
+
+     **Search Feature**:
+     - Input the name of the method you would like to search / filter for, and
+     then press "Enter". The matching rows from the leaderboard table will be displayed.
+     """
+     )
+
+     with gr.Tabs():
+         with gr.TabItem("Training Data Selection"):
+             with gr.Tabs():  # subtabs container
+                 with gr.TabItem("Pre-Training (10K)"):  # subtab
+                     gr.Markdown("""DATE-LM Task Description: A Pythia-1B model trained on FineWeb using the
+                     LAMBADA reference dataset. Results are evaluated at the 10K-step checkpoint.
+
+                     Ranking Metric: highest score in the **avg** column""")  # description
+                     l1 = Leaderboard(
+                         value=pd.DataFrame(pretrain_10K_data),
+                         select_columns=get_leaderboard_columns("Pre-Training (10K)"),
+                         search_columns=['Method'],
+                         filter_columns=["Attribution Method Type", "Method", "avg"],
+                     )
+                 with gr.TabItem("Pre-Training (30K)"):
+                     gr.Markdown("""DATE-LM Task Description: A Pythia-1B model trained on FineWeb using the
+                     LAMBADA reference dataset. Results are evaluated at the 30K-step checkpoint.
+
+                     Ranking Metric: highest score in the **avg** column""")
+                     l2 = Leaderboard(
+                         value=pd.DataFrame(pretrain_30K_data),
+                         select_columns=get_leaderboard_columns("Pre-Training (30K)"),
+                         search_columns=["Method"],
+                         filter_columns=["Attribution Method Type", "Method", "avg"],
+                     )
+                 with gr.TabItem("Fine-Tuning"):
+                     gr.Markdown("""DATE-LM Task Description: Targeted instruction-tuning setting.
+                     Given a diverse instruction set and an eval dataset, we select data that would yield
+                     optimal performance on the eval data. For this task, the training data pool is
+                     Tulu3 (unfiltered) and the eval data is MMLU, GSM8K, and BBH.
+
+                     Ranking Metric: average of the **MMLU**, **GSM8K**, and **BBH** scores""")
+                     l3 = Leaderboard(
+                         value=pd.DataFrame(finetune_data),
+                         select_columns=get_leaderboard_columns("Fine-Tuning"),
+                         search_columns=["Method"],
+                         filter_columns=["Attribution Method Type", "MMLU", "GSM8K", "BBH"],
+                     )
+         with gr.TabItem("Applications"):
+             with gr.Tabs():
+                 with gr.TabItem("Toxicity/Bias"):
+                     with gr.Tabs():
+                         with gr.TabItem("Homogeneous"):
+                             gr.Markdown("""DATE-LM Task Description: This leaderboard presents detection AUPRC results of baseline methods and data attribution methods in the homogeneous setting
+                             (i.e., detecting a small amount of toxic/biased data embedded in a larger benign dataset).
+
+                             Ranking Metric: **AUPRC** (an average of ToxicChat, XSTest-response, JailBreakBench)""")
+                             category_filter4 = gr.Dropdown(
+                                 choices=get_model_sizes("Homogeneous"),
+                                 value="All",
+                                 label="Filter Model Size"
+                             )  # ensures page placement above the leaderboard
+                             l4 = Leaderboard(
+                                 value=pd.DataFrame(homogeneous_data),
+                                 select_columns=get_leaderboard_columns("Homogeneous"),
+                                 search_columns=["Method"],
+                                 filter_columns=["Attribution Method Type", "Model", "AUPRC"],
+                             )
+                             data_path4 = gr.Textbox(value=toxicity_homogeneous, visible=False)
+                             id_str4 = gr.Textbox(value="toxicity", visible=False)
+                             category_filter4.change(
+                                 fn=rerank_leaderboard,
+                                 inputs=[category_filter4, data_path4, id_str4],
+                                 outputs=[l4]
+                             )
+                         with gr.TabItem("Heterogeneous"):
+                             gr.Markdown("""DATE-LM Task Description: This leaderboard presents detection AUPRC results of baseline methods and data attribution methods in the heterogeneous setting
+                             (i.e., safety-aligned examples that resemble unsafe data in format but contain safe responses).
+
+                             Ranking Metric: **AUPRC** (an average of ToxicChat, XSTest-response, JailBreakBench)""")
+                             category_filter5 = gr.Dropdown(
+                                 choices=get_model_sizes("Heterogeneous"),
+                                 value="All",
+                                 label="Filter Model Size"
+                             )
+                             l5 = Leaderboard(
+                                 value=pd.DataFrame(heterogeneous_data),
+                                 select_columns=get_leaderboard_columns("Heterogeneous"),
+                                 search_columns=["Method"],
+                                 filter_columns=["Attribution Method Type", "Model", "AUPRC"]
+                             )
+                             data_path5 = gr.Textbox(value=toxicity_heterogeneous, visible=False)
+                             id_str5 = gr.Textbox(value="toxicity", visible=False)
+                             category_filter5.change(
+                                 fn=rerank_leaderboard,
+                                 inputs=[category_filter5, data_path5, id_str5],
+                                 outputs=[l5]
+                             )
+                 with gr.TabItem("Factual Attribution"):
+                     gr.Markdown("""DATE-LM Task Description: Identifying the specific training examples that support a model's generated facts.
+
+                     Ranking Metric: average of **Recall@50** and **MRR**""")
+                     category_filter6 = gr.Dropdown(
+                         choices=get_model_sizes("Factual Attribution"),
+                         value="All",
+                         label="Filter Model Size"
+                     )
+                     l6 = Leaderboard(
+                         value=pd.DataFrame(factual_data),
+                         select_columns=get_leaderboard_columns("Factual Attribution"),
+                         search_columns=["Method"],
+                         filter_columns=["Attribution Method Type", "Model", "Recall@50", "MRR"],
+                     )
+                     data_path6 = gr.Textbox(value=factual, visible=False)
+                     id_str6 = gr.Textbox(value="factual", visible=False)
+                     category_filter6.change(
+                         fn=rerank_leaderboard,
+                         inputs=[category_filter6, data_path6, id_str6],
+                         outputs=[l6]
+                     )
+         with gr.TabItem("Submit Scores 🚀"):
+             with gr.Column():
+                 gr.Markdown("""### Submit Your Score to a Leaderboard
+
+                 Note: Please first select the leaderboard you would like to submit to. This will display the fields for the
+                 corresponding metrics that are needed.
+                 """)
+
+                 leaderboard_dropdown = gr.Dropdown(
+                     label="Select Leaderboard",
+                     choices=nameMapping.LEADERBOARD_NAMES,
+                     value=None
+                 )
+
+                 method_name = gr.Textbox(label="Method Name")
+                 method_dropdown = gr.Dropdown(
+                     label="Method Type",
+                     choices=["Gradient", "Similarity", "Representation-Based", "Modeling", "Baseline", "Lexical", "Other"],
+                     value=None
+                 )
+
+                 model_name = gr.Textbox(label="Model Name")
+                 model_size = gr.Textbox(label="Model Size (ex. 410M, 1B, 8B)")
+
+                 paper_link = gr.Textbox(label="Paper/Code/Contact Link")
+
+                 scores = gr.File(label='Upload Data Attribution Scores File (.pt)', height=150, file_types=[".pt"])
+
+                 # Dynamically display the needed fields for each leaderboard type
+
+                 with gr.Column(visible=False) as pretrain_group:
+                     pre_avg = gr.Number(label="Avg")
+                     pre_sciq = gr.Number(label="sciq")
+                     pre_arc_easy = gr.Number(label="arc_easy")
+                     pre_arc_chall = gr.Number(label="arc_challenge")
+                     pre_logiqa = gr.Number(label="logiqa")
+                     pre_boolq = gr.Number(label="boolq")
+                     pre_hellaswag = gr.Number(label="hellaswag")
+                     pre_piqa = gr.Number(label="piqa")
+                     pre_wino = gr.Number(label="winogrande")
+                     pre_open = gr.Number(label="openbookqa")
+
+                 with gr.Column(visible=False) as finetune_group:
+                     fine_mmlu = gr.Number(label="MMLU")
+                     fine_gsm = gr.Number(label="GSM8K")
+                     fine_bbh = gr.Number(label="BBH")
+
+                 with gr.Column(visible=False) as toxicity_group:
+                     tox_toxicChat = gr.Number(label="ToxicChat")
+                     tox_xsTest = gr.Number(label="XSTest-response")
+                     tox_jbb = gr.Number(label="JailBreakBench")
+                     tox_auprc = gr.Number(label="AUPRC")
+
+                 with gr.Column(visible=False) as factual_group:
+                     fac_recall = gr.Number(label="Recall@50")
+                     fac_mrr = gr.Number(label="MRR")
+
+                 # Submit button
+                 submit_button = gr.Button("Submit")
+
+                 leaderboard_dropdown.change(update_fields, inputs=[leaderboard_dropdown], outputs=[pretrain_group, finetune_group, toxicity_group, factual_group])
+
+                 # information lists
+                 inputsList = [leaderboard_dropdown, method_name, method_dropdown, model_name, model_size, paper_link, scores,
+                               pre_avg, pre_sciq, pre_arc_easy, pre_arc_chall, pre_logiqa, pre_boolq, pre_hellaswag, pre_piqa, pre_wino, pre_open,
+                               fine_mmlu, fine_gsm, fine_bbh,
+                               tox_toxicChat, tox_xsTest, tox_jbb, tox_auprc,
+                               fac_recall, fac_mrr]
+
+                 submit_button.click(
+                     validate_inputs, inputs=inputsList, outputs=[]
+                 ).success(fn=pr.submit_and_open_PR, inputs=inputsList, outputs=[gr.Textbox(label="Opened PR on Github")])
+
+ if __name__ == "__main__":
+     demo.launch(debug=True)
data/Applications/factual.json ADDED
@@ -0,0 +1,23 @@
+ [
+ {"Rank": 0, "Method": "BM25", "Attribution Method Type": "Lexical", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.305, "MRR": 0.771, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Rep Sim", "Attribution Method Type": "Similarity", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.376, "MRR": 0.790, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.466, "MRR": 0.768, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.493, "MRR": 0.836, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.500, "MRR": 0.772, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "DataInf", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.472, "MRR": 0.765, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "EKFAC", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.465, "MRR": 0.766, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "BM25", "Attribution Method Type": "Lexical", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.236, "MRR": 0.683, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Rep Sim", "Attribution Method Type": "Similarity", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.552, "MRR": 0.758, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.465, "MRR": 0.786, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.584, "MRR": 0.839, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.573, "MRR": 0.807, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "DataInf", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.475, "MRR": 0.785, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "EKFAC", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.485, "MRR": 0.881, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "BM25", "Attribution Method Type": "Lexical", "Model": "Llama-3.1-8B", "Model Size": "8B", "Recall@50": 0.313, "MRR": 0.826, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Rep Sim", "Attribution Method Type": "Similarity", "Model": "Llama-3.1-8B", "Model Size": "8B", "Recall@50": 0.625, "MRR": 0.965, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "Recall@50": 0.226, "MRR": 0.303, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "Recall@50": 0.626, "MRR": 0.970, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "Recall@50": 0.491, "MRR": 0.991, "Paper/Code/Contact Link": ""}
+ ]
data/Applications/toxicity-heterogeneous.json ADDED
@@ -0,0 +1,27 @@
+ [
+ {"Rank": 0, "Method": "GradSafe", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.347, "XSTest-response": 0.491, "JailBreakBench": 0.802, "AUPRC": 0.546, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "OpenAI Moderation", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.214, "XSTest-response": 0.358, "JailBreakBench": 0.185, "AUPRC": 0.253, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Llama-Guard-3-8B", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.423, "XSTest-response": 0.910, "JailBreakBench": 0.966, "AUPRC": 0.766, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Wildguard", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.551, "XSTest-response": 0.928, "JailBreakBench": 0.972, "AUPRC": 0.817, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "ShieldGemma-2b", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.165, "XSTest-response": 0.731, "JailBreakBench": 0.552, "AUPRC": 0.483, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "AEGIS-Defensive", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.376, "XSTest-response": 0.274, "JailBreakBench": 0.294, "AUPRC": 0.314, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "Rep-Sim", "Attribution Method Type": "Similarity", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.335, "XSTest-response": 0.580, "JailBreakBench": 0.578, "AUPRC": 0.498, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.194, "XSTest-response": 0.389, "JailBreakBench": 0.396, "AUPRC": 0.326, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.362, "XSTest-response": 0.601, "JailBreakBench": 0.434, "AUPRC": 0.466, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.326, "XSTest-response": 0.734, "JailBreakBench": 0.484, "AUPRC": 0.515, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "DataInf", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.195, "XSTest-response": 0.392, "JailBreakBench": 0.396, "AUPRC": 0.328, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "EKFAC", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.221, "XSTest-response": 0.344, "JailBreakBench": 0.373, "AUPRC": 0.313, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "Rep-Sim", "Attribution Method Type": "Similarity", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.598, "XSTest-response": 0.733, "JailBreakBench": 0.461, "AUPRC": 0.597, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.195, "XSTest-response": 0.341, "JailBreakBench": 0.369, "AUPRC": 0.302, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.228, "XSTest-response": 0.772, "JailBreakBench": 0.531, "AUPRC": 0.510, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.230, "XSTest-response": 0.616, "JailBreakBench": 0.596, "AUPRC": 0.481, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "DataInf", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.196, "XSTest-response": 0.347, "JailBreakBench": 0.369, "AUPRC": 0.304, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "EKFAC", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.239, "XSTest-response": 0.398, "JailBreakBench": 0.369, "AUPRC": 0.334, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "Rep-Sim", "Attribution Method Type": "Similarity", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.602, "XSTest-response": 0.638, "JailBreakBench": 0.514, "AUPRC": 0.585, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.289, "XSTest-response": 0.328, "JailBreakBench": 0.085, "AUPRC": 0.234, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.223, "XSTest-response": 0.703, "JailBreakBench": 0.401, "AUPRC": 0.442, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.258, "XSTest-response": 0.744, "JailBreakBench": 0.114, "AUPRC": 0.372, "Paper/Code/Contact Link": ""}
+ ]
data/Applications/toxicity-homogeneous.json ADDED
@@ -0,0 +1,278 @@
+ [
+ {"Rank": 0, "Method": "GradSafe", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.347, "XSTest-response": 0.491, "JailBreakBench": 0.802, "AUPRC": 0.546, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "OpenAI Moderation", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.243, "XSTest-response": 0.378, "JailBreakBench": 0.187, "AUPRC": 0.269, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Llama-Guard-3-8B", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.445, "XSTest-response": 0.916, "JailBreakBench": 0.985, "AUPRC": 0.782, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Wildguard", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.56, "XSTest-response": 0.93, "JailBreakBench": 0.989, "AUPRC": 0.827, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "ShieldGemma-2b", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.17, "XSTest-response": 0.74, "JailBreakBench": 0.664, "AUPRC": 0.525, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "AEGIS-Defensive", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.376, "XSTest-response": 0.274, "JailBreakBench": 0.346, "AUPRC": 0.332, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "Rep-Sim", "Attribution Method Type": "Similarity", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.374, "XSTest-response": 0.657, "JailBreakBench": 0.986, "AUPRC": 0.672, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.084, "XSTest-response": 0.483, "JailBreakBench": 0.999, "AUPRC": 0.522, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.106, "XSTest-response": 0.647, "JailBreakBench": 1.0, "AUPRC": 0.584, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.388, "XSTest-response": 0.724, "JailBreakBench": 1.0, "AUPRC": 0.704, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "DataInf", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.204, "XSTest-response": 0.487, "JailBreakBench": 0.999, "AUPRC": 0.563, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "EKFAC", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.216, "XSTest-response": 0.497, "JailBreakBench": 1.0, "AUPRC": 0.571, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "Rep-Sim", "Attribution Method Type": "Similarity", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.632, "XSTest-response": 0.792, "JailBreakBench": 0.854, "AUPRC": 0.759, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.212, "XSTest-response": 0.437, "JailBreakBench": 1.0, "AUPRC": 0.55, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.259, "XSTest-response": 0.798, "JailBreakBench": 1.0, "AUPRC": 0.686, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.294, "XSTest-response": 0.792, "JailBreakBench": 1.0, "AUPRC": 0.695, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "DataInf", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.215, "XSTest-response": 0.442, "JailBreakBench": 1.0, "AUPRC": 0.552, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "EKFAC", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.264, "XSTest-response": 0.562, "JailBreakBench": 1.0, "AUPRC": 0.609, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "Rep-Sim", "Attribution Method Type": "Similarity", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.989, "XSTest-response": 0.999, "JailBreakBench": 0.98, "AUPRC": 0.989, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.47, "XSTest-response": 0.368, "JailBreakBench": 0.274, "AUPRC": 0.371, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.28, "XSTest-response": 0.603, "JailBreakBench": 0.82, "AUPRC": 0.567, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.499, "XSTest-response": 0.615, "JailBreakBench": 0.767, "AUPRC": 0.627, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "test", "Attribution Method Type": "Gradient", "Model": "pythia", "Model Size": "1B", "Paper/Code/Contact Link": "test", "ToxicChat": 0.3, "XSTest-response": 0.3, "JailBreakBench": 0.3, "AUPRC": 0.3}
+ ]
data/DATE-LM-Original-Data/factual.json ADDED
@@ -0,0 +1,23 @@
+ [
+ {"Rank": 0, "Method": "BM25", "Attribution Method Type": "Lexical", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.305, "MRR": 0.771, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Rep Sim", "Attribution Method Type": "Similarity", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.376, "MRR": 0.790, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.466, "MRR": 0.768, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.493, "MRR": 0.836, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.500, "MRR": 0.772, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "DataInf", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.472, "MRR": 0.765, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "EKFAC", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "Recall@50": 0.465, "MRR": 0.766, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "BM25", "Attribution Method Type": "Lexical", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.236, "MRR": 0.683, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Rep Sim", "Attribution Method Type": "Similarity", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.552, "MRR": 0.758, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.465, "MRR": 0.786, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.584, "MRR": 0.839, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.573, "MRR": 0.807, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "DataInf", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.475, "MRR": 0.785, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "EKFAC", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "Recall@50": 0.485, "MRR": 0.881, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "BM25", "Attribution Method Type": "Lexical", "Model": "Llama-3.1-8B", "Model Size": "8B", "Recall@50": 0.313, "MRR": 0.826, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Rep Sim", "Attribution Method Type": "Similarity", "Model": "Llama-3.1-8B", "Model Size": "8B", "Recall@50": 0.625, "MRR": 0.965, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "Recall@50": 0.226, "MRR": 0.303, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "Recall@50": 0.626, "MRR": 0.970, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "Recall@50": 0.491, "MRR": 0.991, "Paper/Code/Contact Link": ""}
+ ]
data/DATE-LM-Original-Data/finetune.json ADDED
@@ -0,0 +1,7 @@
+ [
+ {"Rank": 0, "Method": "Random", "Attribution Method Type": "Other", "Model": "Llama-3.1-8B", "Model Size": "8B", "MMLU": 0.602, "GSM8K": 0.596, "BBH": 0.653, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "BM25", "Attribution Method Type": "Lexical", "Model": "Llama-3.1-8B", "Model Size": "8B", "MMLU": 0, "GSM8K": 0, "BBH": 0, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Rep Sim", "Attribution Method Type": "Similarity", "Model": "Llama-3.1-8B", "Model Size": "8B", "MMLU": 0, "GSM8K": 0, "BBH": 0, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "MMLU": 0, "GSM8K": 0, "BBH": 0, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS (optimizer)", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "MMLU": 0, "GSM8K": 0, "BBH": 0, "Paper/Code/Contact Link": ""}
+ ]
data/DATE-LM-Original-Data/pythia1b-10k-lambada.json ADDED
@@ -0,0 +1,8 @@
+ [
+ {"Rank": 0, "Method": "Random", "Attribution Method Type": "Other", "Model": "Pythia-1b", "Model Size": "1B", "avg": 45.34, "sciq": 0.689, "arc_easy": 0.431, "arc_challenge": 0.244, "logiqa": 0.275, "boolq": 0.520, "hellaswag": 0.407, "piqa": 0.690, "winogrande": 0.535, "openbookqa": 0.290, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "BM25", "Attribution Method Type": "Lexical", "Model": "Pythia-1b", "Model Size": "1B", "avg": 45.72, "sciq": 0.692, "arc_easy": 0.439, "arc_challenge": 0.239, "logiqa": 0.260, "boolq": 0.556, "hellaswag": 0.406, "piqa": 0.696, "winogrande": 0.531, "openbookqa": 0.296, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "avg": 45.98, "sciq": 0.689, "arc_easy": 0.440, "arc_challenge": 0.240, "logiqa": 0.272, "boolq": 0.556, "hellaswag": 0.406, "piqa": 0.690, "winogrande": 0.537, "openbookqa": 0.308, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Rep Sim", "Attribution Method Type": "Similarity", "Model": "Pythia-1b", "Model Size": "1B", "avg": 46.00, "sciq": 0.691, "arc_easy": 0.441, "arc_challenge": 0.237, "logiqa": 0.275, "boolq": 0.561, "hellaswag": 0.409, "piqa": 0.695, "winogrande": 0.537, "openbookqa": 0.294, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Mates", "Attribution Method Type": "Modeling", "Model": "Pythia-1b", "Model Size": "1B", "avg": 45.76, "sciq": 0.685, "arc_easy": 0.441, "arc_challenge": 0.241, "logiqa": 0.269, "boolq": 0.563, "hellaswag": 0.408, "piqa": 0.696, "winogrande": 0.523, "openbookqa": 0.292, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Edu", "Attribution Method Type": "Other", "Model": "Pythia-1b", "Model Size": "1B", "avg": 45.83, "sciq": 0.688, "arc_easy": 0.452, "arc_challenge": 0.240, "logiqa": 0.264, "boolq": 0.571, "hellaswag": 0.409, "piqa": 0.689, "winogrande": 0.520, "openbookqa": 0.292, "Paper/Code/Contact Link": ""}
+ ]
data/DATE-LM-Original-Data/pythia1b-30k-lambada.json ADDED
@@ -0,0 +1,7 @@
+ [
+ {"Rank": 0, "Method": "Random", "Attribution Method Type": "Other", "Model": "Pythia-1b", "Model Size": "1B", "avg": 47.49, "sciq": 0.695, "arc_easy": 0.456, "arc_challenge": 0.255, "logiqa": 0.276, "boolq": 0.602, "hellaswag": 0.475, "piqa": 0.711, "winogrande": 0.510, "openbookqa": 0.294, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Rep Sim", "Attribution Method Type": "Similarity", "Model": "Pythia-1b", "Model Size": "1B", "avg": 47.83, "sciq": 0.697, "arc_easy": 0.462, "arc_challenge": 0.259, "logiqa": 0.263, "boolq": 0.612, "hellaswag": 0.474, "piqa": 0.712, "winogrande": 0.526, "openbookqa": 0.300, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "avg": 47.92, "sciq": 0.711, "arc_easy": 0.465, "arc_challenge": 0.254, "logiqa": 0.273, "boolq": 0.606, "hellaswag": 0.475, "piqa": 0.711, "winogrande": 0.522, "openbookqa": 0.296, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Mates", "Attribution Method Type": "Modeling", "Model": "Pythia-1b", "Model Size": "1B", "avg": 47.64, "sciq": 0.702, "arc_easy": 0.464, "arc_challenge": 0.253, "logiqa": 0.260, "boolq": 0.617, "hellaswag": 0.474, "piqa": 0.708, "winogrande": 0.518, "openbookqa": 0.292, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Edu", "Attribution Method Type": "Other", "Model": "Pythia-1b", "Model Size": "1B", "avg": 48.02, "sciq": 0.700, "arc_easy": 0.471, "arc_challenge": 0.262, "logiqa": 0.267, "boolq": 0.616, "hellaswag": 0.474, "piqa": 0.709, "winogrande": 0.511, "openbookqa": 0.312, "Paper/Code/Contact Link": ""}
+ ]
data/DATE-LM-Original-Data/toxicity-heterogeneous.json ADDED
@@ -0,0 +1,27 @@
+ [
+ {"Rank": 0, "Method": "GradSafe", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.347, "XSTest-response": 0.491, "JailBreakBench": 0.802, "AUPRC": 0.546, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "OpenAI Moderation", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.214, "XSTest-response": 0.358, "JailBreakBench": 0.185, "AUPRC": 0.253, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Llama-Guard-3-8B", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.423, "XSTest-response": 0.910, "JailBreakBench": 0.966, "AUPRC": 0.766, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Wildguard", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.551, "XSTest-response": 0.928, "JailBreakBench": 0.972, "AUPRC": 0.817, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "ShieldGemma-2b", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.165, "XSTest-response": 0.731, "JailBreakBench": 0.552, "AUPRC": 0.483, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "AEGIS-Defensive", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.376, "XSTest-response": 0.274, "JailBreakBench": 0.294, "AUPRC": 0.314, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "Rep-Sim", "Attribution Method Type": "Similarity", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.335, "XSTest-response": 0.580, "JailBreakBench": 0.578, "AUPRC": 0.498, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.194, "XSTest-response": 0.389, "JailBreakBench": 0.396, "AUPRC": 0.326, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.362, "XSTest-response": 0.601, "JailBreakBench": 0.434, "AUPRC": 0.466, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.326, "XSTest-response": 0.734, "JailBreakBench": 0.484, "AUPRC": 0.515, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "DataInf", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.195, "XSTest-response": 0.392, "JailBreakBench": 0.396, "AUPRC": 0.328, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "EKFAC", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.221, "XSTest-response": 0.344, "JailBreakBench": 0.373, "AUPRC": 0.313, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "Rep-Sim", "Attribution Method Type": "Similarity", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.598, "XSTest-response": 0.733, "JailBreakBench": 0.461, "AUPRC": 0.597, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.195, "XSTest-response": 0.341, "JailBreakBench": 0.369, "AUPRC": 0.302, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.228, "XSTest-response": 0.772, "JailBreakBench": 0.531, "AUPRC": 0.510, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.230, "XSTest-response": 0.616, "JailBreakBench": 0.596, "AUPRC": 0.481, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "DataInf", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.196, "XSTest-response": 0.347, "JailBreakBench": 0.369, "AUPRC": 0.304, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "EKFAC", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.239, "XSTest-response": 0.398, "JailBreakBench": 0.369, "AUPRC": 0.334, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "Rep-Sim", "Attribution Method Type": "Similarity", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.602, "XSTest-response": 0.638, "JailBreakBench": 0.514, "AUPRC": 0.585, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.289, "XSTest-response": 0.328, "JailBreakBench": 0.085, "AUPRC": 0.234, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.223, "XSTest-response": 0.703, "JailBreakBench": 0.401, "AUPRC": 0.442, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.258, "XSTest-response": 0.744, "JailBreakBench": 0.114, "AUPRC": 0.372, "Paper/Code/Contact Link": ""}
+ ]
data/DATE-LM-Original-Data/toxicity-homogeneous.json ADDED
@@ -0,0 +1,27 @@
+ [
+ {"Rank": 0, "Method": "GradSafe", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.347, "XSTest-response": 0.491, "JailBreakBench": 0.802, "AUPRC": 0.546, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "OpenAI Moderation", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.243, "XSTest-response": 0.378, "JailBreakBench": 0.187, "AUPRC": 0.269, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Llama-Guard-3-8B", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.445, "XSTest-response": 0.916, "JailBreakBench": 0.985, "AUPRC": 0.782, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Wildguard", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.560, "XSTest-response": 0.930, "JailBreakBench": 0.989, "AUPRC": 0.827, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "ShieldGemma-2b", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.170, "XSTest-response": 0.740, "JailBreakBench": 0.664, "AUPRC": 0.525, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "AEGIS-Defensive", "Attribution Method Type": "Baseline", "Model": "N/A", "Model Size": "N/A", "ToxicChat": 0.376, "XSTest-response": 0.274, "JailBreakBench": 0.346, "AUPRC": 0.332, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "Rep-Sim", "Attribution Method Type": "Similarity", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.374, "XSTest-response": 0.657, "JailBreakBench": 0.986, "AUPRC": 0.672, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.084, "XSTest-response": 0.483, "JailBreakBench": 0.999, "AUPRC": 0.522, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.106, "XSTest-response": 0.647, "JailBreakBench": 1.000, "AUPRC": 0.584, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.388, "XSTest-response": 0.724, "JailBreakBench": 1.000, "AUPRC": 0.704, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "DataInf", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.204, "XSTest-response": 0.487, "JailBreakBench": 0.999, "AUPRC": 0.563, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "EKFAC", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "ToxicChat": 0.216, "XSTest-response": 0.497, "JailBreakBench": 1.000, "AUPRC": 0.571, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "Rep-Sim", "Attribution Method Type": "Similarity", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.632, "XSTest-response": 0.792, "JailBreakBench": 0.854, "AUPRC": 0.759, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.212, "XSTest-response": 0.437, "JailBreakBench": 1.000, "AUPRC": 0.550, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.259, "XSTest-response": 0.798, "JailBreakBench": 1.000, "AUPRC": 0.686, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.294, "XSTest-response": 0.792, "JailBreakBench": 1.000, "AUPRC": 0.695, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "DataInf", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.215, "XSTest-response": 0.442, "JailBreakBench": 1.000, "AUPRC": 0.552, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "EKFAC", "Attribution Method Type": "Gradient", "Model": "Llama-3.2-1B", "Model Size": "1B", "ToxicChat": 0.264, "XSTest-response": 0.562, "JailBreakBench": 1.000, "AUPRC": 0.609, "Paper/Code/Contact Link": ""},
+
+ {"Rank": 0, "Method": "Rep-Sim", "Attribution Method Type": "Similarity", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.989, "XSTest-response": 0.999, "JailBreakBench": 0.980, "AUPRC": 0.989, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Dot", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.470, "XSTest-response": 0.368, "JailBreakBench": 0.274, "AUPRC": 0.371, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.280, "XSTest-response": 0.603, "JailBreakBench": 0.820, "AUPRC": 0.567, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "LESS", "Attribution Method Type": "Gradient", "Model": "Llama-3.1-8B", "Model Size": "8B", "ToxicChat": 0.499, "XSTest-response": 0.615, "JailBreakBench": 0.767, "AUPRC": 0.627, "Paper/Code/Contact Link": ""}
+ ]
data/DataSelection/finetune.json ADDED
@@ -0,0 +1,68 @@
+ [
+   {
+     "Rank": 0,
+     "Method": "Random Avg",
+     "Attribution Method Type": "Other",
+     "Model": "Llama-3.1-8B",
+     "Model Size": "8B",
+     "MMLU": 60.2,
+     "GSM8K": 59.6,
+     "BBH": 65.6,
+     "Paper/Code/Contact Link": ""
+   },
+   {
+     "Rank": 0,
+     "Method": "BM25",
+     "Attribution Method Type": "Lexical",
+     "Model": "Llama-3.1-8B",
+     "Model Size": "8B",
+     "MMLU": 59.5,
+     "GSM8K": 60.2,
+     "BBH": 62.5,
+     "Paper/Code/Contact Link": ""
+   },
+   {
+     "Rank": 0,
+     "Method": "Rep Sim",
+     "Attribution Method Type": "Similarity",
+     "Model": "Llama-3.1-8B",
+     "Model Size": "8B",
+     "MMLU": 61.2,
+     "GSM8K": 59.2,
+     "BBH": 65.9,
+     "Paper/Code/Contact Link": ""
+   },
+   {
+     "Rank": 0,
+     "Method": "RDS+",
+     "Attribution Method Type": "Similarity",
+     "Model": "Llama-3.1-8B",
+     "Model Size": "8B",
+     "MMLU": 62.4,
+     "GSM8K": 59.6,
+     "BBH": 66.9,
+     "Paper/Code/Contact Link": ""
+   },
+   {
+     "Rank": 0,
+     "Method": "Grad Sim",
+     "Attribution Method Type": "Gradient",
+     "Model": "Llama-3.1-8B",
+     "Model Size": "8B",
+     "MMLU": 58.4,
+     "GSM8K": 57.8,
+     "BBH": 65.5,
+     "Paper/Code/Contact Link": ""
+   },
+   {
+     "Rank": 0,
+     "Method": "LESS (optimizer)",
+     "Attribution Method Type": "Gradient",
+     "Model": "Llama-3.1-8B",
+     "Model Size": "8B",
+     "MMLU": 60,
+     "GSM8K": 59.5,
+     "BBH": 64.2,
+     "Paper/Code/Contact Link": ""
+   }
+ ]
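Per the README, the Fine-Tuning leaderboard is ranked by the average of MMLU, GSM8K, and BBH. A minimal sketch of that ranking over the file above (illustrative only, not the Space's actual display code):

```python
import json

# Rank finetune.json entries by the mean of MMLU, GSM8K, and BBH, highest first.
with open("data/DataSelection/finetune.json") as f:
    rows = json.load(f)

def avg_score(row):
    return (row["MMLU"] + row["GSM8K"] + row["BBH"]) / 3

for rank, row in enumerate(sorted(rows, key=avg_score, reverse=True), start=1):
    print(rank, row["Method"], round(avg_score(row), 2))
```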
data/DataSelection/pythia1b-10k-lambada.json ADDED
@@ -0,0 +1,8 @@
+ [
+ {"Rank": 0, "Method": "Random", "Attribution Method Type": "Other", "Model": "Pythia-1b", "Model Size": "1B", "avg": 45.34, "sciq": 0.689, "arc_easy": 0.431, "arc_challenge": 0.244, "logiqa": 0.275, "boolq": 0.520, "hellaswag": 0.407, "piqa": 0.690, "winogrande": 0.535, "openbookqa": 0.290, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "BM25", "Attribution Method Type": "Lexical", "Model": "Pythia-1b", "Model Size": "1B", "avg": 45.72, "sciq": 0.692, "arc_easy": 0.439, "arc_challenge": 0.239, "logiqa": 0.260, "boolq": 0.556, "hellaswag": 0.406, "piqa": 0.696, "winogrande": 0.531, "openbookqa": 0.296, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "avg": 45.98, "sciq": 0.689, "arc_easy": 0.440, "arc_challenge": 0.240, "logiqa": 0.272, "boolq": 0.556, "hellaswag": 0.406, "piqa": 0.690, "winogrande": 0.537, "openbookqa": 0.308, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Rep Sim", "Attribution Method Type": "Similarity", "Model": "Pythia-1b", "Model Size": "1B", "avg": 46.00, "sciq": 0.691, "arc_easy": 0.441, "arc_challenge": 0.237, "logiqa": 0.275, "boolq": 0.561, "hellaswag": 0.409, "piqa": 0.695, "winogrande": 0.537, "openbookqa": 0.294, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Mates", "Attribution Method Type": "Modeling", "Model": "Pythia-1b", "Model Size": "1B", "avg": 45.76, "sciq": 0.685, "arc_easy": 0.441, "arc_challenge": 0.241, "logiqa": 0.269, "boolq": 0.563, "hellaswag": 0.408, "piqa": 0.696, "winogrande": 0.523, "openbookqa": 0.292, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Edu", "Attribution Method Type": "Other", "Model": "Pythia-1b", "Model Size": "1B", "avg": 45.83, "sciq": 0.688, "arc_easy": 0.452, "arc_challenge": 0.240, "logiqa": 0.264, "boolq": 0.571, "hellaswag": 0.409, "piqa": 0.689, "winogrande": 0.520, "openbookqa": 0.292, "Paper/Code/Contact Link": ""}
+ ]
data/DataSelection/pythia1b-30k-lambada.json ADDED
@@ -0,0 +1,7 @@
+ [
+ {"Rank": 0, "Method": "Random", "Attribution Method Type": "Other", "Model": "Pythia-1b", "Model Size": "1B", "avg": 47.49, "sciq": 0.695, "arc_easy": 0.456, "arc_challenge": 0.255, "logiqa": 0.276, "boolq": 0.602, "hellaswag": 0.475, "piqa": 0.711, "winogrande": 0.510, "openbookqa": 0.294, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Rep Sim", "Attribution Method Type": "Similarity", "Model": "Pythia-1b", "Model Size": "1B", "avg": 47.83, "sciq": 0.697, "arc_easy": 0.462, "arc_challenge": 0.259, "logiqa": 0.263, "boolq": 0.612, "hellaswag": 0.474, "piqa": 0.712, "winogrande": 0.526, "openbookqa": 0.300, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Grad Sim", "Attribution Method Type": "Gradient", "Model": "Pythia-1b", "Model Size": "1B", "avg": 47.92, "sciq": 0.711, "arc_easy": 0.465, "arc_challenge": 0.254, "logiqa": 0.273, "boolq": 0.606, "hellaswag": 0.475, "piqa": 0.711, "winogrande": 0.522, "openbookqa": 0.296, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Mates", "Attribution Method Type": "Modeling", "Model": "Pythia-1b", "Model Size": "1B", "avg": 47.64, "sciq": 0.702, "arc_easy": 0.464, "arc_challenge": 0.253, "logiqa": 0.260, "boolq": 0.617, "hellaswag": 0.474, "piqa": 0.708, "winogrande": 0.518, "openbookqa": 0.292, "Paper/Code/Contact Link": ""},
+ {"Rank": 0, "Method": "Edu", "Attribution Method Type": "Other", "Model": "Pythia-1b", "Model Size": "1B", "avg": 48.02, "sciq": 0.700, "arc_easy": 0.471, "arc_challenge": 0.262, "logiqa": 0.267, "boolq": 0.616, "hellaswag": 0.474, "piqa": 0.709, "winogrande": 0.511, "openbookqa": 0.312, "Paper/Code/Contact Link": ""}
+ ]
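The two pre-training leaderboards are ranked by the highest value in the `avg` column. A minimal pandas sketch of that ranking (pandas is already in `requirements.txt`; the display logic in `app.py` may differ):

```python
import json

import pandas as pd

# Rank a pre-training leaderboard file by its "avg" column, highest first.
with open("data/DataSelection/pythia1b-10k-lambada.json") as f:
    df = pd.DataFrame(json.load(f))

df["Rank"] = df["avg"].rank(ascending=False, method="min").astype(int)
print(df.sort_values("Rank")[["Rank", "Method", "avg"]].to_string(index=False))
```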
filePaths.py ADDED
@@ -0,0 +1,10 @@
+ PRETRAIN_10K = "./data/DataSelection/pythia1b-10k-lambada.json"
+ PRETRAIN_30K = "./data/DataSelection/pythia1b-30k-lambada.json"
+ FINETUNE = "./data/DataSelection/finetune.json"
+
+ TOXICITY_HOMOGENEOUS = "./data/Applications/toxicity-homogeneous.json"
+ TOXICITY_HETEROGENEOUS = "./data/Applications/toxicity-heterogeneous.json"
+ FACTUAL = "./data/Applications/factual.json"
+
+ PATHLIST = [PRETRAIN_10K, PRETRAIN_30K, FINETUNE, TOXICITY_HOMOGENEOUS, TOXICITY_HETEROGENEOUS, FACTUAL]
+
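`filePaths.py` just centralizes the six leaderboard JSON paths. An illustrative consumer (not taken from `app.py`) that loads them all:

```python
import json

import filePaths

# Load every leaderboard file referenced in PATHLIST, keyed by its path.
leaderboards = {}
for path in filePaths.PATHLIST:
    with open(path) as f:
        leaderboards[path] = json.load(f)

print({path: len(rows) for path, rows in leaderboards.items()})
```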
nameMapping.py ADDED
@@ -0,0 +1,20 @@
+ # Sub-Tab Display
+ LEADERBOARD_NAMES = ["Pre-Training (10K)",
+                      "Pre-Training (30K)",
+                      "Fine-Tuning",
+                      "Homogeneous", "Heterogeneous",
+                      "Factual Attribution"]
+
+ TRAINING_LEADERBOARDS = {"Pre-Training (10K)", "Pre-Training (30K)", "Fine-Tuning"}
+
+ # Submission Drop-Down Display
+ DROPDOWN_NAME_MAPPING = {"toxicity": {"Homogeneous", "Heterogeneous"},
+                          "factual": {"Factual Attribution"},
+                          "finetune": {"Fine-Tuning"},
+                          "pretrain": {"Pre-Training (10K)", "Pre-Training (30K)"}}
+
+ # Leaderboard Columns
+ TOXICITY_COLS = ["Rank", "Method", "Attribution Method Type", "Model", "Model Size", "ToxicChat", "XSTest-response", "JailBreakBench", "AUPRC", "Paper/Code/Contact Link"]
+ FACTUAL_COLS = ["Rank", "Method", "Attribution Method Type", "Model", "Model Size", "Recall@50", "MRR", "Paper/Code/Contact Link"]
+ FINETUNE_COLS = ["Rank", "Method", "Attribution Method Type", "Model", "Model Size", "MMLU", "GSM8K", "BBH", "Paper/Code/Contact Link"]
+ PRETRAIN_COLS = ["Rank", "Method", "Attribution Method Type", "Model", "Model Size", "avg", "sciq", "arc_easy", "arc_challenge", "logiqa", "boolq", "hellaswag", "piqa", "winogrande", "openbookqa", "Paper/Code/Contact Link"]
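`DROPDOWN_NAME_MAPPING` groups the display names under the four submission categories, and `pr.py` uses it to decide which metric fields to record. A small illustrative helper (not in the repo) for the reverse lookup:

```python
import nameMapping

def category_for(leaderboard_name):
    """Return the submission category key for a leaderboard display name."""
    for category, names in nameMapping.DROPDOWN_NAME_MAPPING.items():
        if leaderboard_name in names:
            return category
    raise KeyError(f"Unknown leaderboard: {leaderboard_name}")

assert category_for("Pre-Training (30K)") == "pretrain"
assert category_for("Heterogeneous") == "toxicity"
```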
pr.py ADDED
@@ -0,0 +1,91 @@
+ from github import Github
+ from datetime import datetime, timezone
+ import os
+ import json
+ import nameMapping
+
+ ###################### Push Up to Github #################################
+
+ REPO_NAME = "DataAttributionEval/DATE-LM-Leaderboard"
+ GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
+ reviewer = "cathyjiao"
+
+ def submit_and_open_PR(selected_leaderboard, *new_entry):
+     # Unpack the flat tuple of form values
+     (method_name, method_dropdown, model_name, model_size, paper_link, scores,
+      pre_avg, pre_sciq, pre_arc_easy, pre_arc_chall, pre_logiqa,
+      pre_boolq, pre_hellaswag, pre_piqa, pre_wino, pre_open,
+      fine_mmlu, fine_gsm, fine_bbh,
+      tox_toxicChat, tox_xsTest, tox_jbb, tox_auprc,
+      fac_recall, fac_mrr) = new_entry
+
+     # Build a timestamped branch name; git refs cannot contain spaces,
+     # so whitespace is stripped from the method name
+     timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+     branch_name = f"{timestamp}-{''.join(method_name.split())}"
+
+     # Save metadata
+     submission_data = {
+         "Metadata Path": f"submissions/{branch_name}/metadata.json",
+         "Leaderboard": selected_leaderboard,
+         "Date of Submission": timestamp,
+         "Method": method_name,
+         "Attribution Method Type": method_dropdown,
+         "Model": model_name,
+         "Model Size": model_size,
+         "Paper/Code/Contact Link": paper_link
+     }
+
+     # Record only the metrics relevant to the selected leaderboard
+     nameMap = nameMapping.DROPDOWN_NAME_MAPPING
+     if selected_leaderboard in nameMap['pretrain']:
+         fields = ["avg", "sciq", "arc_easy", "arc_challenge", "logiqa",
+                   "boolq", "hellaswag", "piqa", "winogrande", "openbookqa"]
+         vals = [pre_avg, pre_sciq, pre_arc_easy, pre_arc_chall, pre_logiqa,
+                 pre_boolq, pre_hellaswag, pre_piqa, pre_wino, pre_open]
+         submission_data.update(dict(zip(fields, vals)))
+     elif selected_leaderboard in nameMap['finetune']:
+         submission_data["MMLU"] = fine_mmlu
+         submission_data["GSM8K"] = fine_gsm
+         submission_data["BBH"] = fine_bbh
+     elif selected_leaderboard in nameMap['toxicity']:
+         submission_data["ToxicChat"] = tox_toxicChat
+         submission_data["XSTest-response"] = tox_xsTest
+         submission_data["JailBreakBench"] = tox_jbb
+         submission_data["AUPRC"] = tox_auprc
+     elif selected_leaderboard in nameMap['factual']:
+         submission_data["Recall@50"] = fac_recall
+         submission_data["MRR"] = fac_mrr
+
+     g = Github(GITHUB_TOKEN)
+     repo = g.get_repo(REPO_NAME)
+
+     # Create a unique branch off main
+     base = repo.get_branch("main")
+     repo.create_git_ref(ref=f"refs/heads/{branch_name}", sha=base.commit.sha)
+
+     # Upload score files
+     with open(scores.name, "rb") as f:
+         repo.create_file(
+             path=f"submissions/{branch_name}/{os.path.basename(scores.name)}",
+             message="Uploaded Scores File",
+             content=f.read(),
+             branch=branch_name
+         )
+
+     # Add PR metadata.json file
+     repo.create_file(
+         path=f"submissions/{branch_name}/metadata.json",
+         message="Submission Form Metadata",
+         content=json.dumps(submission_data, indent=2),
+         branch=branch_name
+     )
+
+     # Create pull request, label it, and request review
+     pr = repo.create_pull(
+         title=f"[HF Leaderboard Submission] {method_name} for {selected_leaderboard}",
+         body=f"Auto-Generated Leaderboard Submission PR from HF Space\n{json.dumps(submission_data, indent=4)}",
+         head=branch_name,
+         base="main"
+     )
+     pr.add_to_labels("leaderboard-submission")
+     pr.create_review_request(reviewers=[reviewer])
+
+     return f"✅ PR created: {pr.html_url}"
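`submit_and_open_PR` unpacks a fixed, flat tuple of 25 form values after the leaderboard name, so callers (the Gradio form in `app.py`) must pass every field positionally, with placeholders for metrics that don't apply. A hypothetical direct call for a Fine-Tuning submission; the literal values, the scores file path, and the `None` placeholders are illustrative only, and running it for real requires a valid `GITHUB_TOKEN`:

```python
import pr

# Any object with a .name path works for `scores`; pr.py re-opens it by name.
scores_file = open("influence_scores.csv", "rb")  # illustrative path

result = pr.submit_and_open_PR(
    "Fine-Tuning",
    "Example Method",        # method_name
    "Gradient",              # method_dropdown
    "Llama-3.1-8B",          # model_name
    "8B",                    # model_size
    "https://example.com",   # paper_link
    scores_file,             # scores
    *([None] * 10),          # ten pre-training metrics (unused here)
    62.0, 60.0, 65.0,        # fine_mmlu, fine_gsm, fine_bbh
    *([None] * 4),           # toxicity metrics (unused here)
    None, None,              # fac_recall, fac_mrr (unused here)
)
print(result)
```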
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ gradio
+ gradio_leaderboard
+ pandas
+ PyGithub
scripts/merge_data.py ADDED
@@ -0,0 +1,52 @@
+ import json
+ import sys
+ import os
+
+ def create_data_entry(submitted_json):
+     # Start every merged row unranked; Rank is recomputed at display time
+     entry = {
+         "Rank": 0,
+     }
+
+     # Copy the submission and drop the bookkeeping keys that do not
+     # belong in the leaderboard row
+     submitted_dict = dict(submitted_json)
+     del submitted_dict["Metadata Path"]
+     del submitted_dict["Leaderboard"]
+     del submitted_dict["Date of Submission"]
+
+     entry.update(submitted_dict)
+
+     return submitted_json.get("Leaderboard"), entry
+
+ def merge_metadata(metadata_path):
+     file_map = {"Pre-Training (10K)": ['data', 'DataSelection', 'pythia1b-10k-lambada.json'],
+                 "Pre-Training (30K)": ['data', 'DataSelection', 'pythia1b-30k-lambada.json'],
+                 "Fine-Tuning": ['data', 'DataSelection', 'finetune.json'],
+                 "Homogeneous": ['data', 'Applications', 'toxicity-homogeneous.json'],
+                 "Heterogeneous": ['data', 'Applications', 'toxicity-heterogeneous.json'],
+                 "Factual Attribution": ['data', 'Applications', 'factual.json']
+                 }
+
+     with open(metadata_path, "r") as f:
+         metadata_json = json.load(f)
+     leaderboard, entry = create_data_entry(metadata_json)
+
+     # Resolve the target leaderboard file before touching the filesystem;
+     # an unknown leaderboard name means there is no file to merge into
+     target_file = file_map.get(leaderboard)
+     if target_file is None:
+         raise ValueError(f"Could not retrieve file content for leaderboard: {leaderboard}")
+
+     repo_root = os.environ.get('GITHUB_WORKSPACE', os.getcwd())
+     target_path = os.path.join(repo_root, *target_file)
+
+     with open(target_path, "r") as f:
+         existing = json.load(f)
+
+     existing.append(entry)
+
+     # Save updated list
+     with open(target_path, "w") as f:
+         json.dump(existing, f, indent=2)
+
+ if __name__ == "__main__":
+     file_path = sys.argv[1]
+     merge_metadata(file_path)
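An illustrative sanity check for `create_data_entry` (not part of the repo; assumes `scripts/` is on the import path): the bookkeeping keys should be stripped and `Rank` prepended.

```python
from merge_data import create_data_entry

sample = {
    "Metadata Path": "submissions/example/metadata.json",
    "Leaderboard": "Fine-Tuning",
    "Date of Submission": "20250101-000000",
    "Method": "Example Method",
    "Attribution Method Type": "Gradient",
    "Model": "Llama-3.1-8B",
    "Model Size": "8B",
    "MMLU": 60.0,
    "GSM8K": 59.0,
    "BBH": 64.0,
    "Paper/Code/Contact Link": "",
}

leaderboard, entry = create_data_entry(sample)
assert leaderboard == "Fine-Tuning"
assert entry["Rank"] == 0
assert "Metadata Path" not in entry and "Leaderboard" not in entry
```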