IvoHoese commited on
Commit
4bdf9a2
·
verified ·
1 Parent(s): 7361817

Upload task_template.py

Browse files
Files changed (1) hide show
  1. task_template.py +314 -0
task_template.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import pandas as pd
3
+ import requests
4
+ import sys
5
+ import torchvision.models as models
6
+ import os
7
+
8
+ from transformers import AutoTokenizer, PreTrainedModel, AutoModelForSequenceClassification
9
+
10
+ import utils
11
+
12
+ # --------------------------------
13
+ # DATASET
14
+ # --------------------------------
15
+
16
+ """
17
+ Dataset contents:
18
+
19
+ - 1000 subsets of text data, each subset stored under the key "subset_{i}" where i ranges from 0 to 999.
20
+ Each subset is a dictionary with:
21
+ -"prompts": List of 100 prompts in the subset
22
+ -"labels": Tensor of true labels for the prompts in the subset, with shape (100,)
23
+ -"subset_id": Integer ID of the subset (from 0 to 999)
24
+ """
25
+
26
# Load the dataset.
# NOTE(review): torch.load relies on pickle, so only load dataset files that
# come from a trusted source.
dataset = torch.load("datasets/fulltuning.pt")

# Example: Accessing subsets
subset_0 = dataset["subset_0"]

# Print a quick overview of one subset to show the available fields.
print("Subset 0 keys:", subset_0.keys())
print("Subset ID:", subset_0["subset_id"])
print("Labels length:", len(subset_0["labels"]))
print("First prompts:", subset_0["prompts"][:5])
print("First 5 labels:", subset_0["labels"][:5])
37
+
38
+ # --------------------------------
39
+ # QUERYING THE CLASSIFIER
40
+ # --------------------------------
41
+
42
+ # This code can be used to load and query the fine-tuned models. You also need the provided utils.py file.
43
+
44
+ #|---------------------------------------------------------------------------------------------------|
45
+ #| NOTE: "Missing or unexpected params" warnings are no reason for concern. They stem from the |
46
+ #| fact that the model is first loaded without a classifier head, which is added afterwards. |
47
+ #|---------------------------------------------------------------------------------------------------|
48
+
49
# Load exactly one tokenizer: the one that matches the model family being
# queried. Loading both in sequence would leave only the last one bound to
# `tokenizer`, which then would not match the OLMo checkpoints used in the
# usage examples of this file.
TOKENIZER_NAMES = {
    "olmo": "allenai/OLMo-1B-hf",
    "pythia": "EleutherAI/pythia-410m",
}
MODEL_TYPE = "olmo"  # set to "pythia" when querying the Pythia models

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAMES[MODEL_TYPE], trust_remote_code=True)

tokenizer.padding_side = "left"

# Fall back to the EOS token when the tokenizer defines no padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
62
+
63
+
64
# Usage example (fulltuning):

model_path = "models/olmo-fulltuning"

model = utils.get_fulltuning_model(model_path, model_type="olmo")  # model_type can be "olmo" or "pythia"

example_prompt = "I think, therefore I am.\n\nI am."

# Tokenize the prompt and move every input tensor to the model's device.
inputs = tokenizer(example_prompt, return_tensors="pt", truncation=True)
inputs = {k: v.to(model.device) for k, v in inputs.items()}

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits

print(f"Logits shape: {logits.shape}")
print(f"Logits: {logits}")
81
+
82
+
83
+
84
# Usage example (softprompt):

model_path = "models/olmo-softprompt"

model = utils.get_peft_model(model_path, model_type="olmo")  # model_type can be "olmo" or "pythia"

example_prompt = "I think, but do I exist?\n\nSince you think, you exist."

# Tokenize the prompt and move every input tensor to the model's device.
inputs = tokenizer(example_prompt, return_tensors="pt", truncation=True)
inputs = {k: v.to(model.device) for k, v in inputs.items()}

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits

print(f"Logits shape: {logits.shape}")
print(f"Logits: {logits}")
101
+
102
+
103
+
104
# Usage example (lora):

model_path = "models/olmo-lora"

model = utils.get_peft_model(model_path, model_type="olmo")  # model_type can be "olmo" or "pythia"

example_prompt = "Who am I?\n\nWhat am I?"

# Tokenize the prompt and move every input tensor to the model's device.
inputs = tokenizer(example_prompt, return_tensors="pt", truncation=True)
inputs = {k: v.to(model.device) for k, v in inputs.items()}

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits

print(f"Logits shape: {logits.shape}")
print(f"Logits: {logits}")
121
+
122
+
123
+
124
# Usage example (lastlayer):

model_path = "models/olmo-lastlayer"

model = utils.get_peft_model(model_path, model_type="olmo")  # model_type can be "olmo" or "pythia"

example_prompt = "I love to exist!"

# Tokenize the prompt and move every input tensor to the model's device.
inputs = tokenizer(example_prompt, return_tensors="pt", truncation=True)
inputs = {k: v.to(model.device) for k, v in inputs.items()}

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits

print(f"Logits shape: {logits.shape}")
print(f"Logits: {logits}")
141
+
142
+
143
+
144
# Usage example (prefix):

model_path = "models/olmo-prefix"

model = utils.get_peft_model(model_path, model_type="olmo")  # model_type can be "olmo" or "pythia"

example_prompt = "I will exist yesterday."

# Tokenize the prompt and move every input tensor to the model's device.
inputs = tokenizer(example_prompt, return_tensors="pt", truncation=True)
inputs = {k: v.to(model.device) for k, v in inputs.items()}

# Unlike the other examples, the prefix model is queried through
# utils.forward_peft_seqcls rather than by calling the model directly.
with torch.no_grad():
    outputs = utils.forward_peft_seqcls(model, **inputs)

logits = outputs.logits

print(f"Logits shape: {logits.shape}")
print(f"Logits: {logits}")
161
+
162
+ # --------------------------------
163
+ # SUBMISSION FORMAT
164
+ # --------------------------------
165
+
166
+ """
167
+ The submission must be a .csv file with the following format:
168
+
169
+ -"type": Name of the model (e.g., "softprompt", "fulltuning", etc.)
170
+ -"subset_id": ID of the subset (from 0 to 999, per type)
171
+ -"membership": Membership score for each subset (float)
172
+ """
173
+
174
# Example Submission:
# One row per (type, subset_id) pair: 1000 subsets for each of the 5 types.

types = ["softprompt", "fulltuning", "lora", "lastlayer", "prefix"]

# Each type name repeated once per subset, in the order given by `types`.
type_list = [t for t in types for _ in range(1000)]

# Subset IDs 0..999 restart for every type so they line up with type_list.
subset_ids = [subset_id for _ in types for subset_id in range(1000)]

# Random placeholder scores, one per row; replace with real membership scores.
membership_scores = torch.rand(len(type_list)).tolist()

submission_df = pd.DataFrame({
    "type": type_list,
    "subset_id": subset_ids,
    "membership": membership_scores,
})

# index=False keeps the row index out of the file, so the CSV contains
# exactly the three required columns.
submission_df.to_csv("example_submission.csv", index=False)
193
+
194
+ # --------------------------------
195
+ # SUBMISSION PROCESS
196
+ # --------------------------------
197
+
198
+ """
199
+ Example submission script for the LLM Dataset Membership Inference Task.
200
+
201
+ Submission Requirements (read carefully to avoid automatic rejection):
202
+
203
+ 1. CSV FORMAT
204
+ ----------------
205
+ - The file **must be a CSV** with extension `.csv`.
206
+ - It must contain **exactly three columns**, named:
207
+ type, subset_id, membership
208
+ → Column names must match exactly (lowercase, no extra spaces).
209
+ → Column order does not matter, but all three must be present.
210
+
211
+ 2. ROW COUNT AND IDENTIFIERS
212
+ -------------------------------
213
+ - Your file must contain **exactly 5000 rows**.
214
+ - Each row corresponds to one unique `subset_id`/`type` pair, with ids in the range **0–999** (inclusive).
215
+ - Every subset_id must appear **exactly once** for each type.
216
+ - Do **not** add, remove, or rename any IDs.
217
+ - Do **not** include duplicates or missing entries.
218
+ - The evaluator checks:
219
+ subset_id.min() == 0
220
+ subset_id.max() == 999
221
+ subset_id.unique().size == 1000
222
+
223
+ 3. MEMBERSHIP SCORES
224
+ ----------------------
225
+ - The `membership` column must contain **numeric values** representing your model’s predicted confidence
226
+ that the corresponding subset is a **member** of the training set.
227
+
228
+ Examples of valid membership values:
229
+ - Probabilities: values in [0.0, 1.0]
230
+ - Raw model scores: any finite numeric values (will be ranked for TPR@FPR=0.05)
231
+
232
+ - Do **not** submit string labels like "yes"/"no" or "member"/"non-member".
233
+ - The evaluator converts your `membership` column to numeric using `pd.to_numeric()`.
234
+ → Any non-numeric, NaN, or infinite entries will cause automatic rejection.
235
+
236
+ 4. TECHNICAL LIMITS
237
+ ----------------------
238
+ - Maximum file size: **20 MB**
239
+ - Encoding: UTF-8 recommended.
240
+ - Avoid extra columns, blank lines, or formulas.
241
+ - Ensure all values are numeric and finite.
242
+ - Supported data types: int, float (e.g., float32, float64)
243
+
244
+ 5. VALIDATION SUMMARY
245
+ ------------------------
246
+ Your submission will fail if:
247
+ - Columns don’t match exactly ("type", "subset_id", "membership")
248
+ - Row count differs from 5000
249
+ - Any type name is unexpected or not in the allowed set
250
+ - Any subset_id is missing, duplicated, or outside [0, 999] for any type
251
+ - Any membership value is NaN, Inf, or non-numeric
252
+ - File is too large or not a valid CSV
253
+
254
+ Two key metrics are computed:
255
+ 1. **ROC-AUC (Area Under the ROC Curve)** — measures overall discriminative ability.
256
+ 2. **TPR@FPR=0.05** — true positive rate when the false positive rate is at 5%.
257
+
258
+ """
259
+
260
# Submission server endpoint.
# NOTE(review): this is plain HTTP, so the API key is sent unencrypted —
# confirm whether an HTTPS endpoint is available.
BASE_URL = "http://35.192.205.84:80"
API_KEY = "YOUR_API_KEY_HERE"  # replace with your actual API key

TASK_ID = "14-llm-dataset-inference"
FILE_PATH = "Your-Submission-File.csv"  # replace with your actual file path

SUBMIT = False  # Set to True to enable submission
267
+
268
def die(msg):
    """Print *msg* to stderr and terminate the script with exit status 1.

    This function never returns; it always raises ``SystemExit``.
    """
    print(f"{msg}", file=sys.stderr)
    sys.exit(1)
271
+
272
# Upload FILE_PATH to the submission server; runs only when SUBMIT is True.
if SUBMIT:
    if not os.path.isfile(FILE_PATH):
        die(f"File not found: {FILE_PATH}")

    try:
        # The file must stay open while requests streams it, so the POST
        # happens inside the `with` block.
        with open(FILE_PATH, "rb") as f:
            files = {
                # (fieldname) -> (filename, fileobj, content_type)
                # NOTE(review): "csv" is not a registered MIME type
                # ("text/csv" is); left unchanged in case the server matches
                # on this value — confirm before changing.
                "file": (os.path.basename(FILE_PATH), f, "csv"),
            }
            resp = requests.post(
                f"{BASE_URL}/submit/{TASK_ID}",
                headers={"X-API-Key": API_KEY},
                files=files,
                timeout=(10, 120),  # (connect timeout, read timeout)
            )

        # Helpful output even on non-2xx: keep the raw text when the body
        # is not valid JSON (JSON decode errors are ValueError subclasses).
        try:
            body = resp.json()
        except ValueError:
            body = {"raw_text": resp.text}

        if resp.status_code == 413:
            die("Upload rejected: file too large (HTTP 413). Reduce size and try again.")

        # Raises requests.exceptions.HTTPError for any other 4xx/5xx status.
        resp.raise_for_status()

        # The body may be valid JSON without being an object (e.g. a list),
        # in which case there is no submission ID to look up.
        submission_id = body.get("submission_id") if isinstance(body, dict) else None
        print("Successfully submitted.")
        print("Server response:", body)
        if submission_id:
            print(f"Submission ID: {submission_id}")

    except requests.exceptions.RequestException as e:
        # Errors go to stderr, consistent with die().
        detail = getattr(e, "response", None)
        print(f"Submission error: {e}", file=sys.stderr)
        if detail is not None:
            try:
                print("Server response:", detail.json(), file=sys.stderr)
            except ValueError:
                print("Server response (text):", detail.text, file=sys.stderr)
        sys.exit(1)
314
+