Spaces:
Sleeping
Sleeping
bkb2135
committed on
Commit
·
12461ea
1
Parent(s):
0c772d3
Update utils using prompting-api
Browse files
utils.py
CHANGED
|
@@ -26,8 +26,6 @@ USERNAME = 'login19861986'
|
|
| 26 |
|
| 27 |
# Initialize wandb with anonymous login
|
| 28 |
wandb.login(anonymous='must')
|
| 29 |
-
|
| 30 |
-
# Your existing code
|
| 31 |
api = wandb.Api(timeout=600)
|
| 32 |
|
| 33 |
IDENTITIES = {
|
|
@@ -138,16 +136,12 @@ def load_downloaded_runs(time, cols=KEYS):
|
|
| 138 |
'date-based question answering': 'date_qa',
|
| 139 |
'question-answering': 'qa',
|
| 140 |
}
|
|
|
|
| 141 |
|
| 142 |
-
#
|
| 143 |
-
df_all
|
| 144 |
|
| 145 |
df_all.sort_values(by=['_timestamp'], inplace=True)
|
| 146 |
-
# Check if df_all has the task columns
|
| 147 |
-
if 'task' in df_all.columns:
|
| 148 |
-
df_all.task = df_all.task.apply(lambda x: task_mapping.get(x, x))
|
| 149 |
-
else:
|
| 150 |
-
df_all['task'] = "Task was not found"
|
| 151 |
|
| 152 |
return df_all
|
| 153 |
|
|
@@ -229,13 +223,13 @@ def download_runs(time, df_vali):
|
|
| 229 |
save_path = f'data/wandb/{row.run_id}.parquet'
|
| 230 |
# Create the directory if it does not exist
|
| 231 |
os.makedirs(os.path.dirname(save_path), exist_ok=True)
|
| 232 |
-
|
| 233 |
if os.path.exists(save_path):
|
| 234 |
pbar.set_description(f'>> Skipping {row.run_id!r} because file {save_path!r} already exists')
|
| 235 |
continue
|
| 236 |
|
| 237 |
try:
|
| 238 |
-
pbar.set_description(f'* Downloading run {row.run_id!r}')
|
| 239 |
run = api.run(row.run_path)
|
| 240 |
|
| 241 |
# By default we just download a subset of events (500 most recent)
|
|
@@ -418,4 +412,4 @@ def load_state_vars(username=USERNAME, percentile=0.95):
|
|
| 418 |
|
| 419 |
if __name__ == '__main__':
|
| 420 |
|
| 421 |
-
pass
|
|
|
|
| 26 |
|
| 27 |
# Initialize wandb with anonymous login
|
| 28 |
wandb.login(anonymous='must')
|
|
|
|
|
|
|
| 29 |
api = wandb.Api(timeout=600)
|
| 30 |
|
| 31 |
IDENTITIES = {
|
|
|
|
| 136 |
'date-based question answering': 'date_qa',
|
| 137 |
'question-answering': 'qa',
|
| 138 |
}
|
| 139 |
+
df_all.task = df_all.task.apply(lambda x: task_mapping.get(x, x))
|
| 140 |
|
| 141 |
+
# Runs which do not have a turn field are imputed to be turn zero (single turn)
|
| 142 |
+
df_all.turn.fillna(0, inplace=True)
|
| 143 |
|
| 144 |
df_all.sort_values(by=['_timestamp'], inplace=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
return df_all
|
| 147 |
|
|
|
|
| 223 |
save_path = f'data/wandb/{row.run_id}.parquet'
|
| 224 |
# Create the directory if it does not exist
|
| 225 |
os.makedirs(os.path.dirname(save_path), exist_ok=True)
|
| 226 |
+
|
| 227 |
if os.path.exists(save_path):
|
| 228 |
pbar.set_description(f'>> Skipping {row.run_id!r} because file {save_path!r} already exists')
|
| 229 |
continue
|
| 230 |
|
| 231 |
try:
|
| 232 |
+
pbar.set_description(f'* Downloading run {row.run_id!r}', flush=True)
|
| 233 |
run = api.run(row.run_path)
|
| 234 |
|
| 235 |
# By default we just download a subset of events (500 most recent)
|
|
|
|
| 412 |
|
| 413 |
if __name__ == '__main__':
|
| 414 |
|
| 415 |
+
pass
|