Abhishek Thakur
committed on
Commit
·
cb7ab1b
1
Parent(s):
6a5164b
competition creator
Browse files- competitions/__init__.py +4 -1
- competitions/create.py +173 -62
competitions/__init__.py
CHANGED
|
@@ -12,4 +12,7 @@ AUTOTRAIN_TOKEN = os.getenv("AUTOTRAIN_TOKEN")
|
|
| 12 |
AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API", "https://api.autotrain.huggingface.co")
|
| 13 |
BOT_TOKEN = os.getenv("BOT_TOKEN")
|
| 14 |
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API", "https://api.autotrain.huggingface.co")
|
| 13 |
BOT_TOKEN = os.getenv("BOT_TOKEN")
|
| 14 |
|
| 15 |
+
# Build the module-level CompetitionInfo only when a competition id is
# configured; otherwise leave the handle as None.
competition_info = (
    CompetitionInfo(competition_id=COMPETITION_ID, autotrain_token=AUTOTRAIN_TOKEN)
    if COMPETITION_ID is not None
    else None
)
|
competitions/create.py
CHANGED
|
@@ -10,20 +10,44 @@ from . import BOT_TOKEN
|
|
| 10 |
from .utils import user_authentication
|
| 11 |
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def create_competition(
|
|
@@ -38,20 +62,56 @@ def create_competition(
|
|
| 38 |
sample_submission_file,
|
| 39 |
solution_file,
|
| 40 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
# generate a random id
|
| 42 |
suffix = str(uuid.uuid4())
|
| 43 |
private_dataset_name = f"{who_pays}/{competition_name}{suffix}"
|
| 44 |
public_dataset_name = f"{who_pays}/{competition_name}"
|
| 45 |
space_name = f"competitions/{competition_name}"
|
| 46 |
|
| 47 |
-
sample_submission_df = pd.read_csv(sample_submission_file.name
|
| 48 |
submission_columns = ",".join(sample_submission_df.columns)
|
| 49 |
|
| 50 |
conf = {
|
|
|
|
| 51 |
"SUBMISSION_LIMIT": submission_limit,
|
| 52 |
"SELECTION_LIMIT": selection_limit,
|
| 53 |
"END_DATE": end_date,
|
| 54 |
-
"EVAL_HIGHER_IS_BETTER":
|
| 55 |
"COMPETITION_NAME": competition_name,
|
| 56 |
"SUBMISSION_ID_COLUMN": "id",
|
| 57 |
"SUBMISSION_COLUMNS": submission_columns,
|
|
@@ -62,13 +122,24 @@ def create_competition(
|
|
| 62 |
api = HfApi()
|
| 63 |
|
| 64 |
# create private dataset repo
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
competition_desc = f"""
|
| 73 |
# Welcome to {competition_name}
|
| 74 |
|
|
@@ -123,39 +194,48 @@ def create_competition(
|
|
| 123 |
token=user_token,
|
| 124 |
)
|
| 125 |
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
path_or_fileobj=solution_bytes_data,
|
| 133 |
-
path_in_repo="solution.csv",
|
| 134 |
-
repo_id=private_dataset_name,
|
| 135 |
repo_type="dataset",
|
|
|
|
| 136 |
token=user_token,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
)
|
| 138 |
|
| 139 |
-
#
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
repo_id=public_dataset_name,
|
| 142 |
repo_type="dataset",
|
| 143 |
-
private=False,
|
| 144 |
token=user_token,
|
| 145 |
-
exist_ok=False,
|
| 146 |
)
|
| 147 |
-
if sample_submission_file is not None:
|
| 148 |
-
# upload sample submission file
|
| 149 |
-
with open(sample_submission_file.name, "rb") as f:
|
| 150 |
-
sample_submission_bytes_data = f.read()
|
| 151 |
-
|
| 152 |
-
api.upload_file(
|
| 153 |
-
path_or_fileobj=sample_submission_bytes_data,
|
| 154 |
-
path_in_repo="sample_submission.csv",
|
| 155 |
-
repo_id=public_dataset_name,
|
| 156 |
-
repo_type="dataset",
|
| 157 |
-
token=user_token,
|
| 158 |
-
)
|
| 159 |
|
| 160 |
dockerfile = """
|
| 161 |
FROM huggingface/competitions:latest
|
|
@@ -198,7 +278,6 @@ def create_competition(
|
|
| 198 |
"""
|
| 199 |
space_readme = space_readme.strip()
|
| 200 |
space_readme = space_readme.replace(" ", "")
|
| 201 |
-
print(repr(space_readme))
|
| 202 |
|
| 203 |
# upload space readme
|
| 204 |
space_readme_bytes = space_readme.encode("utf-8")
|
|
@@ -231,6 +310,19 @@ def create_competition(
|
|
| 231 |
token=BOT_TOKEN,
|
| 232 |
)
|
| 233 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
def check_if_user_can_create_competition(user_token):
|
| 236 |
"""
|
|
@@ -239,7 +331,6 @@ def check_if_user_can_create_competition(user_token):
|
|
| 239 |
:return: True if the user can create a competition, False otherwise
|
| 240 |
"""
|
| 241 |
user_info = user_authentication(user_token)
|
| 242 |
-
print(user_info)
|
| 243 |
return_msg = None
|
| 244 |
if "error" in user_info:
|
| 245 |
return_msg = "Invalid token. You can find your HF token here: https://huggingface.co/settings/tokens"
|
|
@@ -247,9 +338,6 @@ def check_if_user_can_create_competition(user_token):
|
|
| 247 |
elif user_info["auth"]["accessToken"]["role"] != "write":
|
| 248 |
return_msg = "Please provide a token with write access"
|
| 249 |
|
| 250 |
-
elif user_info["canPay"] is False:
|
| 251 |
-
return_msg = "Please add a valid payment method in order to create and manage a competition"
|
| 252 |
-
|
| 253 |
if return_msg is not None:
|
| 254 |
return [
|
| 255 |
gr.Box.update(visible=False),
|
|
@@ -257,23 +345,45 @@ def check_if_user_can_create_competition(user_token):
|
|
| 257 |
gr.Dropdown.update(visible=False),
|
| 258 |
]
|
| 259 |
|
| 260 |
-
username = user_info["name"]
|
| 261 |
-
user_id = user_info["id"]
|
| 262 |
-
|
| 263 |
orgs = user_info["orgs"]
|
| 264 |
valid_orgs = [org for org in orgs if org["canPay"] is True]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
valid_orgs = [org for org in valid_orgs if org["roleInOrg"] in ("admin", "write")]
|
| 266 |
|
| 267 |
-
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
-
|
| 271 |
-
valid_entities = {v: k for k, v in valid_entities.items()}
|
| 272 |
|
| 273 |
return [
|
| 274 |
gr.Box.update(visible=True),
|
| 275 |
gr.Markdown.update(value="", visible=False),
|
| 276 |
-
gr.Dropdown.update(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
]
|
| 278 |
|
| 279 |
|
|
@@ -361,6 +471,8 @@ with gr.Blocks() as demo:
|
|
| 361 |
with gr.Row():
|
| 362 |
create_button = gr.Button("Create Competition")
|
| 363 |
|
|
|
|
|
|
|
| 364 |
login_button.click(
|
| 365 |
check_if_user_can_create_competition, inputs=[user_token], outputs=[create_box, message_box, who_pays]
|
| 366 |
)
|
|
@@ -377,5 +489,4 @@ with gr.Blocks() as demo:
|
|
| 377 |
sample_submission_file,
|
| 378 |
solution_file,
|
| 379 |
]
|
| 380 |
-
|
| 381 |
-
create_button.click(create_competition, inputs=create_inputs, outputs=[message_box])
|
|
|
|
| 10 |
from .utils import user_authentication
|
| 11 |
|
| 12 |
|
| 13 |
+
def verify_sample_and_solution(sample_submission, solution):
    """Validate that the sample submission and solution CSV files are consistent.

    Both arguments are file-like objects exposing a ``name`` attribute that
    holds the path of a CSV file on disk; each file is read with
    ``pd.read_csv``.

    The checks performed, in order:

    1. both files contain an ``id`` column;
    2. both files have the same number of rows;
    3. the ``id`` columns match row by row;
    4. the solution contains a ``split`` column whose unique values are
       exactly ``public`` and ``private``;
    5. apart from ``split``, the solution has the same columns, in the same
       order, as the sample submission.

    :param sample_submission: file object for the sample submission CSV
    :param solution: file object for the solution CSV
    :return: True when every check passes
    :raises ValueError: with a user-facing message describing the first
        check that failed
    """
    sample_df = pd.read_csv(sample_submission.name)
    solution_df = pd.read_csv(solution.name)

    # check if both contain an id column
    if "id" not in sample_df.columns:
        raise ValueError("Sample submission should contain an id column")

    if "id" not in solution_df.columns:
        raise ValueError("Solution file should contain an id column")

    # The row-count check must run before the id comparison: comparing two
    # Series of different lengths makes pandas raise its own ValueError, which
    # would hide the intended message from the user.
    if sample_df.shape[0] != solution_df.shape[0]:
        raise ValueError("Sample submission and solution should have the same number of rows")

    # check if both files have the same ids (row by row)
    if not (sample_df["id"] == solution_df["id"]).all():
        raise ValueError("Sample submission and solution should have the same ids")

    # check if solution contains a split column
    if "split" not in solution_df.columns:
        raise ValueError("Solution file should contain a split column")

    # The unique split values must be exactly {public, private}; this also
    # covers the "exactly two unique values" requirement.
    if set(solution_df["split"].unique()) != {"public", "private"}:
        raise ValueError("Split column should contain only two unique values: public and private")

    # Except for the `split` column, all other columns should be the same.
    # Plain lists are compared so that a different number of columns yields
    # the message below instead of a pandas "Lengths must match" error.
    solution_columns = solution_df.columns.tolist()
    solution_columns.remove("split")
    if sample_df.columns.tolist() != solution_columns:
        raise ValueError("Sample submission and solution should have the same columns, except for the split column")

    return True
|
| 51 |
|
| 52 |
|
| 53 |
def create_competition(
|
|
|
|
| 62 |
sample_submission_file,
|
| 63 |
solution_file,
|
| 64 |
):
|
| 65 |
+
|
| 66 |
+
# verify sample submission and solution
|
| 67 |
+
try:
|
| 68 |
+
verify_sample_and_solution(sample_submission_file, solution_file)
|
| 69 |
+
except Exception as e:
|
| 70 |
+
return gr.Markdown.update(
|
| 71 |
+
value=f"""
|
| 72 |
+
<div style="text-align: center">
|
| 73 |
+
<h4>Invalid sample submission or solution file</h4>
|
| 74 |
+
<p>{e}</p>
|
| 75 |
+
</div>
|
| 76 |
+
""",
|
| 77 |
+
visible=True,
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
# check if end_date is valid format: YYYY-MM-DD and in the future
|
| 81 |
+
try:
|
| 82 |
+
if len(end_date.split("-")) != 3:
|
| 83 |
+
raise Exception("End date should be in the format YYYY-MM-DD")
|
| 84 |
+
end_date_pd = pd.to_datetime(end_date)
|
| 85 |
+
if end_date_pd == pd.NaT:
|
| 86 |
+
raise Exception("End date should be in the format YYYY-MM-DD")
|
| 87 |
+
if end_date_pd <= pd.to_datetime("today"):
|
| 88 |
+
raise Exception("End date should be in the future")
|
| 89 |
+
except Exception as e:
|
| 90 |
+
return gr.Markdown.update(
|
| 91 |
+
value=f"""
|
| 92 |
+
<div style="text-align: center">
|
| 93 |
+
<h4>Invalid end date</h4>
|
| 94 |
+
<p>{e}</p>
|
| 95 |
+
</div>
|
| 96 |
+
""",
|
| 97 |
+
visible=True,
|
| 98 |
+
)
|
| 99 |
+
|
| 100 |
# generate a random id
|
| 101 |
suffix = str(uuid.uuid4())
|
| 102 |
private_dataset_name = f"{who_pays}/{competition_name}{suffix}"
|
| 103 |
public_dataset_name = f"{who_pays}/{competition_name}"
|
| 104 |
space_name = f"competitions/{competition_name}"
|
| 105 |
|
| 106 |
+
sample_submission_df = pd.read_csv(sample_submission_file.name)
|
| 107 |
submission_columns = ",".join(sample_submission_df.columns)
|
| 108 |
|
| 109 |
conf = {
|
| 110 |
+
"COMPETITION_TYPE": competition_type,
|
| 111 |
"SUBMISSION_LIMIT": submission_limit,
|
| 112 |
"SELECTION_LIMIT": selection_limit,
|
| 113 |
"END_DATE": end_date,
|
| 114 |
+
"EVAL_HIGHER_IS_BETTER": 1 if eval_metric != "logloss" else 0,
|
| 115 |
"COMPETITION_NAME": competition_name,
|
| 116 |
"SUBMISSION_ID_COLUMN": "id",
|
| 117 |
"SUBMISSION_COLUMNS": submission_columns,
|
|
|
|
| 122 |
api = HfApi()
|
| 123 |
|
| 124 |
# create private dataset repo
|
| 125 |
+
try:
|
| 126 |
+
create_repo(
|
| 127 |
+
repo_id=private_dataset_name,
|
| 128 |
+
repo_type="dataset",
|
| 129 |
+
private=True,
|
| 130 |
+
token=user_token,
|
| 131 |
+
exist_ok=False,
|
| 132 |
+
)
|
| 133 |
+
except Exception as e:
|
| 134 |
+
return gr.Markdown.update(
|
| 135 |
+
value=f"""
|
| 136 |
+
<div style="text-align: center">
|
| 137 |
+
<h4>Failed to create private dataset repo</h4>
|
| 138 |
+
<p>{e}</p>
|
| 139 |
+
</div>
|
| 140 |
+
""",
|
| 141 |
+
visible=True,
|
| 142 |
+
)
|
| 143 |
competition_desc = f"""
|
| 144 |
# Welcome to {competition_name}
|
| 145 |
|
|
|
|
| 194 |
token=user_token,
|
| 195 |
)
|
| 196 |
|
| 197 |
+
with open(solution_file.name, "rb") as f:
|
| 198 |
+
solution_bytes_data = f.read()
|
| 199 |
+
# upload solution file
|
| 200 |
+
api.upload_file(
|
| 201 |
+
path_or_fileobj=solution_bytes_data,
|
| 202 |
+
path_in_repo="solution.csv",
|
| 203 |
+
repo_id=private_dataset_name,
|
| 204 |
+
repo_type="dataset",
|
| 205 |
+
token=user_token,
|
| 206 |
+
)
|
| 207 |
|
| 208 |
+
# create public dataset repo
|
| 209 |
+
try:
|
| 210 |
+
create_repo(
|
| 211 |
+
repo_id=public_dataset_name,
|
|
|
|
|
|
|
|
|
|
| 212 |
repo_type="dataset",
|
| 213 |
+
private=False,
|
| 214 |
token=user_token,
|
| 215 |
+
exist_ok=False,
|
| 216 |
+
)
|
| 217 |
+
except Exception as e:
|
| 218 |
+
return gr.Markdown.update(
|
| 219 |
+
value=f"""
|
| 220 |
+
<div style="text-align: center">
|
| 221 |
+
<h4>Failed to create public dataset repo</h4>
|
| 222 |
+
<p>{e}</p>
|
| 223 |
+
</div>
|
| 224 |
+
""",
|
| 225 |
+
visible=True,
|
| 226 |
)
|
| 227 |
|
| 228 |
+
# upload sample submission file
|
| 229 |
+
with open(sample_submission_file.name, "rb") as f:
|
| 230 |
+
sample_submission_bytes_data = f.read()
|
| 231 |
+
|
| 232 |
+
api.upload_file(
|
| 233 |
+
path_or_fileobj=sample_submission_bytes_data,
|
| 234 |
+
path_in_repo="sample_submission.csv",
|
| 235 |
repo_id=public_dataset_name,
|
| 236 |
repo_type="dataset",
|
|
|
|
| 237 |
token=user_token,
|
|
|
|
| 238 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
dockerfile = """
|
| 241 |
FROM huggingface/competitions:latest
|
|
|
|
| 278 |
"""
|
| 279 |
space_readme = space_readme.strip()
|
| 280 |
space_readme = space_readme.replace(" ", "")
|
|
|
|
| 281 |
|
| 282 |
# upload space readme
|
| 283 |
space_readme_bytes = space_readme.encode("utf-8")
|
|
|
|
| 310 |
token=BOT_TOKEN,
|
| 311 |
)
|
| 312 |
|
| 313 |
+
return gr.Markdown.update(
|
| 314 |
+
value=f"""
|
| 315 |
+
<div style="text-align: center">
|
| 316 |
+
<h4>Competition created successfully!</h4>
|
| 317 |
+
<p>Private dataset: <a href="https://hf.co/datasets/{private_dataset_name}">{private_dataset_name}</a></p>
|
| 318 |
+
<p>Public dataset: <a href="https://hf.co/datasets/{public_dataset_name}">{public_dataset_name}</a></p>
|
| 319 |
+
<p>Competition space: <a href="https://hf.co/spaces/{space_name}">{space_name}</a></p>
|
| 320 |
+
<p>Note: Do NOT share the private dataset or link with anyone else.</p>
|
| 321 |
+
</div>
|
| 322 |
+
""",
|
| 323 |
+
visible=True,
|
| 324 |
+
)
|
| 325 |
+
|
| 326 |
|
| 327 |
def check_if_user_can_create_competition(user_token):
|
| 328 |
"""
|
|
|
|
| 331 |
:return: True if the user can create a competition, False otherwise
|
| 332 |
"""
|
| 333 |
user_info = user_authentication(user_token)
|
|
|
|
| 334 |
return_msg = None
|
| 335 |
if "error" in user_info:
|
| 336 |
return_msg = "Invalid token. You can find your HF token here: https://huggingface.co/settings/tokens"
|
|
|
|
| 338 |
elif user_info["auth"]["accessToken"]["role"] != "write":
|
| 339 |
return_msg = "Please provide a token with write access"
|
| 340 |
|
|
|
|
|
|
|
|
|
|
| 341 |
if return_msg is not None:
|
| 342 |
return [
|
| 343 |
gr.Box.update(visible=False),
|
|
|
|
| 345 |
gr.Dropdown.update(visible=False),
|
| 346 |
]
|
| 347 |
|
|
|
|
|
|
|
|
|
|
| 348 |
orgs = user_info["orgs"]
|
| 349 |
valid_orgs = [org for org in orgs if org["canPay"] is True]
|
| 350 |
+
|
| 351 |
+
if len(valid_orgs) == 0:
|
| 352 |
+
return_msg = """You are not a member of any organization with a valid payment method.
|
| 353 |
+
Please add a valid payment method for your organization in order to create competitions."""
|
| 354 |
+
return [
|
| 355 |
+
gr.Box.update(visible=False),
|
| 356 |
+
gr.Markdown.update(
|
| 357 |
+
value=return_msg,
|
| 358 |
+
visible=True,
|
| 359 |
+
),
|
| 360 |
+
gr.Dropdown.update(visible=False),
|
| 361 |
+
]
|
| 362 |
+
|
| 363 |
valid_orgs = [org for org in valid_orgs if org["roleInOrg"] in ("admin", "write")]
|
| 364 |
|
| 365 |
+
if len(valid_orgs) == 0:
|
| 366 |
+
return_msg = """You dont have write access for any organization.
|
| 367 |
+
Please contact your organization's admin to add you as a member with write privilages."""
|
| 368 |
+
return [
|
| 369 |
+
gr.Box.update(visible=False),
|
| 370 |
+
gr.Markdown.update(
|
| 371 |
+
value=return_msg,
|
| 372 |
+
visible=True,
|
| 373 |
+
),
|
| 374 |
+
gr.Dropdown.update(visible=False),
|
| 375 |
+
]
|
| 376 |
|
| 377 |
+
valid_entities = {org["name"]: org["id"] for org in valid_orgs}
|
|
|
|
| 378 |
|
| 379 |
return [
|
| 380 |
gr.Box.update(visible=True),
|
| 381 |
gr.Markdown.update(value="", visible=False),
|
| 382 |
+
gr.Dropdown.update(
|
| 383 |
+
choices=list(valid_entities.keys()),
|
| 384 |
+
visible=True,
|
| 385 |
+
value=list(valid_entities.keys())[0],
|
| 386 |
+
),
|
| 387 |
]
|
| 388 |
|
| 389 |
|
|
|
|
| 471 |
with gr.Row():
|
| 472 |
create_button = gr.Button("Create Competition")
|
| 473 |
|
| 474 |
+
final_output = gr.Markdown(visible=True)
|
| 475 |
+
|
| 476 |
login_button.click(
|
| 477 |
check_if_user_can_create_competition, inputs=[user_token], outputs=[create_box, message_box, who_pays]
|
| 478 |
)
|
|
|
|
| 489 |
sample_submission_file,
|
| 490 |
solution_file,
|
| 491 |
]
|
| 492 |
+
create_button.click(create_competition, inputs=create_inputs, outputs=[final_output])
|
|
|