Abhishek Thakur
committed on
Commit
·
a2fa160
1
Parent(s):
1094cbb
migrate to teams
Browse files- .dockerignore +1 -0
- .gitignore +1 -0
- 16337e22-7815-4ebd-a6c4-7a58dc46e214/.gitattributes +35 -0
- 16337e22-7815-4ebd-a6c4-7a58dc46e214/script.py +9 -0
- competitions/api.py +109 -0
- competitions/app.py +5 -1
- competitions/compute_metrics.py +2 -2
- competitions/evaluate.py +26 -5
- competitions/info.py +0 -4
- competitions/leaderboard.py +19 -26
- competitions/params.py +2 -1
- competitions/runner.py +101 -17
- competitions/submissions.py +151 -98
- competitions/utils.py +2 -2
.dockerignore
CHANGED
|
@@ -4,6 +4,7 @@
|
|
| 4 |
.vim/
|
| 5 |
flagged/
|
| 6 |
*.csv
|
|
|
|
| 7 |
|
| 8 |
# Byte-compiled / optimized / DLL files
|
| 9 |
__pycache__/
|
|
|
|
| 4 |
.vim/
|
| 5 |
flagged/
|
| 6 |
*.csv
|
| 7 |
+
*.db
|
| 8 |
|
| 9 |
# Byte-compiled / optimized / DLL files
|
| 10 |
__pycache__/
|
.gitignore
CHANGED
|
@@ -4,6 +4,7 @@
|
|
| 4 |
.vim/
|
| 5 |
flagged/
|
| 6 |
*.csv
|
|
|
|
| 7 |
|
| 8 |
# Byte-compiled / optimized / DLL files
|
| 9 |
__pycache__/
|
|
|
|
| 4 |
.vim/
|
| 5 |
flagged/
|
| 6 |
*.csv
|
| 7 |
+
*.db
|
| 8 |
|
| 9 |
# Byte-compiled / optimized / DLL files
|
| 10 |
__pycache__/
|
16337e22-7815-4ebd-a6c4-7a58dc46e214/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
16337e22-7815-4ebd-a6c4-7a58dc46e214/script.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd

# Baseline submission: one row per id in [0, NUM_ROWS) with a constant
# prediction, written to submission.csv in the current working directory.
NUM_ROWS = 10000
CONSTANT_PRED = 0.5

# Build the frame directly instead of appending tuples in a Python loop; the
# scalar prediction is broadcast across every id.
sub = pd.DataFrame({"id": range(NUM_ROWS), "pred": CONSTANT_PRED})
sub.to_csv("submission.csv", index=False)
|
competitions/api.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import os
|
| 3 |
+
import signal
|
| 4 |
+
import sqlite3
|
| 5 |
+
from contextlib import asynccontextmanager
|
| 6 |
+
|
| 7 |
+
import psutil
|
| 8 |
+
from fastapi import FastAPI
|
| 9 |
+
from loguru import logger
|
| 10 |
+
|
| 11 |
+
from competitions.utils import run_evaluation
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def get_process_status(pid):
    """Look up the psutil status string for a process.

    Args:
        pid: ID of the process to inspect.

    Returns:
        The value of ``psutil.Process(pid).status()``, or the literal
        ``"Completed"`` when psutil reports that no such process exists.
    """
    try:
        return psutil.Process(pid).status()
    except psutil.NoSuchProcess:
        # A PID that no longer exists is reported to the caller as finished.
        logger.info(f"No process found with PID: {pid}")
        return "Completed"
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def kill_process_by_pid(pid):
    """Send SIGTERM to the process identified by ``pid``.

    Any error raised by ``os.kill`` propagates to the caller.
    """
    termination_signal = signal.SIGTERM
    os.kill(pid, termination_signal)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class JobDB:
    """Small SQLite-backed registry of process IDs.

    A single connection and cursor are opened in ``__init__`` and reused by
    every method. Each mutating method commits immediately.
    """

    def __init__(self, db_path):
        """Open (or create) the database at ``db_path`` and ensure the table exists."""
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path)
        self.c = self.conn.cursor()
        self.create_jobs_table()

    def create_jobs_table(self):
        """Create the ``jobs`` table (``id``, ``pid``) if it is not already present."""
        self.c.execute(
            """CREATE TABLE IF NOT EXISTS jobs
            (id INTEGER PRIMARY KEY, pid INTEGER)"""
        )
        self.conn.commit()

    def add_job(self, pid):
        """Record ``pid`` as a tracked job."""
        # Bind the value instead of formatting it into the SQL text so the
        # statement cannot be altered by the contents of ``pid``.
        self.c.execute("INSERT INTO jobs (pid) VALUES (?)", (pid,))
        self.conn.commit()

    def get_running_jobs(self):
        """Return the list of all PIDs currently stored in the table."""
        self.c.execute("SELECT pid FROM jobs")
        return [row[0] for row in self.c.fetchall()]

    def delete_job(self, pid):
        """Remove every row whose ``pid`` column equals ``pid``."""
        self.c.execute("DELETE FROM jobs WHERE pid = ?", (pid,))
        self.conn.commit()
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
PARAMS = os.environ.get("PARAMS")
|
| 61 |
+
DB = JobDB("job.db")
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class BackgroundRunner:
    """Polls the job table and shuts the server down once no jobs remain."""

    async def run_main(self):
        """Loop forever: reap finished jobs, then signal shutdown if none are left.

        Every iteration ends with a 30 second sleep.
        """
        finished_states = ("completed", "error", "zombie")
        while True:
            for _pid in DB.get_running_jobs():
                state = get_process_status(_pid).strip().lower()
                if state not in finished_states:
                    continue
                logger.info(f"Process {_pid} is already completed. Skipping...")
                try:
                    kill_process_by_pid(_pid)
                except Exception as e:
                    # Best effort: the process may already be gone.
                    logger.info(f"Error while killing process: {e}")
                DB.delete_job(_pid)

            # Re-read the table so jobs removed above are taken into account.
            if not DB.get_running_jobs():
                logger.info("No running jobs found. Shutting down the server.")
                os.kill(os.getpid(), signal.SIGINT)
            await asyncio.sleep(30)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
runner = BackgroundRunner()
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start the evaluation process and its monitor when the app starts.

    On startup: launches ``run_evaluation`` with ``PARAMS``, records the
    returned PID in ``DB`` and schedules ``runner.run_main()`` on the event
    loop. On shutdown: cancels the monitor task.
    """
    process_pid = run_evaluation(params=PARAMS)
    logger.info(f"Started training with PID {process_pid}")
    DB.add_job(process_pid)
    # The event loop only keeps weak references to tasks, so hold a strong
    # reference for the lifetime of the app to keep the monitor from being
    # garbage-collected mid-execution.
    monitor_task = asyncio.create_task(runner.run_main())
    try:
        yield
    finally:
        monitor_task.cancel()
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
api = FastAPI(lifespan=lifespan)
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
@api.get("/")
async def root():
    # Landing route: returns a fixed status string.
    status_message = "Your model is being evaluated..."
    return status_message
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
@api.get("/health")
async def health():
    # Health-check route: always answers with the same fixed string.
    health_status = "OK"
    return health_status
|
competitions/app.py
CHANGED
|
@@ -5,6 +5,7 @@ from fastapi import FastAPI, File, Form, Request, UploadFile
|
|
| 5 |
from fastapi.responses import HTMLResponse, JSONResponse
|
| 6 |
from fastapi.staticfiles import StaticFiles
|
| 7 |
from fastapi.templating import Jinja2Templates
|
|
|
|
| 8 |
from pydantic import BaseModel
|
| 9 |
|
| 10 |
from competitions.info import CompetitionInfo
|
|
@@ -83,6 +84,7 @@ async def get_leaderboard(request: Request, lb: str):
|
|
| 83 |
autotrain_token=HF_TOKEN,
|
| 84 |
)
|
| 85 |
df = leaderboard.fetch(private=lb == "private")
|
|
|
|
| 86 |
resp = {"response": df.to_markdown(index=False)}
|
| 87 |
return resp
|
| 88 |
|
|
@@ -94,6 +96,7 @@ async def my_submissions(request: Request, user: User):
|
|
| 94 |
submission_limit=COMP_INFO.submission_limit,
|
| 95 |
competition_id=COMPETITION_ID,
|
| 96 |
token=HF_TOKEN,
|
|
|
|
| 97 |
)
|
| 98 |
success_subs, failed_subs = sub.my_submissions(user.user_token)
|
| 99 |
success_subs = success_subs.to_markdown(index=False)
|
|
@@ -107,7 +110,7 @@ async def my_submissions(request: Request, user: User):
|
|
| 107 |
|
| 108 |
@app.post("/new_submission", response_class=JSONResponse)
|
| 109 |
async def new_submission(
|
| 110 |
-
submission_file: UploadFile = File(
|
| 111 |
hub_model: str = Form(...),
|
| 112 |
token: str = Form(...),
|
| 113 |
submission_comment: str = Form(...),
|
|
@@ -117,6 +120,7 @@ async def new_submission(
|
|
| 117 |
submission_limit=COMP_INFO.submission_limit,
|
| 118 |
competition_id=COMPETITION_ID,
|
| 119 |
token=HF_TOKEN,
|
|
|
|
| 120 |
)
|
| 121 |
if COMP_INFO.competition_type == "generic":
|
| 122 |
resp = sub.new_submission(token, submission_file, submission_comment)
|
|
|
|
| 5 |
from fastapi.responses import HTMLResponse, JSONResponse
|
| 6 |
from fastapi.staticfiles import StaticFiles
|
| 7 |
from fastapi.templating import Jinja2Templates
|
| 8 |
+
from loguru import logger
|
| 9 |
from pydantic import BaseModel
|
| 10 |
|
| 11 |
from competitions.info import CompetitionInfo
|
|
|
|
| 84 |
autotrain_token=HF_TOKEN,
|
| 85 |
)
|
| 86 |
df = leaderboard.fetch(private=lb == "private")
|
| 87 |
+
logger.info(df)
|
| 88 |
resp = {"response": df.to_markdown(index=False)}
|
| 89 |
return resp
|
| 90 |
|
|
|
|
| 96 |
submission_limit=COMP_INFO.submission_limit,
|
| 97 |
competition_id=COMPETITION_ID,
|
| 98 |
token=HF_TOKEN,
|
| 99 |
+
competition_type=COMP_INFO.competition_type,
|
| 100 |
)
|
| 101 |
success_subs, failed_subs = sub.my_submissions(user.user_token)
|
| 102 |
success_subs = success_subs.to_markdown(index=False)
|
|
|
|
| 110 |
|
| 111 |
@app.post("/new_submission", response_class=JSONResponse)
|
| 112 |
async def new_submission(
|
| 113 |
+
submission_file: UploadFile = File(None),
|
| 114 |
hub_model: str = Form(...),
|
| 115 |
token: str = Form(...),
|
| 116 |
submission_comment: str = Form(...),
|
|
|
|
| 120 |
submission_limit=COMP_INFO.submission_limit,
|
| 121 |
competition_id=COMPETITION_ID,
|
| 122 |
token=HF_TOKEN,
|
| 123 |
+
competition_type=COMP_INFO.competition_type,
|
| 124 |
)
|
| 125 |
if COMP_INFO.competition_type == "generic":
|
| 126 |
resp = sub.new_submission(token, submission_file, submission_comment)
|
competitions/compute_metrics.py
CHANGED
|
@@ -15,7 +15,7 @@ def compute_metrics(params):
|
|
| 15 |
|
| 16 |
solution_df = pd.read_csv(solution_file)
|
| 17 |
|
| 18 |
-
submission_filename = f"submissions/{params.
|
| 19 |
submission_file = hf_hub_download(
|
| 20 |
repo_id=params.competition_id,
|
| 21 |
filename=submission_filename,
|
|
@@ -47,7 +47,7 @@ def compute_metrics(params):
|
|
| 47 |
else:
|
| 48 |
_metric = getattr(metrics, params.metric)
|
| 49 |
target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]
|
| 50 |
-
public_score = _metric(
|
| 51 |
private_score = _metric(private_solution_df[target_cols], private_submission_df[target_cols])
|
| 52 |
|
| 53 |
# scores can also be dictionaries for multiple metrics
|
|
|
|
| 15 |
|
| 16 |
solution_df = pd.read_csv(solution_file)
|
| 17 |
|
| 18 |
+
submission_filename = f"submissions/{params.team_id}-{params.submission_id}.csv"
|
| 19 |
submission_file = hf_hub_download(
|
| 20 |
repo_id=params.competition_id,
|
| 21 |
filename=submission_filename,
|
|
|
|
| 47 |
else:
|
| 48 |
_metric = getattr(metrics, params.metric)
|
| 49 |
target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]
|
| 50 |
+
public_score = _metric(public_solution_df[target_cols], public_submission_df[target_cols])
|
| 51 |
private_score = _metric(private_solution_df[target_cols], private_submission_df[target_cols])
|
| 52 |
|
| 53 |
# scores can also be dictionaries for multiple metrics
|
competitions/evaluate.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
import argparse
|
| 2 |
import json
|
|
|
|
| 3 |
|
| 4 |
-
from huggingface_hub import snapshot_download
|
| 5 |
from loguru import logger
|
| 6 |
|
| 7 |
from competitions import utils
|
|
@@ -15,12 +16,32 @@ def parse_args():
|
|
| 15 |
return parser.parse_args()
|
| 16 |
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def generate_submission_file(params):
|
|
|
|
| 19 |
logger.info("Downloading submission dataset")
|
| 20 |
-
snapshot_download(
|
| 21 |
-
repo_id=params.
|
| 22 |
local_dir=params.output_path,
|
| 23 |
token=params.token,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
repo_type="dataset",
|
| 25 |
)
|
| 26 |
|
|
@@ -35,9 +56,9 @@ def run(params):
|
|
| 35 |
if params.competition_type == "code":
|
| 36 |
generate_submission_file(params)
|
| 37 |
|
| 38 |
-
|
| 39 |
|
| 40 |
-
utils.update_submission_score(params, public_score, private_score)
|
| 41 |
utils.update_submission_status(params, "success")
|
| 42 |
utils.pause_space(params)
|
| 43 |
|
|
|
|
| 1 |
import argparse
|
| 2 |
import json
|
| 3 |
+
import subprocess
|
| 4 |
|
| 5 |
+
from huggingface_hub import HfApi, snapshot_download
|
| 6 |
from loguru import logger
|
| 7 |
|
| 8 |
from competitions import utils
|
|
|
|
| 16 |
return parser.parse_args()
|
| 17 |
|
| 18 |
|
| 19 |
+
def upload_submission_file(params, file_path):
    """Placeholder for uploading a generated submission file.

    Currently this only logs a message; ``params`` and ``file_path`` are not used.
    """
    logger.info("Uploading submission file")
|
| 22 |
+
|
| 23 |
+
|
| 24 |
def generate_submission_file(params):
    """Run a submitted ``script.py`` and upload the CSV it produces.

    Downloads the model repo ``<competition owner>/<submission_id>`` into
    ``params.output_path``, runs its ``script.py`` with the download directory
    as the working directory, then uploads ``submission.csv`` from that
    directory to the competition dataset repo as
    ``submissions/<team_id>-<submission_id>.csv``.

    Args:
        params: Evaluation parameters; reads ``competition_id``,
            ``submission_id``, ``team_id``, ``output_path`` and ``token``.

    Raises:
        subprocess.CalledProcessError: If ``script.py`` exits with a non-zero
            status.
    """
    # The submission repo lives under the same owner as the competition repo.
    base_user = params.competition_id.split("/")[0]
    logger.info("Downloading submission dataset")
    submission_dir = snapshot_download(
        repo_id=f"{base_user}/{params.submission_id}",
        local_dir=params.output_path,
        token=params.token,
        repo_type="model",
    )
    logger.info("Generating submission file")
    # NOTE(review): this executes code taken from the submission repo; confirm
    # it only ever runs inside an isolated environment.
    # check=True stops here when the script fails, instead of going on to
    # upload a missing or stale submission.csv.
    subprocess.run(["python", "script.py"], cwd=submission_dir, check=True)

    api = HfApi(token=params.token)
    api.upload_file(
        path_or_fileobj=f"{submission_dir}/submission.csv",
        path_in_repo=f"submissions/{params.team_id}-{params.submission_id}.csv",
        repo_id=params.competition_id,
        repo_type="dataset",
    )
|
| 47 |
|
|
|
|
| 56 |
if params.competition_type == "code":
|
| 57 |
generate_submission_file(params)
|
| 58 |
|
| 59 |
+
evaluation = compute_metrics(params)
|
| 60 |
|
| 61 |
+
utils.update_submission_score(params, evaluation["public_score"], evaluation["private_score"])
|
| 62 |
utils.update_submission_status(params, "success")
|
| 63 |
utils.pause_space(params)
|
| 64 |
|
competitions/info.py
CHANGED
|
@@ -84,10 +84,6 @@ class CompetitionInfo:
|
|
| 84 |
def competition_description(self):
|
| 85 |
return self.competition_desc
|
| 86 |
|
| 87 |
-
@property
|
| 88 |
-
def competition_name(self):
|
| 89 |
-
return self.config["COMPETITION_NAME"]
|
| 90 |
-
|
| 91 |
@property
|
| 92 |
def submission_columns(self):
|
| 93 |
return self.config["SUBMISSION_COLUMNS"].split(",")
|
|
|
|
| 84 |
def competition_description(self):
|
| 85 |
return self.competition_desc
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
@property
|
| 88 |
def submission_columns(self):
|
| 89 |
return self.config["SUBMISSION_COLUMNS"].split(",")
|
competitions/leaderboard.py
CHANGED
|
@@ -25,13 +25,13 @@ class Leaderboard:
|
|
| 25 |
def _refresh_columns(self):
|
| 26 |
self.private_columns = [
|
| 27 |
"rank",
|
| 28 |
-
"
|
| 29 |
"private_score",
|
| 30 |
"submission_datetime",
|
| 31 |
]
|
| 32 |
self.public_columns = [
|
| 33 |
"rank",
|
| 34 |
-
"
|
| 35 |
"public_score",
|
| 36 |
"submission_datetime",
|
| 37 |
]
|
|
@@ -49,14 +49,16 @@ class Leaderboard:
|
|
| 49 |
start_time = time.time()
|
| 50 |
submissions = []
|
| 51 |
for submission in glob.glob(os.path.join(submissions_folder, "submission_info", "*.json")):
|
| 52 |
-
with open(submission, "r") as f:
|
| 53 |
submission_info = json.load(f)
|
| 54 |
# only select submissions that are done
|
| 55 |
-
submission_info["submissions"] = [
|
|
|
|
|
|
|
| 56 |
submission_info["submissions"] = [
|
| 57 |
sub
|
| 58 |
for sub in submission_info["submissions"]
|
| 59 |
-
if datetime.strptime(sub["
|
| 60 |
]
|
| 61 |
if len(submission_info["submissions"]) == 0:
|
| 62 |
continue
|
|
@@ -83,15 +85,13 @@ class Leaderboard:
|
|
| 83 |
submission_info["submissions"] = submission_info["submissions"][0]
|
| 84 |
temp_info = {
|
| 85 |
"id": submission_info["id"],
|
| 86 |
-
"name": submission_info["name"],
|
| 87 |
"submission_id": submission_info["submissions"]["submission_id"],
|
| 88 |
"submission_comment": submission_info["submissions"]["submission_comment"],
|
| 89 |
"status": submission_info["submissions"]["status"],
|
| 90 |
"selected": submission_info["submissions"]["selected"],
|
| 91 |
"public_score": submission_info["submissions"]["public_score"],
|
| 92 |
# "private_score": submission_info["submissions"]["private_score"],
|
| 93 |
-
"
|
| 94 |
-
"submission_time": submission_info["submissions"]["time"],
|
| 95 |
}
|
| 96 |
for score in other_scores:
|
| 97 |
temp_info[score] = submission_info["submissions"][score]
|
|
@@ -112,10 +112,10 @@ class Leaderboard:
|
|
| 112 |
start_time = time.time()
|
| 113 |
submissions = []
|
| 114 |
for submission in glob.glob(os.path.join(submissions_folder, "submission_info", "*.json")):
|
| 115 |
-
with open(submission, "r") as f:
|
| 116 |
submission_info = json.load(f)
|
| 117 |
submission_info["submissions"] = [
|
| 118 |
-
sub for sub in submission_info["submissions"] if sub["status"] == "
|
| 119 |
]
|
| 120 |
if len(submission_info["submissions"]) == 0:
|
| 121 |
continue
|
|
@@ -146,8 +146,7 @@ class Leaderboard:
|
|
| 146 |
if selected_submissions == 0:
|
| 147 |
# select submissions with best public score
|
| 148 |
submission_info["submissions"].sort(
|
| 149 |
-
key=lambda x: x["public_score"],
|
| 150 |
-
reverse=True if self.eval_higher_is_better else False,
|
| 151 |
)
|
| 152 |
# select only the best submission
|
| 153 |
submission_info["submissions"] = submission_info["submissions"][0]
|
|
@@ -156,8 +155,7 @@ class Leaderboard:
|
|
| 156 |
submission_info["submissions"] = [sub for sub in submission_info["submissions"] if sub["selected"]]
|
| 157 |
# sort by private score
|
| 158 |
submission_info["submissions"].sort(
|
| 159 |
-
key=lambda x: x["private_score"],
|
| 160 |
-
reverse=True if self.eval_higher_is_better else False,
|
| 161 |
)
|
| 162 |
# select only the best submission
|
| 163 |
submission_info["submissions"] = submission_info["submissions"][0]
|
|
@@ -167,28 +165,24 @@ class Leaderboard:
|
|
| 167 |
sub for sub in submission_info["submissions"] if not sub["selected"]
|
| 168 |
]
|
| 169 |
temp_best_public_submissions.sort(
|
| 170 |
-
key=lambda x: x["public_score"],
|
| 171 |
-
reverse=True if self.eval_higher_is_better else False,
|
| 172 |
)
|
| 173 |
missing_candidates = self.max_selected_submissions - len(temp_selected_submissions)
|
| 174 |
temp_best_public_submissions = temp_best_public_submissions[:missing_candidates]
|
| 175 |
submission_info["submissions"] = temp_selected_submissions + temp_best_public_submissions
|
| 176 |
submission_info["submissions"].sort(
|
| 177 |
-
key=lambda x: x["private_score"],
|
| 178 |
-
reverse=True if self.eval_higher_is_better else False,
|
| 179 |
)
|
| 180 |
submission_info["submissions"] = submission_info["submissions"][0]
|
| 181 |
|
| 182 |
temp_info = {
|
| 183 |
"id": submission_info["id"],
|
| 184 |
-
"name": submission_info["name"],
|
| 185 |
"submission_id": submission_info["submissions"]["submission_id"],
|
| 186 |
"submission_comment": submission_info["submissions"]["submission_comment"],
|
| 187 |
"status": submission_info["submissions"]["status"],
|
| 188 |
"selected": submission_info["submissions"]["selected"],
|
| 189 |
"private_score": submission_info["submissions"]["private_score"],
|
| 190 |
-
"
|
| 191 |
-
"submission_time": submission_info["submissions"]["time"],
|
| 192 |
}
|
| 193 |
for score in other_scores:
|
| 194 |
temp_info[score] = submission_info["submissions"][score]
|
|
@@ -206,10 +200,10 @@ class Leaderboard:
|
|
| 206 |
return pd.DataFrame()
|
| 207 |
|
| 208 |
df = pd.DataFrame(submissions)
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
# only keep submissions before the end date
|
| 214 |
df = df[df["submission_datetime"] < self.end_date].reset_index(drop=True)
|
| 215 |
|
|
@@ -254,7 +248,6 @@ class Leaderboard:
|
|
| 254 |
columns = self.public_columns if not private else self.private_columns
|
| 255 |
logger.info(columns)
|
| 256 |
# remove duplicate columns
|
| 257 |
-
# ['rank', 'name', 'public_score', 'submission_datetime', 'public_score_track1', 'public_score_track1', 'public_score_track1', 'public_score_track1']
|
| 258 |
columns = list(dict.fromkeys(columns))
|
| 259 |
|
| 260 |
# send submission_datetime to the end
|
|
|
|
| 25 |
def _refresh_columns(self):
|
| 26 |
self.private_columns = [
|
| 27 |
"rank",
|
| 28 |
+
"id",
|
| 29 |
"private_score",
|
| 30 |
"submission_datetime",
|
| 31 |
]
|
| 32 |
self.public_columns = [
|
| 33 |
"rank",
|
| 34 |
+
"id",
|
| 35 |
"public_score",
|
| 36 |
"submission_datetime",
|
| 37 |
]
|
|
|
|
| 49 |
start_time = time.time()
|
| 50 |
submissions = []
|
| 51 |
for submission in glob.glob(os.path.join(submissions_folder, "submission_info", "*.json")):
|
| 52 |
+
with open(submission, "r", encoding="utf-8") as f:
|
| 53 |
submission_info = json.load(f)
|
| 54 |
# only select submissions that are done
|
| 55 |
+
submission_info["submissions"] = [
|
| 56 |
+
sub for sub in submission_info["submissions"] if sub["status"] == "success"
|
| 57 |
+
]
|
| 58 |
submission_info["submissions"] = [
|
| 59 |
sub
|
| 60 |
for sub in submission_info["submissions"]
|
| 61 |
+
if datetime.strptime(sub["datetime"], "%Y-%m-%d %H:%M:%S") < self.end_date
|
| 62 |
]
|
| 63 |
if len(submission_info["submissions"]) == 0:
|
| 64 |
continue
|
|
|
|
| 85 |
submission_info["submissions"] = submission_info["submissions"][0]
|
| 86 |
temp_info = {
|
| 87 |
"id": submission_info["id"],
|
|
|
|
| 88 |
"submission_id": submission_info["submissions"]["submission_id"],
|
| 89 |
"submission_comment": submission_info["submissions"]["submission_comment"],
|
| 90 |
"status": submission_info["submissions"]["status"],
|
| 91 |
"selected": submission_info["submissions"]["selected"],
|
| 92 |
"public_score": submission_info["submissions"]["public_score"],
|
| 93 |
# "private_score": submission_info["submissions"]["private_score"],
|
| 94 |
+
"submission_datetime": submission_info["submissions"]["datetime"],
|
|
|
|
| 95 |
}
|
| 96 |
for score in other_scores:
|
| 97 |
temp_info[score] = submission_info["submissions"][score]
|
|
|
|
| 112 |
start_time = time.time()
|
| 113 |
submissions = []
|
| 114 |
for submission in glob.glob(os.path.join(submissions_folder, "submission_info", "*.json")):
|
| 115 |
+
with open(submission, "r", encoding="utf-8") as f:
|
| 116 |
submission_info = json.load(f)
|
| 117 |
submission_info["submissions"] = [
|
| 118 |
+
sub for sub in submission_info["submissions"] if sub["status"] == "success"
|
| 119 |
]
|
| 120 |
if len(submission_info["submissions"]) == 0:
|
| 121 |
continue
|
|
|
|
| 146 |
if selected_submissions == 0:
|
| 147 |
# select submissions with best public score
|
| 148 |
submission_info["submissions"].sort(
|
| 149 |
+
key=lambda x: x["public_score"], reverse=self.eval_higher_is_better
|
|
|
|
| 150 |
)
|
| 151 |
# select only the best submission
|
| 152 |
submission_info["submissions"] = submission_info["submissions"][0]
|
|
|
|
| 155 |
submission_info["submissions"] = [sub for sub in submission_info["submissions"] if sub["selected"]]
|
| 156 |
# sort by private score
|
| 157 |
submission_info["submissions"].sort(
|
| 158 |
+
key=lambda x: x["private_score"], reverse=self.eval_higher_is_better
|
|
|
|
| 159 |
)
|
| 160 |
# select only the best submission
|
| 161 |
submission_info["submissions"] = submission_info["submissions"][0]
|
|
|
|
| 165 |
sub for sub in submission_info["submissions"] if not sub["selected"]
|
| 166 |
]
|
| 167 |
temp_best_public_submissions.sort(
|
| 168 |
+
key=lambda x: x["public_score"], reverse=self.eval_higher_is_better
|
|
|
|
| 169 |
)
|
| 170 |
missing_candidates = self.max_selected_submissions - len(temp_selected_submissions)
|
| 171 |
temp_best_public_submissions = temp_best_public_submissions[:missing_candidates]
|
| 172 |
submission_info["submissions"] = temp_selected_submissions + temp_best_public_submissions
|
| 173 |
submission_info["submissions"].sort(
|
| 174 |
+
key=lambda x: x["private_score"], reverse=self.eval_higher_is_better
|
|
|
|
| 175 |
)
|
| 176 |
submission_info["submissions"] = submission_info["submissions"][0]
|
| 177 |
|
| 178 |
temp_info = {
|
| 179 |
"id": submission_info["id"],
|
|
|
|
| 180 |
"submission_id": submission_info["submissions"]["submission_id"],
|
| 181 |
"submission_comment": submission_info["submissions"]["submission_comment"],
|
| 182 |
"status": submission_info["submissions"]["status"],
|
| 183 |
"selected": submission_info["submissions"]["selected"],
|
| 184 |
"private_score": submission_info["submissions"]["private_score"],
|
| 185 |
+
"submission_datetime": submission_info["submissions"]["datetime"],
|
|
|
|
| 186 |
}
|
| 187 |
for score in other_scores:
|
| 188 |
temp_info[score] = submission_info["submissions"][score]
|
|
|
|
| 200 |
return pd.DataFrame()
|
| 201 |
|
| 202 |
df = pd.DataFrame(submissions)
|
| 203 |
+
|
| 204 |
+
# convert submission datetime to pandas datetime
|
| 205 |
+
df["submission_datetime"] = pd.to_datetime(df["submission_datetime"], format="%Y-%m-%d %H:%M:%S")
|
| 206 |
+
|
| 207 |
# only keep submissions before the end date
|
| 208 |
df = df[df["submission_datetime"] < self.end_date].reset_index(drop=True)
|
| 209 |
|
|
|
|
| 248 |
columns = self.public_columns if not private else self.private_columns
|
| 249 |
logger.info(columns)
|
| 250 |
# remove duplicate columns
|
|
|
|
| 251 |
columns = list(dict.fromkeys(columns))
|
| 252 |
|
| 253 |
# send submission_datetime to the end
|
competitions/params.py
CHANGED
|
@@ -9,12 +9,13 @@ class EvalParams(BaseModel):
|
|
| 9 |
competition_type: str
|
| 10 |
metric: str
|
| 11 |
token: str
|
| 12 |
-
|
| 13 |
submission_id: str
|
| 14 |
submission_id_col: str
|
| 15 |
submission_cols: List[str]
|
| 16 |
submission_rows: int
|
| 17 |
output_path: str
|
|
|
|
| 18 |
|
| 19 |
class Config:
|
| 20 |
protected_namespaces = ()
|
|
|
|
| 9 |
competition_type: str
|
| 10 |
metric: str
|
| 11 |
token: str
|
| 12 |
+
team_id: str
|
| 13 |
submission_id: str
|
| 14 |
submission_id_col: str
|
| 15 |
submission_cols: List[str]
|
| 16 |
submission_rows: int
|
| 17 |
output_path: str
|
| 18 |
+
submission_repo: str
|
| 19 |
|
| 20 |
class Config:
|
| 21 |
protected_namespaces = ()
|
competitions/runner.py
CHANGED
|
@@ -1,17 +1,30 @@
|
|
| 1 |
import glob
|
|
|
|
| 2 |
import json
|
| 3 |
import os
|
|
|
|
|
|
|
| 4 |
import time
|
| 5 |
from dataclasses import dataclass
|
| 6 |
|
| 7 |
import pandas as pd
|
| 8 |
-
from huggingface_hub import snapshot_download
|
| 9 |
from loguru import logger
|
| 10 |
|
| 11 |
from competitions.info import CompetitionInfo
|
| 12 |
from competitions.utils import run_evaluation
|
| 13 |
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
@dataclass
|
| 16 |
class JobRunner:
|
| 17 |
competition_info: CompetitionInfo
|
|
@@ -27,56 +40,121 @@ class JobRunner:
|
|
| 27 |
self.submission_rows = self.competition_info.submission_rows
|
| 28 |
|
| 29 |
def get_pending_subs(self):
|
| 30 |
-
|
| 31 |
repo_id=self.competition_id,
|
| 32 |
allow_patterns="submission_info/*.json",
|
| 33 |
token=self.token,
|
| 34 |
repo_type="dataset",
|
| 35 |
)
|
| 36 |
-
|
| 37 |
pending_submissions = []
|
| 38 |
-
for _json in
|
| 39 |
_json = json.load(open(_json, "r", encoding="utf-8"))
|
| 40 |
-
|
| 41 |
for sub in _json["submissions"]:
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
if len(pending_submissions) == 0:
|
| 52 |
logger.info("No pending submissions.")
|
| 53 |
return None
|
| 54 |
logger.info(f"Found {len(pending_submissions)} pending submissions.")
|
| 55 |
pending_submissions = pd.DataFrame(pending_submissions)
|
| 56 |
-
pending_submissions =
|
|
|
|
| 57 |
pending_submissions = pending_submissions.reset_index(drop=True)
|
| 58 |
return pending_submissions
|
| 59 |
|
| 60 |
def run_local(self, pending_submissions):
|
| 61 |
for _, row in pending_submissions.iterrows():
|
| 62 |
-
|
| 63 |
submission_id = row["submission_id"]
|
| 64 |
eval_params = {
|
| 65 |
"competition_id": self.competition_id,
|
| 66 |
"competition_type": self.competition_type,
|
| 67 |
"metric": self.metric,
|
| 68 |
"token": self.token,
|
| 69 |
-
"
|
| 70 |
"submission_id": submission_id,
|
| 71 |
"submission_id_col": self.submission_id_col,
|
| 72 |
"submission_cols": self.submission_cols,
|
| 73 |
"submission_rows": self.submission_rows,
|
| 74 |
"output_path": self.output_path,
|
|
|
|
| 75 |
}
|
| 76 |
eval_params = json.dumps(eval_params)
|
| 77 |
eval_pid = run_evaluation(eval_params, local=True, wait=True)
|
| 78 |
logger.info(f"New evaluation process started with pid {eval_pid}.")
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
def run(self):
|
| 81 |
while True:
|
| 82 |
pending_submissions = self.get_pending_subs()
|
|
@@ -85,4 +163,10 @@ class JobRunner:
|
|
| 85 |
continue
|
| 86 |
if self.competition_type == "generic":
|
| 87 |
self.run_local(pending_submissions)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
time.sleep(5)
|
|
|
|
| 1 |
import glob
|
| 2 |
+
import io
|
| 3 |
import json
|
| 4 |
import os
|
| 5 |
+
import random
|
| 6 |
+
import string
|
| 7 |
import time
|
| 8 |
from dataclasses import dataclass
|
| 9 |
|
| 10 |
import pandas as pd
|
| 11 |
+
from huggingface_hub import HfApi, snapshot_download
|
| 12 |
from loguru import logger
|
| 13 |
|
| 14 |
from competitions.info import CompetitionInfo
|
| 15 |
from competitions.utils import run_evaluation
|
| 16 |
|
| 17 |
|
| 18 |
+
# Dockerfile contents, one instruction per line. Built explicitly rather than
# by whitespace substitution on a multi-line literal, which is easy to get
# wrong (replacing single spaces puts every token on its own line).
_DOCKERFILE = "\n".join(
    [
        "FROM huggingface/competitions:latest",
        "CMD uvicorn competitions.app:app --port 7860 --host 0.0.0.0",
    ]
)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
@dataclass
|
| 29 |
class JobRunner:
|
| 30 |
competition_info: CompetitionInfo
|
|
|
|
| 40 |
self.submission_rows = self.competition_info.submission_rows
|
| 41 |
|
| 42 |
def get_pending_subs(self):
    """Collect every submission whose status is "pending", across all teams.

    Downloads the ``submission_info/*.json`` files of the competition
    dataset repo, reads each team file and keeps the entries of its
    ``"submissions"`` list whose ``"status"`` equals ``"pending"``.

    Returns:
        ``None`` when there is nothing pending; otherwise a pandas
        DataFrame with the columns ``team_id``, ``submission_id``,
        ``datetime`` and ``submission_repo``, sorted by ``datetime``
        (oldest first) with a fresh 0..n-1 index.
    """
    snapshot_dir = snapshot_download(
        repo_id=self.competition_id,
        allow_patterns="submission_info/*.json",
        token=self.token,
        repo_type="dataset",
    )
    submission_jsons = glob.glob(os.path.join(snapshot_dir, "submission_info/*.json"))

    pending_submissions = []
    for json_path in submission_jsons:
        # Use a context manager so the file handle is closed right away;
        # this method is called repeatedly by the polling loop.
        with open(json_path, "r", encoding="utf-8") as f:
            team_info = json.load(f)
        team_id = team_info["id"]
        for sub in team_info["submissions"]:
            if sub["status"] == "pending":
                pending_submissions.append(
                    {
                        "team_id": team_id,
                        "submission_id": sub["submission_id"],
                        "datetime": sub["datetime"],
                        "submission_repo": sub["submission_repo"],
                    }
                )

    if not pending_submissions:
        logger.info("No pending submissions.")
        return None

    logger.info(f"Found {len(pending_submissions)} pending submissions.")
    pending_submissions = pd.DataFrame(pending_submissions)
    # Parse the timestamps so the sort is chronological rather than lexical.
    pending_submissions["datetime"] = pd.to_datetime(pending_submissions["datetime"])
    pending_submissions = pending_submissions.sort_values("datetime")
    pending_submissions = pending_submissions.reset_index(drop=True)
    return pending_submissions
|
| 73 |
|
| 74 |
def run_local(self, pending_submissions):
    """Run the evaluation for each pending submission, one row at a time.

    Args:
        pending_submissions: DataFrame whose rows provide ``team_id``,
            ``submission_id`` and ``submission_repo``.

    For every row the evaluation parameters are serialised to JSON and
    handed to ``run_evaluation(..., local=True, wait=True)``; the value it
    returns is logged as the process id.
    """
    for _, sub in pending_submissions.iterrows():
        payload = json.dumps(
            {
                "competition_id": self.competition_id,
                "competition_type": self.competition_type,
                "metric": self.metric,
                "token": self.token,
                "team_id": sub["team_id"],
                "submission_id": sub["submission_id"],
                "submission_id_col": self.submission_id_col,
                "submission_cols": self.submission_cols,
                "submission_rows": self.submission_rows,
                "output_path": self.output_path,
                "submission_repo": sub["submission_repo"],
            }
        )
        pid = run_evaluation(payload, local=True, wait=True)
        logger.info(f"New evaluation process started with pid {pid}.")
|
| 94 |
|
| 95 |
+
def _create_readme(self, project_name):
    """Build the README.md front matter for a Space.

    Args:
        project_name: value written to the ``title`` field.

    Returns:
        An ``io.BytesIO`` positioned at the start, holding the encoded
        front-matter block (delimited by ``---`` lines).
    """
    front_matter = [
        "---\n",
        f"title: {project_name}\n",
        "emoji: 🚀\n",
        "colorFrom: green\n",
        "colorTo: indigo\n",
        "sdk: docker\n",
        "pinned: false\n",
        "duplicated_from: autotrain-projects/autotrain-advanced\n",
        "---\n",
    ]
    return io.BytesIO("".join(front_matter).encode())
|
| 107 |
+
|
| 108 |
+
def create_space(self, team_id, submission_id, submission_repo):
    """Create a private Docker Space that carries one submission's parameters.

    The Space is created in the namespace taken from the part of
    ``self.competition_id`` before the first ``/`` and is named
    ``competitions-<10 random lowercase letters/digits>``. The evaluation
    parameters are stored as the Space secret ``PARAMS`` (JSON), then a
    README.md and the module-level Dockerfile are uploaded.

    Args:
        team_id: id of the submitting team.
        submission_id: id of the submission to evaluate.
        submission_repo: repo recorded with the submission.

    Returns:
        The ``repo_id`` of the newly created Space.
    """
    alphabet = string.ascii_lowercase + string.digits
    project_name = "".join(random.choices(alphabet, k=10))

    api = HfApi(token=self.token)
    owner = self.competition_id.split("/")[0]
    repo_id = f"{owner}/competitions-{project_name}"
    api.create_repo(
        repo_id=repo_id,
        repo_type="space",
        space_sdk="docker",
        space_hardware="cpu-basic",
        private=True,
    )

    # Same key order as the local runner so the serialised payload matches.
    secret_payload = json.dumps(
        {
            "competition_id": self.competition_id,
            "competition_type": self.competition_type,
            "metric": self.metric,
            "token": self.token,
            "team_id": team_id,
            "submission_id": submission_id,
            "submission_id_col": self.submission_id_col,
            "submission_cols": self.submission_cols,
            "submission_rows": self.submission_rows,
            "output_path": self.output_path,
            "submission_repo": submission_repo,
        }
    )
    api.add_space_secret(repo_id=repo_id, key="PARAMS", value=secret_payload)

    # README first, Dockerfile second.
    space_files = (
        ("README.md", self._create_readme(project_name)),
        ("Dockerfile", io.BytesIO(_DOCKERFILE.encode())),
    )
    for target_path, content in space_files:
        api.upload_file(
            path_or_fileobj=content,
            path_in_repo=target_path,
            repo_id=repo_id,
            repo_type="space",
        )
    return repo_id
|
| 157 |
+
|
| 158 |
def run(self):
|
| 159 |
while True:
|
| 160 |
pending_submissions = self.get_pending_subs()
|
|
|
|
| 163 |
continue
|
| 164 |
if self.competition_type == "generic":
|
| 165 |
self.run_local(pending_submissions)
|
| 166 |
+
elif self.competition_type == "code":
|
| 167 |
+
for _, row in pending_submissions.iterrows():
|
| 168 |
+
team_id = row["team_id"]
|
| 169 |
+
submission_id = row["submission_id"]
|
| 170 |
+
submission_repo = row["submission_repo"]
|
| 171 |
+
self.create_space(team_id, submission_id, submission_repo)
|
| 172 |
time.sleep(5)
|
competitions/submissions.py
CHANGED
|
@@ -5,7 +5,7 @@ from dataclasses import dataclass
|
|
| 5 |
from datetime import datetime
|
| 6 |
|
| 7 |
import pandas as pd
|
| 8 |
-
from huggingface_hub import HfApi, hf_hub_download
|
| 9 |
from huggingface_hub.utils._errors import EntryNotFoundError
|
| 10 |
from loguru import logger
|
| 11 |
|
|
@@ -16,6 +16,7 @@ from .utils import user_authentication
|
|
| 16 |
@dataclass
|
| 17 |
class Submissions:
|
| 18 |
competition_id: str
|
|
|
|
| 19 |
submission_limit: str
|
| 20 |
end_date: datetime
|
| 21 |
token: str
|
|
@@ -42,38 +43,35 @@ class Submissions:
|
|
| 42 |
def _verify_submission(self, bytes_data):
|
| 43 |
return True
|
| 44 |
|
| 45 |
-
def
|
| 46 |
api = HfApi(token=self.token)
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
user_submission_info_json_bytes = user_submission_info_json.encode("utf-8")
|
| 54 |
-
user_submission_info_json_buffer = io.BytesIO(user_submission_info_json_bytes)
|
| 55 |
|
| 56 |
api.upload_file(
|
| 57 |
-
path_or_fileobj=
|
| 58 |
-
path_in_repo=f"submission_info/{
|
| 59 |
repo_id=self.competition_id,
|
| 60 |
repo_type="dataset",
|
| 61 |
)
|
| 62 |
|
| 63 |
-
def
|
| 64 |
-
user_id = user_info["id"]
|
| 65 |
try:
|
| 66 |
-
|
| 67 |
repo_id=self.competition_id,
|
| 68 |
-
filename=f"submission_info/{
|
| 69 |
token=self.token,
|
| 70 |
repo_type="dataset",
|
| 71 |
)
|
| 72 |
except EntryNotFoundError:
|
| 73 |
-
self.
|
| 74 |
-
|
| 75 |
repo_id=self.competition_id,
|
| 76 |
-
filename=f"submission_info/{
|
| 77 |
token=self.token,
|
| 78 |
repo_type="dataset",
|
| 79 |
)
|
|
@@ -81,36 +79,37 @@ class Submissions:
|
|
| 81 |
logger.error(e)
|
| 82 |
raise Exception("Hugging Face Hub is unreachable, please try again later.")
|
| 83 |
|
| 84 |
-
with open(
|
| 85 |
-
|
| 86 |
|
| 87 |
todays_date = datetime.now().strftime("%Y-%m-%d")
|
| 88 |
-
if len(
|
| 89 |
-
|
| 90 |
|
| 91 |
# count the number of times user has submitted today
|
| 92 |
todays_submissions = 0
|
| 93 |
-
for sub in
|
| 94 |
-
|
|
|
|
|
|
|
| 95 |
todays_submissions += 1
|
| 96 |
if todays_submissions >= self.submission_limit:
|
| 97 |
return False
|
| 98 |
return True
|
| 99 |
|
| 100 |
-
def _submissions_today(self,
|
| 101 |
-
user_id = user_info["id"]
|
| 102 |
try:
|
| 103 |
-
|
| 104 |
repo_id=self.competition_id,
|
| 105 |
-
filename=f"submission_info/{
|
| 106 |
token=self.token,
|
| 107 |
repo_type="dataset",
|
| 108 |
)
|
| 109 |
except EntryNotFoundError:
|
| 110 |
-
self.
|
| 111 |
-
|
| 112 |
repo_id=self.competition_id,
|
| 113 |
-
filename=f"submission_info/{
|
| 114 |
token=self.token,
|
| 115 |
repo_type="dataset",
|
| 116 |
)
|
|
@@ -118,39 +117,43 @@ class Submissions:
|
|
| 118 |
logger.error(e)
|
| 119 |
raise Exception("Hugging Face Hub is unreachable, please try again later.")
|
| 120 |
|
| 121 |
-
with open(
|
| 122 |
-
|
| 123 |
|
| 124 |
todays_date = datetime.now().strftime("%Y-%m-%d")
|
| 125 |
-
if len(
|
| 126 |
-
|
| 127 |
|
| 128 |
# count the number of times user has submitted today
|
| 129 |
todays_submissions = 0
|
| 130 |
-
for sub in
|
| 131 |
-
|
|
|
|
|
|
|
| 132 |
todays_submissions += 1
|
| 133 |
return todays_submissions
|
| 134 |
|
| 135 |
-
def _increment_submissions(self, user_id, submission_id, submission_comment):
|
| 136 |
-
|
|
|
|
|
|
|
| 137 |
repo_id=self.competition_id,
|
| 138 |
-
filename=f"submission_info/{
|
| 139 |
token=self.token,
|
| 140 |
repo_type="dataset",
|
| 141 |
)
|
| 142 |
-
with open(
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
current_time = datetime.now().strftime("%H:%M:%S")
|
| 146 |
|
| 147 |
# here goes all the default stuff for submission
|
| 148 |
-
|
| 149 |
{
|
| 150 |
-
"
|
| 151 |
-
"time": current_time,
|
| 152 |
"submission_id": submission_id,
|
| 153 |
"submission_comment": submission_comment,
|
|
|
|
|
|
|
| 154 |
"status": "pending",
|
| 155 |
"selected": False,
|
| 156 |
"public_score": -1,
|
|
@@ -159,33 +162,35 @@ class Submissions:
|
|
| 159 |
)
|
| 160 |
# count the number of times user has submitted today
|
| 161 |
todays_submissions = 0
|
| 162 |
-
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
| 164 |
todays_submissions += 1
|
| 165 |
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
user_submission_info_json_buffer = io.BytesIO(user_submission_info_json_bytes)
|
| 170 |
api = HfApi(token=self.token)
|
| 171 |
api.upload_file(
|
| 172 |
-
path_or_fileobj=
|
| 173 |
-
path_in_repo=f"submission_info/{
|
| 174 |
repo_id=self.competition_id,
|
| 175 |
repo_type="dataset",
|
| 176 |
)
|
| 177 |
return todays_submissions
|
| 178 |
|
| 179 |
-
def
|
| 180 |
-
|
| 181 |
repo_id=self.competition_id,
|
| 182 |
-
filename=f"submission_info/{
|
| 183 |
token=self.token,
|
| 184 |
repo_type="dataset",
|
| 185 |
)
|
| 186 |
-
with open(
|
| 187 |
-
|
| 188 |
-
return
|
| 189 |
|
| 190 |
def update_selected_submissions(self, user_token, selected_submission_ids):
|
| 191 |
current_datetime = datetime.now()
|
|
@@ -194,44 +199,44 @@ class Submissions:
|
|
| 194 |
|
| 195 |
user_info = self._get_user_info(user_token)
|
| 196 |
user_id = user_info["id"]
|
|
|
|
| 197 |
|
| 198 |
-
|
| 199 |
repo_id=self.competition_id,
|
| 200 |
-
filename=f"submission_info/{
|
| 201 |
token=self.token,
|
| 202 |
repo_type="dataset",
|
| 203 |
)
|
| 204 |
-
with open(
|
| 205 |
-
|
| 206 |
|
| 207 |
-
for sub in
|
| 208 |
if sub["submission_id"] in selected_submission_ids:
|
| 209 |
sub["selected"] = True
|
| 210 |
else:
|
| 211 |
sub["selected"] = False
|
| 212 |
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
user_submission_info_json_buffer = io.BytesIO(user_submission_info_json_bytes)
|
| 217 |
api = HfApi(token=self.token)
|
| 218 |
api.upload_file(
|
| 219 |
-
path_or_fileobj=
|
| 220 |
-
path_in_repo=f"submission_info/{
|
| 221 |
repo_id=self.competition_id,
|
| 222 |
repo_type="dataset",
|
| 223 |
)
|
| 224 |
|
| 225 |
-
def
|
| 226 |
-
# get user submissions
|
| 227 |
user_id = user_info["id"]
|
|
|
|
| 228 |
try:
|
| 229 |
-
|
| 230 |
except EntryNotFoundError:
|
| 231 |
logger.warning("No submissions found for user")
|
| 232 |
return pd.DataFrame(), pd.DataFrame()
|
| 233 |
|
| 234 |
-
submissions_df = pd.DataFrame(
|
| 235 |
|
| 236 |
if not private:
|
| 237 |
submissions_df = submissions_df.drop(columns=["private_score"])
|
|
@@ -314,47 +319,95 @@ class Submissions:
|
|
| 314 |
private = False
|
| 315 |
if current_date_time >= self.end_date:
|
| 316 |
private = True
|
| 317 |
-
success_subs, failed_subs = self.
|
| 318 |
return success_subs, failed_subs
|
| 319 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
def new_submission(self, user_token, uploaded_file, submission_comment):
|
| 321 |
# verify token
|
| 322 |
user_info = self._get_user_info(user_token)
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
-
# check if
|
| 325 |
-
if self.
|
| 326 |
raise SubmissionLimitError("Submission limit reached")
|
| 327 |
|
| 328 |
-
|
| 329 |
-
|
|
|
|
|
|
|
|
|
|
| 330 |
|
| 331 |
-
# verify file is valid
|
| 332 |
-
if not self._verify_submission(bytes_data):
|
| 333 |
-
raise SubmissionError("Invalid submission file")
|
| 334 |
-
else:
|
| 335 |
-
user_id = user_info["id"]
|
| 336 |
-
submission_id = str(uuid.uuid4())
|
| 337 |
file_extension = uploaded_file.filename.split(".")[-1]
|
| 338 |
# upload file to hf hub
|
| 339 |
api = HfApi(token=self.token)
|
| 340 |
api.upload_file(
|
| 341 |
path_or_fileobj=bytes_data,
|
| 342 |
-
path_in_repo=f"submissions/{
|
| 343 |
repo_id=self.competition_id,
|
| 344 |
repo_type="dataset",
|
| 345 |
)
|
| 346 |
-
# update submission limit
|
| 347 |
submissions_made = self._increment_submissions(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
user_id=user_id,
|
| 349 |
submission_id=submission_id,
|
| 350 |
-
submission_comment=
|
|
|
|
| 351 |
)
|
| 352 |
-
# TODO: schedule submission for evaluation
|
| 353 |
-
# self._create_autotrain_project(
|
| 354 |
-
# submission_id=f"{submission_id}",
|
| 355 |
-
# competition_id=f"{self.competition_id}",
|
| 356 |
-
# user_id=user_id,
|
| 357 |
-
# competition_type="generic",
|
| 358 |
-
# )
|
| 359 |
remaining_submissions = self.submission_limit - submissions_made
|
| 360 |
return remaining_submissions
|
|
|
|
| 5 |
from datetime import datetime
|
| 6 |
|
| 7 |
import pandas as pd
|
| 8 |
+
from huggingface_hub import HfApi, hf_hub_download, snapshot_download
|
| 9 |
from huggingface_hub.utils._errors import EntryNotFoundError
|
| 10 |
from loguru import logger
|
| 11 |
|
|
|
|
| 16 |
@dataclass
|
| 17 |
class Submissions:
|
| 18 |
competition_id: str
|
| 19 |
+
competition_type: str
|
| 20 |
submission_limit: str
|
| 21 |
end_date: datetime
|
| 22 |
token: str
|
|
|
|
| 43 |
def _verify_submission(self, bytes_data):
    """Validation hook for an uploaded submission.

    Currently a placeholder: ``bytes_data`` is not inspected and every
    submission is accepted.
    """
    return True
|
| 45 |
|
| 46 |
+
def _add_new_team(self, team_id):
    """Create the team's submission record in the competition dataset repo.

    Uploads ``submission_info/<team_id>.json`` containing the team id and
    an empty ``submissions`` list.
    """
    empty_record = {"id": team_id, "submissions": []}
    encoded = json.dumps(empty_record, indent=4).encode("utf-8")

    hub = HfApi(token=self.token)
    hub.upload_file(
        path_or_fileobj=io.BytesIO(encoded),
        path_in_repo=f"submission_info/{team_id}.json",
        repo_id=self.competition_id,
        repo_type="dataset",
    )
|
| 61 |
|
| 62 |
+
def _check_team_submission_limit(self, team_id):
|
|
|
|
| 63 |
try:
|
| 64 |
+
team_fname = hf_hub_download(
|
| 65 |
repo_id=self.competition_id,
|
| 66 |
+
filename=f"submission_info/{team_id}.json",
|
| 67 |
token=self.token,
|
| 68 |
repo_type="dataset",
|
| 69 |
)
|
| 70 |
except EntryNotFoundError:
|
| 71 |
+
self._add_new_team(team_id)
|
| 72 |
+
team_fname = hf_hub_download(
|
| 73 |
repo_id=self.competition_id,
|
| 74 |
+
filename=f"submission_info/{team_id}.json",
|
| 75 |
token=self.token,
|
| 76 |
repo_type="dataset",
|
| 77 |
)
|
|
|
|
| 79 |
logger.error(e)
|
| 80 |
raise Exception("Hugging Face Hub is unreachable, please try again later.")
|
| 81 |
|
| 82 |
+
with open(team_fname, "r", encoding="utf-8") as f:
|
| 83 |
+
team_submission_info = json.load(f)
|
| 84 |
|
| 85 |
todays_date = datetime.now().strftime("%Y-%m-%d")
|
| 86 |
+
if len(team_submission_info["submissions"]) == 0:
|
| 87 |
+
team_submission_info["submissions"] = []
|
| 88 |
|
| 89 |
# count the number of times user has submitted today
|
| 90 |
todays_submissions = 0
|
| 91 |
+
for sub in team_submission_info["submissions"]:
|
| 92 |
+
submission_datetime = sub["datetime"]
|
| 93 |
+
submission_date = submission_datetime.split(" ")[0]
|
| 94 |
+
if submission_date == todays_date:
|
| 95 |
todays_submissions += 1
|
| 96 |
if todays_submissions >= self.submission_limit:
|
| 97 |
return False
|
| 98 |
return True
|
| 99 |
|
| 100 |
+
def _submissions_today(self, team_id):
|
|
|
|
| 101 |
try:
|
| 102 |
+
team_fname = hf_hub_download(
|
| 103 |
repo_id=self.competition_id,
|
| 104 |
+
filename=f"submission_info/{team_id}.json",
|
| 105 |
token=self.token,
|
| 106 |
repo_type="dataset",
|
| 107 |
)
|
| 108 |
except EntryNotFoundError:
|
| 109 |
+
self._add_new_team(team_id)
|
| 110 |
+
team_fname = hf_hub_download(
|
| 111 |
repo_id=self.competition_id,
|
| 112 |
+
filename=f"submission_info/{team_id}.json",
|
| 113 |
token=self.token,
|
| 114 |
repo_type="dataset",
|
| 115 |
)
|
|
|
|
| 117 |
logger.error(e)
|
| 118 |
raise Exception("Hugging Face Hub is unreachable, please try again later.")
|
| 119 |
|
| 120 |
+
with open(team_fname, "r", encoding="utf-8") as f:
|
| 121 |
+
team_submission_info = json.load(f)
|
| 122 |
|
| 123 |
todays_date = datetime.now().strftime("%Y-%m-%d")
|
| 124 |
+
if len(team_submission_info["submissions"]) == 0:
|
| 125 |
+
team_submission_info["submissions"] = []
|
| 126 |
|
| 127 |
# count the number of times user has submitted today
|
| 128 |
todays_submissions = 0
|
| 129 |
+
for sub in team_submission_info["submissions"]:
|
| 130 |
+
submission_datetime = sub["datetime"]
|
| 131 |
+
submission_date = submission_datetime.split(" ")[0]
|
| 132 |
+
if submission_date == todays_date:
|
| 133 |
todays_submissions += 1
|
| 134 |
return todays_submissions
|
| 135 |
|
| 136 |
+
def _increment_submissions(self, team_id, user_id, submission_id, submission_comment, submission_repo=None):
|
| 137 |
+
if submission_repo is None:
|
| 138 |
+
submission_repo = ""
|
| 139 |
+
team_fname = hf_hub_download(
|
| 140 |
repo_id=self.competition_id,
|
| 141 |
+
filename=f"submission_info/{team_id}.json",
|
| 142 |
token=self.token,
|
| 143 |
repo_type="dataset",
|
| 144 |
)
|
| 145 |
+
with open(team_fname, "r", encoding="utf-8") as f:
|
| 146 |
+
team_submission_info = json.load(f)
|
| 147 |
+
datetime_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
| 148 |
|
| 149 |
# here goes all the default stuff for submission
|
| 150 |
+
team_submission_info["submissions"].append(
|
| 151 |
{
|
| 152 |
+
"datetime": datetime_now,
|
|
|
|
| 153 |
"submission_id": submission_id,
|
| 154 |
"submission_comment": submission_comment,
|
| 155 |
+
"submission_repo": submission_repo,
|
| 156 |
+
"submitted_by": user_id,
|
| 157 |
"status": "pending",
|
| 158 |
"selected": False,
|
| 159 |
"public_score": -1,
|
|
|
|
| 162 |
)
|
| 163 |
# count the number of times user has submitted today
|
| 164 |
todays_submissions = 0
|
| 165 |
+
todays_date = datetime.now().strftime("%Y-%m-%d")
|
| 166 |
+
for sub in team_submission_info["submissions"]:
|
| 167 |
+
submission_datetime = sub["datetime"]
|
| 168 |
+
submission_date = submission_datetime.split(" ")[0]
|
| 169 |
+
if submission_date == todays_date:
|
| 170 |
todays_submissions += 1
|
| 171 |
|
| 172 |
+
team_submission_info_json = json.dumps(team_submission_info, indent=4)
|
| 173 |
+
team_submission_info_json_bytes = team_submission_info_json.encode("utf-8")
|
| 174 |
+
team_submission_info_json_buffer = io.BytesIO(team_submission_info_json_bytes)
|
|
|
|
| 175 |
api = HfApi(token=self.token)
|
| 176 |
api.upload_file(
|
| 177 |
+
path_or_fileobj=team_submission_info_json_buffer,
|
| 178 |
+
path_in_repo=f"submission_info/{team_id}.json",
|
| 179 |
repo_id=self.competition_id,
|
| 180 |
repo_type="dataset",
|
| 181 |
)
|
| 182 |
return todays_submissions
|
| 183 |
|
| 184 |
+
def _download_team_subs(self, team_id):
    """Return the ``submissions`` list stored for ``team_id``.

    Fetches ``submission_info/<team_id>.json`` from the competition
    dataset repo and parses it as JSON. Errors raised by
    ``hf_hub_download`` are not handled here and propagate to the caller.
    """
    local_path = hf_hub_download(
        repo_id=self.competition_id,
        filename=f"submission_info/{team_id}.json",
        token=self.token,
        repo_type="dataset",
    )
    with open(local_path, "r", encoding="utf-8") as handle:
        record = json.load(handle)
    return record["submissions"]
|
| 194 |
|
| 195 |
def update_selected_submissions(self, user_token, selected_submission_ids):
|
| 196 |
current_datetime = datetime.now()
|
|
|
|
| 199 |
|
| 200 |
user_info = self._get_user_info(user_token)
|
| 201 |
user_id = user_info["id"]
|
| 202 |
+
team_id = self._get_team_id(user_id)
|
| 203 |
|
| 204 |
+
team_fname = hf_hub_download(
|
| 205 |
repo_id=self.competition_id,
|
| 206 |
+
filename=f"submission_info/{team_id}.json",
|
| 207 |
token=self.token,
|
| 208 |
repo_type="dataset",
|
| 209 |
)
|
| 210 |
+
with open(team_fname, "r", encoding="utf-8") as f:
|
| 211 |
+
team_submission_info = json.load(f)
|
| 212 |
|
| 213 |
+
for sub in team_submission_info["submissions"]:
|
| 214 |
if sub["submission_id"] in selected_submission_ids:
|
| 215 |
sub["selected"] = True
|
| 216 |
else:
|
| 217 |
sub["selected"] = False
|
| 218 |
|
| 219 |
+
team_submission_info_json = json.dumps(team_submission_info, indent=4)
|
| 220 |
+
team_submission_info_json_bytes = team_submission_info_json.encode("utf-8")
|
| 221 |
+
team_submission_info_json_buffer = io.BytesIO(team_submission_info_json_bytes)
|
|
|
|
| 222 |
api = HfApi(token=self.token)
|
| 223 |
api.upload_file(
|
| 224 |
+
path_or_fileobj=team_submission_info_json_buffer,
|
| 225 |
+
path_in_repo=f"submission_info/{team_id}.json",
|
| 226 |
repo_id=self.competition_id,
|
| 227 |
repo_type="dataset",
|
| 228 |
)
|
| 229 |
|
| 230 |
+
def _get_team_subs(self, user_info, private=False):
|
|
|
|
| 231 |
user_id = user_info["id"]
|
| 232 |
+
team_id = self._get_team_id(user_id)
|
| 233 |
try:
|
| 234 |
+
team_submissions = self._download_team_subs(team_id)
|
| 235 |
except EntryNotFoundError:
|
| 236 |
logger.warning("No submissions found for user")
|
| 237 |
return pd.DataFrame(), pd.DataFrame()
|
| 238 |
|
| 239 |
+
submissions_df = pd.DataFrame(team_submissions)
|
| 240 |
|
| 241 |
if not private:
|
| 242 |
submissions_df = submissions_df.drop(columns=["private_score"])
|
|
|
|
| 319 |
private = False
|
| 320 |
if current_date_time >= self.end_date:
|
| 321 |
private = True
|
| 322 |
+
success_subs, failed_subs = self._get_team_subs(user_info, private=private)
|
| 323 |
return success_subs, failed_subs
|
| 324 |
|
| 325 |
+
def _get_team_id(self, user_id):
    """Return the team id for ``user_id``, creating a new team if needed.

    The user-to-team mapping is read from ``user_team.json`` in the
    competition dataset repo. When the user has no entry, a fresh UUID4
    team id is generated, added to the mapping, and the whole file is
    uploaded back before the id is returned.
    """
    mapping_path = hf_hub_download(
        repo_id=self.competition_id,
        filename="user_team.json",
        token=self.token,
        repo_type="dataset",
    )
    with open(mapping_path, "r", encoding="utf-8") as handle:
        mapping = json.load(handle)

    try:
        return mapping[user_id]
    except KeyError:
        pass

    # create a new team, if user is not in any team
    new_team_id = str(uuid.uuid4())
    mapping[user_id] = new_team_id
    encoded = json.dumps(mapping, indent=4).encode("utf-8")

    hub = HfApi(token=self.token)
    hub.upload_file(
        path_or_fileobj=io.BytesIO(encoded),
        path_in_repo="user_team.json",
        repo_id=self.competition_id,
        repo_type="dataset",
    )
    return new_team_id
|
| 352 |
+
|
| 353 |
def new_submission(self, user_token, uploaded_file, submission_comment):
    """Register a new submission for the caller's team.

    Args:
        user_token: token of the submitting user; used to look the user up
            and, for non-generic competitions, to download their repo.
        uploaded_file: for ``"generic"`` competitions an upload object
            exposing ``.file`` and ``.filename``; otherwise the id of the
            model repo to submit (it is passed as ``repo_id``).
        submission_comment: free-text comment stored with the submission.

    Returns:
        ``self.submission_limit`` minus the count returned by
        ``_increment_submissions``.

    Raises:
        SubmissionLimitError: the team has reached its submission limit.
        SubmissionError: the uploaded file failed ``_verify_submission``.
    """
    # Local import: only this method needs it.
    import tempfile

    # verify token
    user_info = self._get_user_info(user_token)
    submission_id = str(uuid.uuid4())
    user_id = user_info["id"]
    team_id = self._get_team_id(user_id)

    # check if team can submit to the competition
    if self._check_team_submission_limit(team_id) is False:
        raise SubmissionLimitError("Submission limit reached")

    api = HfApi(token=self.token)
    if self.competition_type == "generic":
        bytes_data = uploaded_file.file.read()
        # verify file is valid
        if not self._verify_submission(bytes_data):
            raise SubmissionError("Invalid submission file")

        file_extension = uploaded_file.filename.split(".")[-1]
        # upload file to hf hub
        api.upload_file(
            path_or_fileobj=bytes_data,
            path_in_repo=f"submissions/{team_id}-{submission_id}.{file_extension}",
            repo_id=self.competition_id,
            repo_type="dataset",
        )
        submission_repo = ""
    else:
        competition_user = self.competition_id.split("/")[0]
        private_copy_id = f"{competition_user}/{submission_id}"
        # Download into a temporary directory that is removed as soon as the
        # private copy has been uploaded; a directory named after the
        # submission id in the working directory would never be cleaned up.
        with tempfile.TemporaryDirectory() as download_dir:
            local_copy = snapshot_download(
                repo_id=uploaded_file,
                local_dir=download_dir,
                token=user_token,
                repo_type="model",
            )
            api.create_repo(
                repo_id=private_copy_id,
                repo_type="model",
                private=True,
            )
            api.upload_folder(
                folder_path=local_copy,
                repo_id=private_copy_id,
                repo_type="model",
            )
        # The user's original repo id is what gets recorded.
        submission_repo = uploaded_file

    submissions_made = self._increment_submissions(
        team_id=team_id,
        user_id=user_id,
        submission_id=submission_id,
        submission_comment=submission_comment,
        submission_repo=submission_repo,
    )
    remaining_submissions = self.submission_limit - submissions_made
    return remaining_submissions
|
competitions/utils.py
CHANGED
|
@@ -73,7 +73,7 @@ def pause_space(params):
|
|
| 73 |
def download_submission_info(params):
|
| 74 |
user_fname = hf_hub_download(
|
| 75 |
repo_id=params.competition_id,
|
| 76 |
-
filename=f"submission_info/{params.
|
| 77 |
token=params.token,
|
| 78 |
repo_type="dataset",
|
| 79 |
)
|
|
@@ -90,7 +90,7 @@ def upload_submission_info(params, user_submission_info):
|
|
| 90 |
api = HfApi(token=params.token)
|
| 91 |
api.upload_file(
|
| 92 |
path_or_fileobj=user_submission_info_json_buffer,
|
| 93 |
-
path_in_repo=f"submission_info/{params.
|
| 94 |
repo_id=params.competition_id,
|
| 95 |
repo_type="dataset",
|
| 96 |
)
|
|
|
|
| 73 |
def download_submission_info(params):
|
| 74 |
user_fname = hf_hub_download(
|
| 75 |
repo_id=params.competition_id,
|
| 76 |
+
filename=f"submission_info/{params.team_id}.json",
|
| 77 |
token=params.token,
|
| 78 |
repo_type="dataset",
|
| 79 |
)
|
|
|
|
| 90 |
api = HfApi(token=params.token)
|
| 91 |
api.upload_file(
|
| 92 |
path_or_fileobj=user_submission_info_json_buffer,
|
| 93 |
+
path_in_repo=f"submission_info/{params.team_id}.json",
|
| 94 |
repo_id=params.competition_id,
|
| 95 |
repo_type="dataset",
|
| 96 |
)
|