File size: 6,246 Bytes
141f1e0
 
 
38308df
141f1e0
 
 
 
 
 
38308df
141f1e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dcb04e7
141f1e0
 
38308df
 
 
 
 
 
 
 
 
 
 
 
 
 
6a8a748
 
 
a2556f7
 
6a8a748
a2556f7
38308df
 
 
 
 
 
141f1e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38308df
 
 
 
141f1e0
 
38308df
 
141f1e0
 
 
 
 
 
 
 
bc714de
 
 
 
 
 
30f0c04
141f1e0
 
 
bc714de
141f1e0
dcb04e7
141f1e0
 
dcb04e7
141f1e0
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import json
import math
import os
import uuid
from datetime import datetime, timedelta, timezone

from src.envs import API, SUBMISSIONS_PATH, SUBMISSIONS_REPO
from src.evaluation.compute_metrics import compute_metrics
from src.evaluation.load_labels import load_true_labels
from src.submission.validate_csv import validate_csv
from src.teams.auth import validate_token
from src.teams.storage import get_team_by_name, hash_token, update_last_valid_submission


def get_team_best_scores(team_name: str) -> dict | None:
    """Load a team's stored best-score record from local disk.

    The record is read from ``<SUBMISSIONS_PATH>/results/<team_name>.json``.

    Args:
        team_name: Team whose record should be loaded; used verbatim as the
            file stem.

    Returns:
        The parsed JSON object, or ``None`` when the file does not exist,
        cannot be read, does not contain valid JSON, or its top-level value
        is not a JSON object.
    """
    results_file = os.path.join(SUBMISSIONS_PATH, "results", f"{team_name}.json")
    try:
        with open(results_file, "r", encoding="utf-8") as f:
            scores = json.load(f)
    except FileNotFoundError:
        # No record yet is the normal case for a team's first submission.
        return None
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # Stay best-effort (return None) but make the failure visible.
        print(f"Warning: Could not read best scores for team {team_name}: {e}")
        return None

    if not isinstance(scores, dict):
        # Callers use ``.get`` on the result, so anything but an object is unusable.
        print(f"Warning: Ignoring malformed best scores file for team {team_name}")
        return None
    return scores


def save_team_best_scores(team_name: str, scores: dict):
    """Persist a team's best-score record locally and push it to the hub.

    The record is serialised as JSON to
    ``<SUBMISSIONS_PATH>/results/<team_name>.json`` (the directory is created
    when missing) and then uploaded to the ``SUBMISSIONS_REPO`` dataset repo
    under ``results/<team_name>.json``.

    Args:
        team_name: Team the record belongs to; used as the file stem.
        scores: JSON-serialisable record to store.

    A failed upload is reported with a printed warning only; the local file
    is left in place and no exception is raised for it.
    """
    target_dir = os.path.join(SUBMISSIONS_PATH, "results")
    os.makedirs(target_dir, exist_ok=True)

    filename = f"{team_name}.json"
    local_path = os.path.join(target_dir, filename)
    with open(local_path, "w") as handle:
        json.dump(scores, handle)

    try:
        upload_kwargs = {
            "path_or_fileobj": local_path,
            "path_in_repo": f"results/{filename}",
            "repo_id": SUBMISSIONS_REPO,
            "repo_type": "dataset",
            "commit_message": f"Update scores for team: {team_name}",
        }
        API.upload_file(**upload_kwargs)
    except Exception as upload_error:
        # Hub sync is best-effort: the local copy was already written above.
        print(f"Warning: Could not upload results to hub: {upload_error}")


def save_submission(team_name: str, token_hash: str, csv_content: str, scores: dict, status: str):
    """Record one submission locally and upload it to the hub.

    Two files named after a freshly generated UUID are written into
    ``SUBMISSIONS_PATH``: ``<id>.json`` with the submission metadata and
    ``<id>.csv`` with the submitted CSV text. Both are then uploaded to the
    ``SUBMISSIONS_REPO`` dataset repo under the same names.

    Args:
        team_name: Name of the submitting team.
        token_hash: Hash of the team token, stored in the metadata.
        csv_content: CSV text exactly as submitted.
        scores: Metrics computed for this submission.
        status: Status label stored in the metadata.

    Upload failures are reported with a printed warning only; the local
    files are kept. If the first upload fails the second is not attempted.
    """
    os.makedirs(SUBMISSIONS_PATH, exist_ok=True)
    submission_id = str(uuid.uuid4())
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    submission_data = {
        "submission_id": submission_id,
        "team_name": team_name,
        "token_hash": token_hash,
        "timestamp": timestamp,
        "scores": scores,
        "status": status,
    }

    submission_file = os.path.join(SUBMISSIONS_PATH, f"{submission_id}.json")
    with open(submission_file, "w", encoding="utf-8") as f:
        json.dump(submission_data, f)

    csv_file = os.path.join(SUBMISSIONS_PATH, f"{submission_id}.csv")
    # newline="" disables newline translation so the archived CSV keeps the
    # submitted line endings; an explicit encoding avoids depending on the
    # platform locale for non-ASCII content.
    with open(csv_file, "w", encoding="utf-8", newline="") as f:
        f.write(csv_content)

    try:
        API.upload_file(
            path_or_fileobj=submission_file,
            path_in_repo=f"{submission_id}.json",
            repo_id=SUBMISSIONS_REPO,
            repo_type="dataset",
            commit_message=f"Submission from {team_name}",
        )
        API.upload_file(
            path_or_fileobj=csv_file,
            path_in_repo=f"{submission_id}.csv",
            repo_id=SUBMISSIONS_REPO,
            repo_type="dataset",
            commit_message=f"CSV for submission {submission_id}",
        )
    except Exception as e:
        # Hub sync is best-effort: the local copies were already written above.
        print(f"Warning: Could not upload submission to hub: {e}")


def should_update_scores(new_scores: dict, best_scores: dict | None) -> bool:
    if best_scores is None:
        return True

    new_f1 = new_scores.get("f1", 0.0)
    best_f1 = best_scores.get("best_f1", 0.0)

    return new_f1 > best_f1


def check_rate_limit(team_name: str, min_interval_seconds: float = 2) -> tuple[bool, str]:
    """Check whether a team may submit again yet.

    The team's ``last_valid_submission`` timestamp (format
    ``%Y-%m-%dT%H:%M:%SZ``, interpreted as UTC) is compared with the current
    UTC time.

    Args:
        team_name: Team to look up via ``get_team_by_name``.
        min_interval_seconds: Minimum number of seconds required between two
            submissions. Defaults to 2.

    Returns:
        ``(True, "")`` when the submission is allowed, otherwise
        ``(False, message)`` where the message states how many whole seconds
        remain. The check fails open: an unknown team, a missing timestamp or
        an unparseable timestamp all allow the submission.
    """
    team_data = get_team_by_name(team_name)
    if not team_data:
        return True, ""

    last_submission = team_data.get("last_valid_submission")
    if not last_submission:
        return True, ""

    try:
        last_time = datetime.strptime(last_submission, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
    except (TypeError, ValueError) as e:
        # A malformed stored timestamp must not lock the team out.
        print(f"Warning: Could not parse last submission time for team {team_name}: {e}")
        return True, ""

    min_interval = timedelta(seconds=min_interval_seconds)
    time_diff = datetime.now(timezone.utc) - last_time
    if time_diff >= min_interval:
        return True, ""

    # Round up so the message never says "wait 0 seconds" while still rejecting.
    remaining_secs = math.ceil((min_interval - time_diff).total_seconds())
    return (
        False,
        f"Rate limit exceeded. Please wait {remaining_secs} seconds before submitting again.",
    )


def submit_csv(token: str, csv_content: str) -> tuple[bool, str]:
    """Run a full submission: authenticate, validate, score and record it.

    Steps, in order: validate the team token, load the true labels, validate
    the CSV against them, apply the rate limit, compute metrics, stamp the
    team's last valid submission, update the stored best scores if the new
    ones are better, and finally archive the submission.

    Args:
        token: Team token supplied by the submitter.
        csv_content: Submitted CSV text.

    Returns:
        ``(True, message)`` when the submission was accepted (whether or not
        it improved the team's best), otherwise ``(False, reason)`` when the
        token is invalid, labels are unavailable, the CSV fails validation or
        the rate limit applies.
    """
    team_record = validate_token(token)
    if not team_record:
        return False, "Invalid token. Please check your team token."

    team_name = team_record["team_name"]
    hashed_token = hash_token(token)

    labels = load_true_labels()
    if not labels:
        return False, "Error: True labels not available. Please contact administrators."

    csv_ok, csv_error, predictions = validate_csv(csv_content, labels)
    if not csv_ok:
        return False, f"CSV validation failed: {csv_error}"

    allowed, limit_message = check_rate_limit(team_name)
    if not allowed:
        return False, limit_message

    scores = compute_metrics(predictions, labels)

    # The rate-limit clock is stamped only once the submission has been scored.
    update_last_valid_submission(team_name)

    previous_best = get_team_best_scores(team_name)

    if not should_update_scores(scores, previous_best):
        status = "ACCEPTED, BUT WORST"
        if previous_best:
            best_acc = previous_best.get("best_accuracy", 0.0)
            best_f1 = previous_best.get("best_f1", 0.0)
        else:
            best_acc = 0.0
            best_f1 = 0.0
        message = (
            "Submission accepted but did not improve your best score. "
            f"Your scores (Accuracy={scores['accuracy']:.4f}, F1={scores['f1']:.4f}) "
            f"vs. your best scores (Accuracy={best_acc:.4f}, F1={best_f1:.4f})."
        )
    else:
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        metric_names = ("accuracy", "f1", "precision", "recall", "tp", "fp", "fn", "tn")
        # Build the record in a fixed key order: team, best_<metric>..., date.
        new_best = {"team_name": team_name}
        new_best.update((f"best_{metric}", scores[metric]) for metric in metric_names)
        new_best["best_submission_date"] = timestamp
        save_team_best_scores(team_name, new_best)
        status = "ACCEPTED"
        message = (
            "Submission accepted! Your scores: "
            f"Accuracy={scores['accuracy']:.4f}, F1={scores['f1']:.4f}, "
            f"Precision={scores['precision']:.4f}, Recall={scores['recall']:.4f}, "
            f"TP={scores['tp']}, FP={scores['fp']}, FN={scores['fn']}, TN={scores['tn']}"
        )

    save_submission(team_name, hashed_token, csv_content, scores, status)

    return True, message