Spaces:
Running
Running
File size: 5,573 Bytes
50aa233 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import json
import os
from datetime import datetime
import gradio
import pytest
import pyarrow as pa
from agenteval.models import SubmissionMetadata
from datasets import load_dataset, VerificationMode
from huggingface_hub import HfApi, hf_hub_download
from aliases import CANONICAL_TOOL_USAGE_STANDARD, CANONICAL_OPENNESS_CLOSED_UI_ONLY
from config import IS_INTERNAL, CONFIG_NAME, CONTACT_DATASET, SUBMISSION_DATASET
from submission import add_new_eval
# Module-level Hugging Face Hub client, shared by the checks in the test below.
_hf = HfApi()
class TestSubmission:
    """Integration test for the leaderboard submission entry point.

    Submits a canned archive through ``add_new_eval`` and then inspects the
    contact dataset and the submission dataset on the Hugging Face Hub to
    confirm that matching records and files were produced.
    """

    @pytest.fixture(autouse=True)
    def setup(self):
        """Verify that the configuration was loaded for the CI environment."""
        # These need to be set before imports are evaluated so all we can do here
        # is check that they have been set correctly.
        # Equality (not identity) is kept on purpose: the exact type of
        # IS_INTERNAL is defined in config and is not visible from this file.
        assert IS_INTERNAL == True  # noqa: E712
        assert CONFIG_NAME == "continuous-integration"

    def test_add_new_eval(self, mocker):
        """Submit a test archive and check the resulting contact and submission records.

        Args:
            mocker: patching fixture (``mocker.patch``) used to bypass the
                account-age and submission-rate checks in ``submission``.
        """
        # Bypass some checks so that the test can cover later parts of the code.
        mocker.patch("submission._is_hf_acct_too_new", return_value=False)
        mocker.patch("submission._is_last_submission_too_recent", return_value=False)

        # We use this to find records corresponding to this test.
        agent_description = f"CI run at {datetime.now().isoformat()}"
        print(f"Using unique agent description: {agent_description}")

        print("Submitting test submission...")
        archive_path = os.path.join(os.path.dirname(__file__), "test-submission.tar.gz")
        with open(archive_path, "rb") as archive:
            result = add_new_eval(
                val_or_test="test",
                agent_name="TestSubmissionIntegration",
                agent_description=agent_description,
                agent_url="https://github.com/allenai/asta-bench-leaderboard/blob/main/tests/integration/test_submission.py",
                openness=CANONICAL_OPENNESS_CLOSED_UI_ONLY,
                degree_of_control=CANONICAL_TOOL_USAGE_STANDARD,
                path_to_file=archive,
                username="test_user",
                role="Other",
                email="jasond+asta_testing@allenai.org",
                email_opt_in=True,
                profile=gradio.OAuthProfile({
                    "name": "Test User",
                    "preferred_username": "test_user",
                    "profile": "test_user_profile",
                    "picture": "https://placecats.com/150/150",
                }),
            )

        message, error_modal, success_modal, loading_modal = result
        assert message == ""  # Success
        assert error_modal == {'__type__': 'update', 'visible': False}
        assert success_modal == {'__type__': 'update', 'visible': True}
        assert loading_modal == {'__type__': 'update', 'visible': False}

        print("Looking up contact record...")
        contacts = load_dataset(path=CONTACT_DATASET,
                                name=CONFIG_NAME,
                                download_mode="force_redownload",
                                verification_mode=VerificationMode.NO_CHECKS)
        # There should have been a new entry due to this test with our unique description.
        # A default of None turns "not found" into a readable assertion failure
        # instead of a bare StopIteration escaping from next().
        found_contact = next(
            (row for row in contacts['test'] if row['agent_description'] == agent_description),
            None,
        )
        assert found_contact, (
            f"No contact record found with agent_description {agent_description!r}"
        )

        # This contains an attribute that should lead us to files in the submissions dataset.
        dataset_url = found_contact['dataset_url']
        print(f"Found dataset URL: {dataset_url}")
        submissions_prefix = "hf://datasets/allenai/asta-bench-internal-submissions/"
        assert dataset_url.startswith(submissions_prefix + "continuous-integration/test/")

        print("Checking submission dataset...")
        # Commit message itself should link this and the contact record together unambiguously.
        recent_commits = _hf.list_repo_commits(repo_type="dataset", repo_id=SUBMISSION_DATASET)
        assert any(dataset_url in commit.title for commit in recent_commits)

        print("Checking that files are present...")
        rel_path = dataset_url[len(submissions_prefix):]
        ds_info = _hf.dataset_info(SUBMISSION_DATASET)
        # Collect the repository file names once rather than rescanning per check.
        repo_files = {sibling.rfilename for sibling in ds_info.siblings}
        # These are the files in our test-submission.tar.gz
        assert f"{rel_path}/eval_config.json" in repo_files
        assert f"{rel_path}/task_sqa_solver_openscilm.eval" in repo_files
        # This is the generated metadata put into the dataset itself.
        assert f"{rel_path}/submission.json" in repo_files

        print("Checking contact record against submission.json...")
        # Checks on contact record which is stored in a private dataset.
        local_path = hf_hub_download(repo_type="dataset",
                                     repo_id=SUBMISSION_DATASET,
                                     filename=f"{rel_path}/submission.json")
        # JSON is read as UTF-8 explicitly so the result does not depend on the
        # platform's default encoding.
        with open(local_path, encoding="utf-8") as json_file:
            contact_from_json = json.load(json_file)

        # Assert that all keys and values in submission.json are present in the contact record
        for key, value_from_json in contact_from_json.items():
            assert key in found_contact, (
                f"Key {key!r} from submission.json is missing in the contact record"
            )
            value_from_dataset = found_contact[key]
            if isinstance(value_from_dataset, datetime):
                # Render the datetime in the same textual form as the JSON value
                # ('Z' suffix in place of '+00:00') before comparing.
                value_from_dataset = value_from_dataset.isoformat().replace('+00:00', 'Z')
            assert value_from_dataset == value_from_json, (
                f"Mismatch for {key!r}: contact record has {value_from_dataset!r}, "
                f"submission.json has {value_from_json!r}"
            )

        # submission.json should not contain sensitive PII, specifically, email.
        assert 'email' in found_contact
        assert 'email' not in contact_from_json

        # submission.json is defined by a specific data model.
        SubmissionMetadata.model_validate(contact_from_json)
|