| import os |
| import json |
| import argparse |
|
|
| from datasets import Dataset |
| from dotenv import load_dotenv |
| load_dotenv() |
|
|
# Hub repository id that the stimuli records are pushed to.
DATASET_NAME = "bkhmsi/test-realign-hackathon-red-team"
# Name of the environment variable the access token is read from.
TOKEN_ENV = "HF_TOKEN"
|
|
def push_to_hf(token: str, stimuli: list[dict[str, str]]) -> None:
    """Upload the stimuli records to the Hub repository named by DATASET_NAME.

    The records are converted with ``Dataset.from_list`` and pushed with
    ``private=True``. A one-line progress message with the record count is
    printed before the upload starts.

    Args:
        token: Access token forwarded to ``push_to_hub``.
        stimuli: Records to upload, one mapping per dataset row.
    """
    records = Dataset.from_list(stimuli)
    n_records = len(records)
    print(f"> Pushing {n_records} records to HF dataset {DATASET_NAME}")
    records.push_to_hub(DATASET_NAME, token=token, private=True)
|
|
def read_jsonl(path: str) -> list[dict[str, str]]:
    """Parse a JSON Lines file into a list with one decoded value per line.

    Blank and whitespace-only lines are skipped, so a trailing newline or an
    empty separator line does not abort the load.

    Args:
        path: Location of the JSONL file to read.

    Returns:
        The decoded JSON values in file order. They are annotated as
        string-to-string mappings because that is what ``push_to_hf``
        accepts; the shape of each record is not validated here.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # locale encoding, which differs between systems.
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]
|
|
def main() -> None:
    """Command-line entry point: load stimuli from a JSONL file and push them.

    The input file is taken from ``--path`` (default
    ``test_submissions/red_submission.jsonl``). The access token is read from
    the environment variable named by TOKEN_ENV and stripped of surrounding
    whitespace before use.

    Raises:
        ValueError: If the token environment variable is unset or blank.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--path",
        type=str,
        required=False,
        default="test_submissions/red_submission.jsonl",
        help="Path to the JSON file containing stimuli names.",
    )
    path = cli.parse_args().path

    # Fail before touching the input file when no usable token is configured.
    hf_token = os.environ.get(TOKEN_ENV, "").strip()
    if hf_token == "":
        raise ValueError("Set HF_TOKEN before running this script.")

    push_to_hf(hf_token, read_jsonl(path))
|
|
# Run the upload only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|