# mila-robot-learning-course / create_datasets.py
# Branch: Neo-X β€” "Adding column" (commit 7899f4b)
import pandas as pd
from huggingface_hub import HfApi, login
import os
# --- CONFIGURATION ---
HF_USERNAME = "gberseth"  # <--- REPLACE THIS
HF_TOKEN = os.environ.get("HF_TOKEN")  # None if the env var is unset

# Define Repo IDs
REQUESTS_REPO = f"{HF_USERNAME}/rl-leaderboard-requests"
RESULTS_REPO = f"{HF_USERNAME}/rl-leaderboard-results"

# Authenticate by passing the token straight to the client. Previously
# HF_TOKEN was read but never used (login() was commented out), so calls
# to create_repo/upload_file could fail with 401 on authenticated repos.
# HfApi(token=None) falls back to any locally saved credential, so this
# stays backward compatible.
api = HfApi(token=HF_TOKEN)
def create_dataset(repo_id: str, csv_filename: str, columns: list) -> bool:
    """Create a dataset repo on the Hub and seed it with a headers-only CSV.

    Args:
        repo_id: Full repo id, e.g. "user/rl-leaderboard-requests".
        csv_filename: Local filename, also used as the path inside the repo.
        columns: Column names written as the CSV header row.

    Returns:
        True if the repo exists and the CSV was uploaded, False otherwise.
        (Callers in this script ignore the return value, so this is
        backward compatible.)
    """
    print(f"Processing {repo_id}...")

    # 1. Create the Repository on Hub (if it doesn't exist)
    try:
        api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
        print(f"βœ… Repo '{repo_id}' created (or already exists).")
    except Exception as e:
        print(f"❌ Error creating repo: {e}")
        return False

    # 2. Create a local CSV containing only the header row. An empty
    # DataFrame with named columns serializes to a headers-only file,
    # which is enough for downstream readers to recognize the schema.
    df = pd.DataFrame(columns=columns)
    df.to_csv(csv_filename, index=False)

    # 3. Upload the CSV. Cleanup lives in `finally` so the local temp
    # file is removed even if upload_file raises something the except
    # clause does not anticipate.
    uploaded = False
    try:
        api.upload_file(
            path_or_fileobj=csv_filename,
            path_in_repo=csv_filename,  # e.g. "requests.csv"
            repo_id=repo_id,
            repo_type="dataset",
        )
        print(f"βœ… Initial '{csv_filename}' uploaded to {repo_id}.")
        uploaded = True
    except Exception as e:
        print(f"❌ Error uploading file: {e}")
    finally:
        # Cleanup local file
        if os.path.exists(csv_filename):
            os.remove(csv_filename)
    return uploaded
# --- EXECUTION ---
# Bootstrap both leaderboard datasets, data-driven:
#  - requests: queue columns shared by the frontend and backend
#  - results:  score columns written by the backend
_dataset_specs = [
    (REQUESTS_REPO, "requests.csv",
     ["model_id", "status", "created_at", "submitted_by"]),
    (RESULTS_REPO, "results.csv",
     ["model_id", "mean_reward", "run_time", "status", "completed_at"]),
]
for _repo, _csv, _cols in _dataset_specs:
    create_dataset(repo_id=_repo, csv_filename=_csv, columns=_cols)

print("\nπŸŽ‰ Setup complete! You can now link these datasets in your app.py files.")