# Viewer metadata (not part of the script): File size: 3,859 Bytes; 8ef4c38
"""Upload initial data to private HuggingFace repository.

This script uploads:
1. ground_truth.json - Private test set
2. leaderboard.json - Initial leaderboard with MedGRPO model

Run this once during initial setup.
"""
import os
import sys
import json
from pathlib import Path
from huggingface_hub import HfApi
# Configuration
# Hub repository that receives the uploads: passed as repo_id to
# api.upload_file() and interpolated into the repository URL printed by main().
REPO_ID = "UIIAmerica/MedVidBench-GroundTruth"
# Passed as repo_type to api.upload_file().
REPO_TYPE = "dataset"
def create_initial_leaderboard(output_path="leaderboard.json"):
    """Write the initial leaderboard JSON file and return its path.

    The file contains a single entry for the Qwen2.5-VL-7B-MedGRPO model.

    Args:
        output_path: Where to write the JSON file. Defaults to
            ``leaderboard.json`` in the current working directory.

    Returns:
        pathlib.Path: The path of the file that was written.
    """
    leaderboard_data = [
        {
            "rank": 1,
            "model_name": "Qwen2.5-VL-7B-MedGRPO",
            "organization": "UII",
            "cvs_acc": 0.914,
            "nap_acc": 0.427,
            "sa_acc": 0.244,
            "stg_miou": 0.202,
            "tag_miou_03": 0.216,
            "tag_miou_05": 0.156,
            "dvc_llm": 3.797,
            "dvc_f1": 0.210,
            "vs_llm": 4.184,
            "rc_llm": 3.442,
            "date": "2025-01-14",
            "contact": "gaozhongpai@gmail.com",
        }
    ]

    leaderboard_file = Path(output_path)
    # Explicit encoding so the written bytes do not depend on the locale.
    with leaderboard_file.open('w', encoding='utf-8') as f:
        json.dump(leaderboard_data, f, indent=2)

    print(f"✓ Created {leaderboard_file} with 1 entry (Qwen2.5-VL-7B-MedGRPO)")
    return leaderboard_file
def _upload_or_exit(api, local_path, path_in_repo, token, commit_message):
    """Upload one local file to REPO_ID; print the error and exit(1) on failure."""
    try:
        api.upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=path_in_repo,
            repo_id=REPO_ID,
            repo_type=REPO_TYPE,
            token=token,
            commit_message=commit_message,
        )
    except Exception as e:
        # Script boundary: report whatever went wrong and stop with a
        # non-zero status instead of showing a traceback.
        print(f" ❌ Failed: {e}")
        sys.exit(1)
    print(f" ✓ Uploaded {path_in_repo}")


def main():
    """Upload initial files to private repo.

    Reads the access token from the ``HF_TOKEN`` environment variable, then
    uploads ``data/ground_truth.json`` (skipped with a notice when the file
    does not exist) and a freshly created ``leaderboard.json`` to REPO_ID.

    Exits with status 1 when ``HF_TOKEN`` is unset or empty, or when an
    upload fails.
    """
    # Check token
    token = os.environ.get('HF_TOKEN')
    if not token:
        print("❌ HF_TOKEN environment variable not set")
        print(" Please run: export HF_TOKEN='your_token_here'")
        sys.exit(1)

    print("=" * 80)
    print(f"UPLOADING INITIAL DATA TO {REPO_ID}")
    print("=" * 80)

    api = HfApi()

    # 1. Upload ground truth
    print("\n[1/2] Uploading ground_truth.json...")
    ground_truth_file = Path("data/ground_truth.json")
    if not ground_truth_file.exists():
        # A missing ground-truth file is not fatal; the leaderboard is still
        # uploaded below.
        print(f" ⚠ File not found: {ground_truth_file}")
        print(" Skipping ground_truth.json upload...")
    else:
        file_size = ground_truth_file.stat().st_size / (1024 * 1024)  # MB
        print(f" File size: {file_size:.2f} MB")
        _upload_or_exit(
            api,
            ground_truth_file,
            "ground_truth.json",
            token,
            "Upload ground truth data",
        )

    # 2. Upload leaderboard
    print("\n[2/2] Uploading leaderboard.json...")
    # Create leaderboard with MedGRPO data
    leaderboard_file = create_initial_leaderboard()
    file_size = leaderboard_file.stat().st_size
    print(f" File size: {file_size} bytes")
    _upload_or_exit(
        api,
        leaderboard_file,
        "leaderboard.json",
        token,
        "Initialize leaderboard with Qwen2.5-VL-7B-MedGRPO",
    )

    print("\n" + "=" * 80)
    print("✅ UPLOAD COMPLETE")
    print("=" * 80)
    print(f"\nRepository: https://huggingface.co/datasets/{REPO_ID}")
    print("\nUploaded:")
    print(" ✓ ground_truth.json (if available)")
    print(" ✓ leaderboard.json with Qwen2.5-VL-7B-MedGRPO")
    print("\nNext steps:")
    print("1. Verify files in repository")
    print("2. Add HF_TOKEN secret to HuggingFace Space")
    print("3. Deploy app.py to Space")
    print("4. Check app logs for successful loading")
if __name__ == "__main__":
    # Run the upload only when executed as a script, not when imported.
    main()