Spaces:

UIIAmerica
/

MedVidBench-Leaderboard

Running

MedVidBench-Leaderboard / upload_initial_data.py

MedGRPO Team

update

8ef4c38 about 2 months ago

3.86 kB

	"""Upload initial data to private HuggingFace repository.

	This script uploads:
	1. ground_truth.json - Private test set
	2. leaderboard.json - Initial leaderboard with MedGRPO model

	Run this once during initial setup. Requires the HF_TOKEN environment variable.
	"""

	import os
	import sys
	import json
	from pathlib import Path
	from huggingface_hub import HfApi

	# Configuration
	# Hugging Face Hub repository that every upload in this script targets
	# (passed as `repo_id` to HfApi.upload_file and shown in the final summary URL).
	REPO_ID = "UIIAmerica/MedVidBench-GroundTruth"
	# Passed as `repo_type` to HfApi.upload_file; the target is a dataset repo.
	REPO_TYPE = "dataset"

	def create_initial_leaderboard(output_path: "str | Path" = "leaderboard.json") -> Path:
	    """Write the initial leaderboard JSON file and return its path.

	    The file contains a JSON list with a single entry: the
	    Qwen2.5-VL-7B-MedGRPO baseline and its per-task scores.

	    Args:
	        output_path: Destination of the JSON file. Defaults to
	            ``leaderboard.json`` in the current working directory, which
	            is what a zero-argument call has always produced.

	    Returns:
	        The ``pathlib.Path`` of the file that was written.
	    """
	    leaderboard_data = [
	        {
	            "rank": 1,
	            "model_name": "Qwen2.5-VL-7B-MedGRPO",
	            "organization": "UII",
	            "cvs_acc": 0.914,
	            "nap_acc": 0.427,
	            "sa_acc": 0.244,
	            "stg_miou": 0.202,
	            "tag_miou_03": 0.216,
	            "tag_miou_05": 0.156,
	            "dvc_llm": 3.797,
	            "dvc_f1": 0.210,
	            "vs_llm": 4.184,
	            "rc_llm": 3.442,
	            "date": "2025-01-14",
	            "contact": "gaozhongpai@gmail.com",
	        }
	    ]

	    leaderboard_file = Path(output_path)
	    # Pin the encoding so the written bytes do not depend on the platform's
	    # default locale (json.dump already escapes non-ASCII characters).
	    with open(leaderboard_file, "w", encoding="utf-8") as f:
	        json.dump(leaderboard_data, f, indent=2)

	    print(f"✓ Created {leaderboard_file.name} with 1 entry (Qwen2.5-VL-7B-MedGRPO)")
	    return leaderboard_file


	def _upload_or_exit(api, local_path, path_in_repo, commit_message, token):
	    """Upload one local file to REPO_ID; print the error and exit(1) on failure."""
	    try:
	        api.upload_file(
	            path_or_fileobj=str(local_path),
	            path_in_repo=path_in_repo,
	            repo_id=REPO_ID,
	            repo_type=REPO_TYPE,
	            token=token,
	            commit_message=commit_message,
	        )
	    except Exception as e:
	        # Script-level boundary: any upload error is reported and aborts the run.
	        print(f" ❌ Failed: {e}")
	        sys.exit(1)
	    print(f" ✓ Uploaded {path_in_repo}")


	def main():
	    """Upload the ground truth and the initial leaderboard to REPO_ID.

	    Steps:
	      1. Read the access token from the ``HF_TOKEN`` environment variable;
	         exit with status 1 if it is missing or empty.
	      2. Upload ``data/ground_truth.json`` as ``ground_truth.json`` when the
	         file exists; otherwise print a notice and skip it.
	      3. Regenerate ``leaderboard.json`` via ``create_initial_leaderboard()``
	         and upload it.
	      4. Print a summary of what was actually uploaded and the next steps.

	    Any upload failure prints the error and exits with status 1.

	    NOTE(review): this function neither creates the repository nor checks
	    its visibility. Presumably it already exists and is private; confirm
	    before uploading the private test set.
	    """
	    # Check token
	    token = os.environ.get("HF_TOKEN")
	    if not token:
	        print("❌ HF_TOKEN environment variable not set")
	        print(" Please run: export HF_TOKEN='your_token_here'")
	        sys.exit(1)

	    print("=" * 80)
	    print(f"UPLOADING INITIAL DATA TO {REPO_ID}")
	    print("=" * 80)

	    api = HfApi()

	    # 1. Upload ground truth
	    print("\n[1/2] Uploading ground_truth.json...")
	    ground_truth_file = Path("data/ground_truth.json")
	    # Tracked so the final summary reports what really happened.
	    ground_truth_uploaded = False

	    if not ground_truth_file.exists():
	        print(f" ❌ File not found: {ground_truth_file}")
	        print(" Skipping ground_truth.json upload...")
	    else:
	        size_mb = ground_truth_file.stat().st_size / (1024 * 1024)  # MB
	        print(f" File size: {size_mb:.2f} MB")
	        _upload_or_exit(
	            api,
	            ground_truth_file,
	            "ground_truth.json",
	            "Upload ground truth data",
	            token,
	        )
	        ground_truth_uploaded = True

	    # 2. Upload leaderboard
	    print("\n[2/2] Uploading leaderboard.json...")

	    # Create leaderboard with MedGRPO data
	    leaderboard_file = create_initial_leaderboard()

	    size_bytes = leaderboard_file.stat().st_size
	    print(f" File size: {size_bytes} bytes")

	    _upload_or_exit(
	        api,
	        leaderboard_file,
	        "leaderboard.json",
	        "Initialize leaderboard with Qwen2.5-VL-7B-MedGRPO",
	        token,
	    )

	    print("\n" + "=" * 80)
	    print("✅ UPLOAD COMPLETE")
	    print("=" * 80)
	    print(f"\nRepository: https://huggingface.co/datasets/{REPO_ID}")
	    print("\nUploaded:")
	    if ground_truth_uploaded:
	        print(" ✓ ground_truth.json")
	    else:
	        # Do not claim an upload that was skipped above.
	        print(" ✗ ground_truth.json (skipped: file not found)")
	    print(" ✓ leaderboard.json with Qwen2.5-VL-7B-MedGRPO")
	    print("\nNext steps:")
	    print("1. Verify files in repository")
	    print("2. Add HF_TOKEN secret to HuggingFace Space")
	    print("3. Deploy app.py to Space")
	    print("4. Check app logs for successful loading")


	# Run the upload only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()