Spaces:

tfrere
/

leaderboard-parser-agent

Build error

App Files Files Community

leaderboard-parser-agent / src /hub_utils.py

tfrere HF Staff

first commit

0821095 11 months ago

raw

history blame contribute delete

6.73 kB

	"""
	Utilities for interacting with the Hugging Face Hub.
	"""
	import os
	from huggingface_hub import HfApi, login, hf_hub_download


	def upload_to_hub(to_parse_file, results_file, repo_id=None):
	    """
	    Uploads the categories and results files to a Hugging Face Hub dataset.

	    Args:
	        to_parse_file: Path to the categories file; stored in the repository
	            as best_model_for_category_list.json
	        results_file: Path to the results file; stored in the repository
	            as best_model_for_results.json
	        repo_id: Hub repository ID. When None, the environment variable
	            HUGGING_FACE_STORAGE_REPO is used, falling back to
	            "leaderboard-explorer/leaderboard_explorer" when that variable
	            is unset or empty.

	    Returns:
	        True if both files were uploaded (or were reported as unchanged),
	        False if the token is missing or any other error occurred
	    """
	    try:
	        if repo_id is None:
	            # Use `or` instead of a getenv default so that a variable that is
	            # set but empty also falls back to the default repository.
	            env_repo = os.getenv("HUGGING_FACE_STORAGE_REPO")
	            repo_id = env_repo or "leaderboard-explorer/leaderboard_explorer"
	            if env_repo:
	                print(f"Using target dataset specified in HUGGING_FACE_STORAGE_REPO: {repo_id}")
	            else:
	                print(f"No target dataset specified, using default value: {repo_id}")

	        # Without a token nothing can be uploaded; report and stop early.
	        token = os.getenv("HUGGING_FACE_HUB_TOKEN")
	        if not token:
	            print("ERROR: Environment variable HUGGING_FACE_HUB_TOKEN is not defined.")
	            return False

	        # Connect to Hub
	        print("Connecting to Hugging Face Hub...")
	        login(token=token)
	        api = HfApi()

	        # Categories first, then results; a failure in either one propagates
	        # to the handler below and makes the whole call return False.
	        _upload_json_to_hub(
	            api, to_parse_file, "best_model_for_category_list.json", repo_id, "categories"
	        )
	        _upload_json_to_hub(
	            api, results_file, "best_model_for_results.json", repo_id, "results"
	        )

	        print("\nUpload operation completed: files have been processed!")
	        return True
	    except Exception as e:
	        print(f"GENERAL ERROR during file upload to Hub: {e}")
	        return False


	def _upload_json_to_hub(api, local_file, path_in_repo, repo_id, label):
	    """
	    Uploads a single file to the dataset repository and logs the outcome.

	    Args:
	        api: HfApi instance used to perform the upload
	        local_file: Path of the local file to upload
	        path_in_repo: Destination file name inside the repository
	        repo_id: Hub repository ID
	        label: Short name of the file kind ("categories" or "results"),
	            used in log messages and in the commit message

	    Raises:
	        Exception: re-raises any upload error whose message does not say
	            that no files were modified since the last commit
	    """
	    print(f"\n--- UPLOADING {label.upper()} FILE ---")
	    print(f"Local file: {local_file}")
	    print(f"Destination: {repo_id}/{path_in_repo}")
	    print("Uploading...")

	    try:
	        api.upload_file(
	            path_or_fileobj=local_file,
	            path_in_repo=path_in_repo,
	            repo_id=repo_id,
	            repo_type="dataset",
	            commit_message=f"Update leaderboard {label}",
	        )
	    except Exception as e:
	        print(f"Note when uploading {local_file}: {e}")
	        if "No files have been modified since last commit" not in str(e):
	            print("→ ERROR: Upload failed for another reason.")
	            # Bare raise keeps the original traceback intact.
	            raise
	        # An unchanged file is not a failure: the Hub copy is already current.
	        print(f"→ The {label} file is identical to the one already on the Hub. No changes needed.")
	    else:
	        print(f"Upload of {local_file} successful!")


	def download_from_hub(repo_id=None):
	    """
	    Downloads the leaderboard data files from a Hugging Face Hub dataset.

	    Files are written to a "data" directory located two levels above this
	    file's own path; the directory is created if it does not exist.

	    Args:
	        repo_id: Hub repository ID. When None, the environment variable
	            HUGGING_FACE_STORAGE_REPO is used, falling back to
	            "leaderboard-explorer/leaderboard_explorer" when that variable
	            is unset or empty.

	    Returns:
	        True if every required file was downloaded (optional files may be
	        missing), False if the token is missing, a required file could not
	        be downloaded, or any other error occurred
	    """
	    try:
	        if repo_id is None:
	            # Use `or` instead of a getenv default so that a variable that is
	            # set but empty also falls back to the default repository.
	            env_repo = os.getenv("HUGGING_FACE_STORAGE_REPO")
	            repo_id = env_repo or "leaderboard-explorer/leaderboard_explorer"
	            if env_repo:
	                print(f"Using source dataset specified in HUGGING_FACE_STORAGE_REPO: {repo_id}")
	            else:
	                print(f"No source dataset specified, using default value: {repo_id}")

	        # Without a token nothing can be downloaded; report and stop early.
	        token = os.getenv("HUGGING_FACE_HUB_TOKEN")
	        if not token:
	            print("ERROR: Environment variable HUGGING_FACE_HUB_TOKEN is not defined.")
	            return False

	        # Connect to Hub
	        login(token=token)

	        # Create data directory if it doesn't exist
	        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	        data_dir = os.path.join(project_root, "data")
	        os.makedirs(data_dir, exist_ok=True)

	        required_files = (
	            "final_leaderboards.json",
	            "best_model_for_category_list.json",
	        )
	        optional_files = (
	            "best_model_for_results.json",
	        )

	        # A missing required file aborts the whole download.
	        for filename in required_files:
	            try:
	                _download_dataset_file(repo_id, filename, data_dir)
	            except Exception as e:
	                print(f"ERROR: Unable to download required file {filename}: {e}")
	                return False

	        # A missing optional file is only reported; the download continues.
	        for filename in optional_files:
	            try:
	                _download_dataset_file(repo_id, filename, data_dir)
	            except Exception as e:
	                print(f"WARNING: Unable to download optional file {filename}: {e}")
	                print("This is not a problem, a new file will be created if necessary.")

	        return True
	    except Exception as e:
	        print(f"ERROR during file download from Hub: {e}")
	        return False


	def _download_dataset_file(repo_id, filename, data_dir):
	    """
	    Downloads a single file from the dataset repository into data_dir.

	    Args:
	        repo_id: Hub repository ID
	        filename: Name of the file inside the repository
	        data_dir: Local directory passed to hf_hub_download as local_dir

	    Raises:
	        Exception: whatever hf_hub_download raises; the caller decides
	            whether the failure is fatal
	    """
	    local_path = os.path.join(data_dir, filename)
	    print(f"Downloading {filename} from {repo_id}...")
	    hf_hub_download(
	        repo_id=repo_id,
	        filename=filename,
	        repo_type="dataset",
	        local_dir=data_dir,
	        local_dir_use_symlinks=False,
	    )
	    print(f"File {filename} successfully downloaded to {local_path}")