USStocksHeatMap / utils.py
AmirTrader's picture
Create utils.py
08966f1 verified
def upload_to_hf_dataset(file_path, dataset_name, token, repo_type="dataset"):
    """
    Upload a file to a Hugging Face dataset repository.

    The file is uploaded under its base name, so any directory components
    of ``file_path`` are not reproduced in the repository path.

    Args:
        file_path (str): Path to the file to upload
        dataset_name (str): Name of the dataset in format 'username/dataset-name'
        token (str): Hugging Face API token
        repo_type (str): Repository type, defaults to 'dataset'

    Returns:
        bool: True if the upload completed without raising, False otherwise.
            Errors are printed rather than propagated.
    """
    import os

    from huggingface_hub import HfApi

    # Initialize the Hugging Face API client
    api = HfApi()
    try:
        # Derived once and reused for the in-repo path and both commit strings.
        # Kept inside the try so an invalid file_path is reported, not raised.
        file_name = os.path.basename(file_path)
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=file_name,  # Use filename as path in repo
            repo_id=dataset_name,
            repo_type=repo_type,
            token=token,
            commit_message=f"Upload {file_name}",
            commit_description=f"Automated upload of {file_name} to dataset",
        )
    except Exception as e:
        # Best-effort by design: report the failure and signal it via the
        # return value instead of crashing the caller.
        print(f"Error uploading file: {str(e)}")
        return False
    print(f"Successfully uploaded {file_path} to {dataset_name}")
    return True
def download_from_hf_dataset(file_path, dataset_name, token, repo_type="dataset"):
    """
    Download a file from a Hugging Face dataset repository.

    The download is requested with ``local_dir="."``.

    Args:
        file_path (str): Path in the repository to download from
        dataset_name (str): Name of the dataset in format 'username/dataset-name'
        token (str): Hugging Face API token
        repo_type (str): Repository type, defaults to 'dataset'

    Returns:
        The value returned by ``HfApi.hf_hub_download`` (documented by
        huggingface_hub as the local path of the downloaded file), or
        None if the download raised. Errors are printed rather than
        propagated.
    """
    from huggingface_hub import HfApi

    # Initialize the Hugging Face API client
    api = HfApi()
    try:
        # Download the file from the dataset repository
        local_path = api.hf_hub_download(
            repo_id=dataset_name,
            filename=file_path,
            repo_type=repo_type,
            local_dir=".",
            token=token,
        )
    except Exception as e:
        # Best-effort by design: report the failure and signal it with None.
        print(f"Error downloading file: {str(e)}")
        return None
    print(f"Successfully downloaded {file_path} from {dataset_name}")
    return local_path
def load_hf_dataset(csv_filename, token, dataset_name_input):
    """
    Fetch one CSV file from a Hugging Face dataset and convert it to pandas.

    The "train" split is requested from ``load_dataset`` and converted
    with ``to_pandas()``.

    Args:
        csv_filename (str): Name of the CSV file in the dataset, passed as
            ``data_files``
        token (str): Hugging Face authentication token
        dataset_name_input (str): Dataset identifier passed to ``load_dataset``

    Returns:
        pandas.DataFrame: DataFrame containing the dataset, or None if
            loading or conversion raised (the error is printed).
    """
    from datasets import load_dataset

    # Stays None unless both the load and the conversion succeed.
    frame = None
    try:
        frame = load_dataset(
            dataset_name_input,
            data_files=csv_filename,
            split="train",
            token=token,
        ).to_pandas()
    except Exception as e:
        print(f"Error loading dataset: {e}")
    return frame