Spaces:
Running
Running
File size: 2,750 Bytes
08966f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
def upload_to_hf_dataset(file_path, dataset_name, token, repo_type="dataset"):
    """
    Push a single local file to a Hugging Face Hub repository.

    The file is stored in the repository under its base name (any directory
    part of ``file_path`` is dropped), and the commit message and description
    are generated from that same name. A success or error message is printed;
    exceptions raised while uploading are caught and reported, not propagated.

    Args:
        file_path (str): Path to the local file to upload
        dataset_name (str): Target repository in format 'username/dataset-name'
        token (str): Hugging Face API token
        repo_type (str): Repository type, defaults to 'dataset'
    """
    import os

    from huggingface_hub import HfApi

    hub_client = HfApi()
    try:
        # Path in repo, commit message and description all share the base name;
        # it is computed inside the try so a bad file_path is reported below.
        filename = os.path.basename(file_path)
        hub_client.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=filename,
            repo_id=dataset_name,
            repo_type=repo_type,
            token=token,
            commit_message=f"Upload {filename}",
            commit_description=f"Automated upload of {filename} to dataset",
        )
        print(f"Successfully uploaded {file_path} to {dataset_name}")
    except Exception as err:
        print(f"Error uploading file: {err!s}")
def download_from_hf_dataset(file_path, dataset_name, token, repo_type="dataset"):
    """
    Download a file from a Hugging Face dataset repository.

    The download is requested with ``local_dir="."``, so the file is placed
    relative to the current working directory. A success or error message is
    printed; exceptions raised while downloading are caught and reported, not
    propagated, and the function returns nothing either way.

    Args:
        file_path (str): Path in the repository to download from
        dataset_name (str): Name of the dataset in format 'username/dataset-name'
        token (str): Hugging Face API token
        repo_type (str): Repository type, defaults to 'dataset'
    """
    from huggingface_hub import HfApi

    # Initialize the Hugging Face API client
    api = HfApi()
    try:
        # Download the file from the dataset repository into the working directory
        api.hf_hub_download(
            repo_id=dataset_name,
            filename=file_path,
            repo_type=repo_type,
            local_dir=".",
            token=token,
        )
        print(f"Successfully downloaded {file_path} from {dataset_name}")
    except Exception as e:
        print(f"Error downloading file: {str(e)}")
def load_hf_dataset(csv_filename, token, dataset_name_input):
    """
    Fetch one CSV file from a Hugging Face dataset and convert it to pandas.

    The file is loaded as the "train" split. Any exception raised while
    loading or converting is caught, reported with a printed message, and
    signalled to the caller by a None return value.

    Args:
        csv_filename (str): Name of the CSV file in the dataset
        token (str): Hugging Face authentication token
        dataset_name_input (str): Dataset identifier passed to ``load_dataset``

    Returns:
        pandas.DataFrame: DataFrame containing the dataset, or None on failure
    """
    from datasets import load_dataset

    frame = None
    try:
        train_split = load_dataset(
            dataset_name_input,
            data_files=csv_filename,
            split="train",
            token=token,
        )
        frame = train_split.to_pandas()
    except Exception as err:
        print(f"Error loading dataset: {err}")
    return frame