AmirTrader committed on
Commit
7470496
·
verified ·
1 Parent(s): 45c1bdf

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +89 -0
utils.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def upload_to_hf_dataset(file_path, dataset_name, token, repo_type="dataset"):
    """
    Push a single local file to a repository on the Hugging Face Hub.

    The file is placed in the repository under its base name. Any exception
    raised while uploading is caught and printed instead of being propagated.

    Args:
        file_path (str): Local path of the file to upload
        dataset_name (str): Target repository id in the form 'username/dataset-name'
        token (str): Hugging Face API token passed to the upload call
        repo_type (str): Repository type, defaults to 'dataset'
    """
    import os
    from huggingface_hub import HfApi

    hub_client = HfApi()

    try:
        # Assemble the request up front: the file's base name is used both as
        # the path inside the repository and in the commit metadata.
        upload_kwargs = dict(
            path_or_fileobj=file_path,
            path_in_repo=os.path.basename(file_path),
            repo_id=dataset_name,
            repo_type=repo_type,
            token=token,
            commit_message=f"Upload {os.path.basename(file_path)}",
            commit_description=f"Automated upload of {os.path.basename(file_path)} to dataset",
        )
        hub_client.upload_file(**upload_kwargs)
        print(f"Successfully uploaded {file_path} to {dataset_name}")
    except Exception as e:
        # Best-effort helper: report the failure on stdout and carry on.
        print(f"Error uploading file: {str(e)}")
31
+
32
+
33
+
34
+
35
def download_from_hf_dataset(file_path, dataset_name, token, repo_type="dataset"):
    """
    Download a file from a Hugging Face dataset repository.

    The download is requested with ``local_dir="."``. Any exception raised
    while downloading is caught and printed instead of being propagated.

    Args:
        file_path (str): Path of the file inside the repository
        dataset_name (str): Name of the dataset in format 'username/dataset-name'
        token (str): Hugging Face API token
        repo_type (str): Repository type, defaults to 'dataset'

    Returns:
        str or None: The value returned by ``hf_hub_download`` (the local
        path of the downloaded file) on success, or None if the download
        failed.
    """
    from huggingface_hub import HfApi

    # Initialize the Hugging Face API client
    api = HfApi()

    try:
        # Keep the result so callers can locate the file and detect failure.
        local_path = api.hf_hub_download(
            repo_id=dataset_name,
            filename=file_path,
            repo_type=repo_type,
            local_dir=".",
            token=token,
        )
        print(f"Successfully downloaded {file_path} from {dataset_name}")
        return local_path
    except Exception as e:
        # Best-effort helper: report the failure and signal it with None.
        print(f"Error downloading file: {str(e)}")
        return None
63
+
64
+
65
+
66
+
67
def load_hf_dataset(csv_filename, token, dataset_name_input):
    """
    Fetch one CSV file of a Hugging Face dataset as a pandas DataFrame.

    Args:
        csv_filename (str): Name of the CSV file in the dataset
        token (str): Hugging Face authentication token
        dataset_name_input (str): Dataset identifier handed to ``load_dataset``

    Returns:
        pandas.DataFrame or None: The "train" split converted with
        ``to_pandas()``, or None if loading failed (the error is printed).
    """
    from datasets import load_dataset

    try:
        load_options = dict(data_files=csv_filename, split="train", token=token)
        train_split = load_dataset(dataset_name_input, **load_options)
        frame = train_split.to_pandas()
        return frame
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return None
89
+