rahgadda committed on
Commit
0a7cf71
·
1 Parent(s): d74d0dc

Initial Draft

Browse files
Files changed (1) hide show
  1. Dashboard.py +26 -5
Dashboard.py CHANGED
@@ -6,18 +6,39 @@ import os
6
  ################################
7
  ######### Variables ############
8
  ################################
9
-
10
  HF_API_KEY = os.environ.get("HF_API_KEY")
11
  DATA_SET = os.environ.get("DATA_SET")
12
 
13
- st.write(HF_API_KEY)
14
- st.write(DATA_SET)
15
-
16
  ################################
17
  ####### GenericFunctions #######
18
  ################################
 
 
 
 
 
 
 
 
 
 
19
 
 
 
 
 
 
 
 
 
 
 
 
20
 
 
 
 
21
  ################################
22
  ####### Display of data ########
23
- ################################
 
 
6
  ################################
7
  ######### Variables ############
8
  ################################
 
9
  HF_API_KEY = os.environ.get("HF_API_KEY")
10
  DATA_SET = os.environ.get("DATA_SET")
11
 
 
 
 
12
  ################################
13
  ####### GenericFunctions #######
14
  ################################
15
def load_dataset():
    """Load the dataset named by ``DATA_SET`` from the Hugging Face Hub.

    Returns:
        The object returned by ``datasets.load_dataset`` for ``DATA_SET``.
        NOTE(review): ``save_dataset`` iterates it as a mapping of split
        name to examples, which presumes a ``DatasetDict`` -- confirm.
    """
    # This function has the same name as the library loader, so a bare
    # ``load_dataset(...)`` call in here would re-enter this zero-argument
    # function and raise TypeError. Import the library loader under an alias.
    from datasets import load_dataset as hf_load_dataset

    return hf_load_dataset(DATA_SET, use_auth_token=True)


def save_dataset(dataset=None, output_directory="data", column="column_name"):
    """Write every example of a dataset to its own UTF-8 text file.

    Files are named ``<split>_<index>.txt`` and placed in
    ``output_directory``.

    Args:
        dataset: Mapping of split name to an iterable of examples, where
            each example can be indexed by ``column``. When ``None``, the
            dataset is obtained by calling ``load_dataset()``.
        output_directory: Directory that receives the files; it is created
            if it does not exist yet.
        column: Key of the field whose text is written to each file. The
            default is a placeholder and must be replaced with the actual
            column containing file data.

    Returns:
        None.
    """
    # Load a dataset from the Hugging Face Hub unless the caller supplied one
    if dataset is None:
        dataset = load_dataset()

    # Create a directory to save the files
    os.makedirs(output_directory, exist_ok=True)

    # Iterate through the dataset and save each file to the data folder
    for split, examples in dataset.items():
        for i, example in enumerate(examples):
            file_content = example[column]
            file_name = f"{split}_{i}.txt"  # Unique per split and row index
            file_path = os.path.join(output_directory, file_name)

            with open(file_path, "w", encoding="utf-8") as file:
                file.write(file_content)

            print(f"Saved: {file_path}")

    print("All files saved successfully.")
39
+
40
+
41
  ################################
42
  ####### Display of data ########
43
+ ################################
44
+ save_dataset()