Spaces:
Running
Running
File size: 1,762 Bytes
954b9d0 1804ce0 3595c1e 1804ce0 954b9d0 1804ce0 954b9d0 3595c1e 1804ce0 3595c1e 1804ce0 954b9d0 1804ce0 954b9d0 1804ce0 954b9d0 1804ce0 954b9d0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
from huggingface_hub import HfApi
from config.settings import (
DATASET_ID,
DATASET_VECTOR_STORE_PATH,
DATASET_CHAT_HISTORY_PATH,
DATASET_FINE_TUNED_PATH,
DATASET_ANNOTATIONS_PATH,
DATASET_TRAINING_DATA_PATH,
DATASET_TRAINING_LOGS_PATH,
HF_TOKEN
)
# Hub client authenticated with the project token (from config.settings).
api = HfApi(token=HF_TOKEN)
# Target dataset repository that all directory operations below act on.
dataset_name = DATASET_ID
def ensure_dataset_directory(directory: str) -> None:
    """Create *directory* in the dataset repo if it does not already exist.

    The Hub has no empty directories, so presence is detected by any file
    under ``directory/`` and creation is done via a ``.gitkeep`` placeholder.
    Errors are reported but never raised (best-effort).
    """
    try:
        existing = api.list_repo_files(repo_id=dataset_name, repo_type="dataset")
        prefix = f"{directory}/"
        if any(path.startswith(prefix) for path in existing):
            return  # already present — nothing to do
        api.upload_file(
            path_or_fileobj=b"",
            path_in_repo=f"{directory}/.gitkeep",
            repo_id=dataset_name,
            repo_type="dataset"
        )
        print(f"✓ Created missing directory: {directory}")
    except Exception as e:
        print(f"Error ensuring directory {directory}: {str(e)}")
# Make sure both training-related directories exist before anything uses them.
for _training_dir in (DATASET_TRAINING_DATA_PATH, DATASET_TRAINING_LOGS_PATH):
    ensure_dataset_directory(_training_dir)
# Initialize dataset structure: give every logical storage area a .gitkeep
# placeholder so the (otherwise empty) directory exists in the repo.
directories = [
    DATASET_VECTOR_STORE_PATH,
    DATASET_CHAT_HISTORY_PATH,
    DATASET_FINE_TUNED_PATH,
    DATASET_ANNOTATIONS_PATH,
    DATASET_TRAINING_DATA_PATH,
    DATASET_TRAINING_LOGS_PATH
]
all_created = True
for directory in directories:
    # Handle errors per directory: previously one failed upload aborted the
    # whole loop, silently skipping every remaining directory.
    try:
        api.upload_file(
            path_or_fileobj=b"",
            path_in_repo=f"{directory}/.gitkeep",
            repo_id=dataset_name,
            repo_type="dataset"
        )
        print(f"✓ Created directory: {directory}")
    except Exception as e:
        all_created = False
        print(f"Error occurred: {str(e)}")
# Success banner only when every directory was created, matching the
# original all-or-nothing message semantics.
if all_created:
    print("\nDataset structure successfully initialized!")
|