Spaces:
Running
Running
| from huggingface_hub import HfApi | |
| from config.settings import ( | |
| DATASET_ID, | |
| DATASET_VECTOR_STORE_PATH, | |
| DATASET_CHAT_HISTORY_PATH, | |
| DATASET_FINE_TUNED_PATH, | |
| DATASET_ANNOTATIONS_PATH, | |
| DATASET_TRAINING_DATA_PATH, | |
| DATASET_TRAINING_LOGS_PATH, | |
| HF_TOKEN | |
| ) | |
| api = HfApi(token=HF_TOKEN) | |
| dataset_name = DATASET_ID | |
| def ensure_dataset_directory(directory: str) -> None: | |
| """ | |
| Check if directory exists in dataset and create if not | |
| """ | |
| try: | |
| files = api.list_repo_files(repo_id=dataset_name, repo_type="dataset") | |
| if not any(f.startswith(f"{directory}/") for f in files): | |
| api.upload_file( | |
| path_or_fileobj=b"", | |
| path_in_repo=f"{directory}/.gitkeep", | |
| repo_id=dataset_name, | |
| repo_type="dataset" | |
| ) | |
| print(f"β Created missing directory: {directory}") | |
| except Exception as e: | |
| print(f"Error ensuring directory {directory}: {str(e)}") | |
| # Ensure training directories exist | |
| ensure_dataset_directory(DATASET_TRAINING_DATA_PATH) | |
| ensure_dataset_directory(DATASET_TRAINING_LOGS_PATH) | |
| # Initialize dataset structure | |
| directories = [ | |
| DATASET_VECTOR_STORE_PATH, | |
| DATASET_CHAT_HISTORY_PATH, | |
| DATASET_FINE_TUNED_PATH, | |
| DATASET_ANNOTATIONS_PATH, | |
| DATASET_TRAINING_DATA_PATH, | |
| DATASET_TRAINING_LOGS_PATH | |
| ] | |
| try: | |
| for directory in directories: | |
| api.upload_file( | |
| path_or_fileobj=b"", | |
| path_in_repo=f"{directory}/.gitkeep", | |
| repo_id=dataset_name, | |
| repo_type="dataset" | |
| ) | |
| print(f"β Created directory: {directory}") | |
| print("\nDataset structure successfully initialized!") | |
| except Exception as e: | |
| print(f"Error occurred: {str(e)}") | |