Spaces:
Sleeping
Sleeping
| from hf_scrapper import get_file_structure | |
| from dotenv import load_dotenv | |
| import os | |
| import json | |
| load_dotenv() | |
def index_repository(token, repo, current_path=""):
    """Recursively build a nested listing of a repository's contents.

    Calls ``get_file_structure(repo, token, current_path)`` and walks the
    returned entries. Each entry is read as a mapping with at least
    ``'type'`` and ``'path'`` keys; non-directory entries are also read
    for ``'size'``.

    Args:
        token: Passed through unchanged to ``get_file_structure``
            (presumably an access token -- confirm against hf_scrapper).
        repo: Passed through unchanged to ``get_file_structure``.
        current_path: Path inside the repository to list; the empty
            string is used for the top-level call.

    Returns:
        A list of dicts in the order the entries were returned.
        Directories become ``{"type": "directory", "path", "contents"}``
        where ``contents`` is the recursively indexed sub-listing; every
        other entry becomes ``{"type", "size", "path"}``.
    """
    indexed = []
    for entry in get_file_structure(repo, token, current_path):
        if entry['type'] != 'directory':
            # Plain entry: keep only the fields the index needs.
            indexed.append({
                "type": entry['type'],
                "size": entry['size'],
                "path": entry['path'],
            })
            continue

        # Directory: descend first, then record it with its children.
        children = index_repository(token, repo, entry['path'])
        indexed.append({
            "type": "directory",
            "path": entry['path'],
            "contents": children,
        })
    return indexed
def indexer():
    """Index the repository named by the environment and return the result.

    Reads the ``TOKEN`` and ``REPO`` environment variables, indexes the
    repository starting from its root path (``""``) via
    ``index_repository``, prints a confirmation message, and returns the
    nested structure that ``index_repository`` produced.
    """
    repo_name = os.environ.get("REPO")
    access_token = os.environ.get("TOKEN")
    tree = index_repository(access_token, repo_name, "")
    print(f"Full file structure for repository '{repo_name}' has been indexed.")
    return tree
if __name__ == '__main__':
    files = indexer()

    # The output file lives in the directory named by CACHE_DIR.
    # os.path.join gives the same path whether or not CACHE_DIR ends with a
    # separator, and the "" fallback writes to the working directory when
    # the variable is unset instead of creating a file literally named
    # "Nonefile_structure.json".
    cache_dir = os.getenv('CACHE_DIR') or ""
    output_file = os.path.join(cache_dir, "file_structure.json")

    # Save the structure to a JSON file
    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump(files, f, indent=4)
    print(f"File structure saved to {output_file}")