| import json |
| from hf_scrapper import get_system_proxies, get_file_structure, write_file_structure_to_json |
| from dotenv import load_dotenv |
| import os |
|
|
# Load variables from a .env file into the process environment at import time (python-dotenv), so the os.getenv() lookups in indexer() can find TOKEN, REPO and INDEX_FILE.
| load_dotenv() |
|
|
def index_repository(token, repo, current_path="", proxies=None):
    """Recursively build a nested listing of a repository starting at *current_path*.

    The entries for *current_path* are fetched with ``get_file_structure``;
    every entry whose ``'type'`` is ``'directory'`` is descended into and
    replaced by a dict holding its path and its own (recursively built)
    listing. All other entries are kept exactly as returned by
    ``get_file_structure``.

    Args:
        token: Forwarded unchanged to ``get_file_structure`` (presumably an
            access token -- confirm against ``hf_scrapper``).
        repo: Forwarded unchanged to ``get_file_structure`` (presumably the
            repository identifier -- confirm against ``hf_scrapper``).
        current_path: Path inside the repository to list; ``""`` is what the
            top-level caller passes.
        proxies: Forwarded unchanged to ``get_file_structure``.

    Returns:
        A list with one element per entry of *current_path*. Directory
        entries have the shape
        ``{"type": "directory", "path": <path>, "contents": [...]}``.
    """
    tree = []
    # NOTE: get_file_structure takes (repo, token, ...), i.e. the first two
    # arguments are in the opposite order from this function's signature.
    for entry in get_file_structure(repo, token, current_path, proxies):
        if entry['type'] != 'directory':
            # Non-directory entries are stored untouched.
            tree.append(entry)
            continue

        # Descend first, then wrap the child listing under the directory path.
        children = index_repository(token, repo, entry['path'], proxies)
        tree.append({
            "type": "directory",
            "path": entry['path'],
            "contents": children,
        })
    return tree
|
|
def indexer():
    """Index the repository configured in the environment and save it as JSON.

    Reads ``TOKEN``, ``REPO`` and ``INDEX_FILE`` from the process environment,
    builds the full nested listing with ``index_repository`` (using the
    proxies returned by ``get_system_proxies``), hands the result to
    ``write_file_structure_to_json`` and prints a confirmation message.

    ``TOKEN`` is passed through as-is and may be unset (whether the scraper
    accepts a missing token is decided by ``hf_scrapper`` -- not checked here).

    Raises:
        RuntimeError: If ``REPO`` or ``INDEX_FILE`` is unset or empty.
    """
    token = os.getenv("TOKEN")
    repo = os.getenv("REPO")
    output_path = os.getenv("INDEX_FILE")

    # Fail fast: without this check a missing INDEX_FILE is only noticed after
    # the whole repository has been crawled, and a missing REPO is crawled and
    # reported as repository 'None'.
    missing = [
        name
        for name, value in (("REPO", repo), ("INDEX_FILE", output_path))
        if not value
    ]
    if missing:
        raise RuntimeError(
            f"Missing required environment variable(s): {', '.join(missing)}"
        )

    proxies = get_system_proxies()
    full_structure = index_repository(token, repo, "", proxies)
    write_file_structure_to_json(full_structure, output_path)
    print(f"Full file structure for repository '{repo}' has been indexed and saved to {output_path}")
| |