import json
import os

from dotenv import load_dotenv

from hf_scrapper import get_file_structure

# Populate os.environ from a local .env file (if any) before the TOKEN, REPO
# and CACHE_DIR variables are read below.
load_dotenv()


def index_repository(token, repo, current_path=""):
    """Recursively build a nested listing of a repository's files.

    Calls ``get_file_structure(repo, token, current_path)`` for the given
    path and descends into every entry whose ``'type'`` is ``'directory'``.

    Args:
        token: Access token forwarded unchanged to ``get_file_structure``.
        repo: Repository identifier forwarded unchanged to
            ``get_file_structure``.
        current_path: Path inside the repository to list; the empty string
            is what ``indexer`` passes for the top level.

    Returns:
        A list of dicts in the order returned by ``get_file_structure``.
        Directory entries have the keys ``"type"`` (always ``"directory"``),
        ``"path"`` and ``"contents"`` (the nested listing of that directory).
        Every other entry has the keys ``"type"``, ``"size"`` and ``"path"``
        copied from the source item.

    Raises:
        KeyError: If an item lacks ``'type'`` or ``'path'``, or a
            non-directory item lacks ``'size'``.
    """
    file_structure = get_file_structure(repo, token, current_path)

    full_structure = []
    for item in file_structure:
        if item['type'] == 'directory':
            # Recurse using the item's own path so nested directories are
            # listed relative to the repository root.
            sub_directory_structure = index_repository(token, repo, item['path'])
            full_structure.append({
                "type": "directory",
                "path": item['path'],
                "contents": sub_directory_structure,
            })
        else:
            # Keep only the fields of interest; any other keys on the item
            # are intentionally dropped.
            full_structure.append({
                "type": item['type'],
                "size": item['size'],
                "path": item['path'],
            })

    return full_structure


def indexer():
    """Index the repository configured through environment variables.

    Reads ``TOKEN`` and ``REPO`` from the environment, indexes the repository
    starting at its top level, and prints a confirmation message.

    Returns:
        The nested listing produced by ``index_repository``.
    """
    token = os.getenv("TOKEN")
    repo = os.getenv("REPO")
    full_structure = index_repository(token, repo, "")
    print(f"Full file structure for repository '{repo}' has been indexed.")
    return full_structure


if __name__ == '__main__':
    files = indexer()

    # Build the output path inside CACHE_DIR. os.path.join inserts the path
    # separator only when it is missing, so a CACHE_DIR without a trailing
    # slash no longer produces "<dir>file_structure.json", and an unset
    # CACHE_DIR falls back to the current directory instead of producing
    # "Nonefile_structure.json".
    cache_dir = os.getenv("CACHE_DIR") or ""
    if cache_dir:
        # Make sure the target directory exists before writing into it.
        os.makedirs(cache_dir, exist_ok=True)
    output_file = os.path.join(cache_dir, "file_structure.json")

    # Save the structure to a JSON file
    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump(files, f, indent=4)

    print(f"File structure saved to {output_file}")