|
|
from hf_scrapper import get_file_structure |
|
|
from dotenv import load_dotenv |
|
|
import os |
|
|
import json |
|
|
|
|
|
# Runs at import time, before indexer() and the __main__ block read TOKEN,
# REPO and CACHE_DIR via os.getenv. Presumably this loads them from a local
# .env file (python-dotenv) -- confirm against the project's setup.
load_dotenv()
|
|
|
|
|
def index_repository(token, repo, current_path=""):
    """Recursively build a nested listing of ``repo`` starting at ``current_path``.

    Each item returned by ``get_file_structure(repo, token, current_path)``
    is turned into one entry of the result:

    * items whose ``'type'`` is ``'directory'`` become
      ``{"type": "directory", "path": ..., "contents": [...]}``, where
      ``contents`` is the result of indexing that directory's path;
    * every other item becomes ``{"type": ..., "size": ..., "path": ...}``.

    Args:
        token: Passed through unchanged to ``get_file_structure``
            (presumably an access token -- confirm against hf_scrapper).
        repo: Passed through unchanged to ``get_file_structure``.
        current_path: Path inside the repository to list; the default
            empty string is what ``indexer`` passes for the top level.

    Returns:
        A list of entry dicts, in the order ``get_file_structure``
        yielded the items.
    """
    entries = []

    for node in get_file_structure(repo, token, current_path):
        # Handle plain files first so the recursive case reads unindented.
        if node['type'] != 'directory':
            entries.append({
                "type": node['type'],
                "size": node['size'],
                "path": node['path'],
            })
            continue

        # Descend into the directory before recording it, so its entry
        # carries the fully indexed subtree.
        children = index_repository(token, repo, node['path'])
        entries.append({
            "type": "directory",
            "path": node['path'],
            "contents": children,
        })

    return entries
|
|
|
|
|
def indexer():
    """Index the repository named by the environment and return its listing.

    Reads the ``TOKEN`` and ``REPO`` environment variables, indexes the
    repository from its top level (empty path) with ``index_repository``,
    prints a confirmation message, and returns the resulting structure.

    Returns:
        The nested list produced by ``index_repository``.
    """
    env = os.environ
    access_token = env.get("TOKEN")
    repo = env.get("REPO")

    listing = index_repository(token=access_token, repo=repo, current_path="")

    print(f"Full file structure for repository '{repo}' has been indexed.")
    return listing
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: index the configured repository and cache the
    # resulting structure as JSON under CACHE_DIR.
    files = indexer()

    # CACHE_DIR may be unset; fall back to the current directory instead of
    # formatting None into the file name ("Nonefile_structure.json").
    cache_dir = os.getenv('CACHE_DIR') or ""
    if cache_dir:
        # Make sure the target directory exists so open() below cannot fail
        # just because the cache has not been created yet.
        os.makedirs(cache_dir, exist_ok=True)

    # os.path.join inserts a separator only when cache_dir lacks one, so
    # "/cache" and "/cache/" both yield "/cache/file_structure.json"
    # (plain concatenation turned the former into "/cachefile_structure.json").
    output_file = os.path.join(cache_dir, "file_structure.json")

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(files, f, indent=4)

    print(f"File structure saved to {output_file}")
|
|
|