Spaces:

Hans-Den
/

lb

Paused

lb / indexer.py

add recent

0fe31f5 over 1 year ago

1.42 kB

	from hf_scrapper import get_file_structure
	from dotenv import load_dotenv
	import os
	import json

	# Runs at import time, before any os.getenv() lookup below (TOKEN, REPO,
	# CACHE_DIR). Presumably this loads those settings from a local .env file
	# into the process environment -- confirm against the python-dotenv docs.
	load_dotenv()

	def index_repository(token, repo, current_path=""):
	    """Recursively build a nested listing of a repository's contents.

	    Asks ``get_file_structure(repo, token, current_path)`` for the items
	    under ``current_path`` and walks them. An item whose ``'type'`` is
	    ``'directory'`` is expanded by recursing on its ``'path'``; every other
	    item is recorded as a leaf.

	    Args:
	        token: Forwarded unchanged to ``get_file_structure``.
	        repo: Forwarded unchanged to ``get_file_structure``.
	        current_path: Path whose contents are listed. Defaults to ``""``
	            (presumably the repository root -- confirm in ``hf_scrapper``).

	    Returns:
	        A list of dicts in the order the items were returned. Directories
	        are ``{"type": "directory", "path": ..., "contents": [...]}`` where
	        ``contents`` is the recursive listing; all other items are
	        ``{"type": ..., "size": ..., "path": ...}``.
	    """
	    entries = []
	    for node in get_file_structure(repo, token, current_path):
	        # Leaves first: copy just the fields we keep and move on.
	        if node['type'] != 'directory':
	            entries.append({
	                "type": node['type'],
	                "size": node['size'],
	                "path": node['path'],
	            })
	            continue

	        # Directory: descend before recording so the entry carries its
	        # complete subtree.
	        children = index_repository(token, repo, node['path'])
	        entries.append({
	            "type": "directory",
	            "path": node['path'],
	            "contents": children,
	        })
	    return entries

	def indexer():
	    """Index the repository configured through environment variables.

	    Reads ``REPO`` and ``TOKEN`` from the environment, indexes the
	    repository starting from the empty path via ``index_repository``, and
	    prints a confirmation line naming the repository.

	    Returns:
	        The nested list produced by ``index_repository``.
	    """
	    repo = os.environ.get("REPO")
	    indexed = index_repository(os.environ.get("TOKEN"), repo, "")
	    print(f"Full file structure for repository '{repo}' has been indexed.")
	    return indexed

	if __name__ == '__main__':
	    # Standalone entry point: index the configured repository and dump the
	    # resulting structure to <CACHE_DIR>/file_structure.json.
	    files = indexer()

	    # Build the output path inside the directory named by CACHE_DIR.
	    # Plain string concatenation produced "Nonefile_structure.json" when
	    # CACHE_DIR was unset and glued the file name onto the directory name
	    # when CACHE_DIR lacked a trailing separator; os.path.join handles both.
	    # An unset or empty CACHE_DIR falls back to the current directory.
	    cache_dir = os.getenv('CACHE_DIR') or ''
	    if cache_dir:
	        # Make sure the cache directory exists so open() below cannot fail
	        # on a missing parent directory.
	        os.makedirs(cache_dir, exist_ok=True)
	    output_file = os.path.join(cache_dir, 'file_structure.json')

	    # Save the structure to a JSON file.
	    with open(output_file, 'w', encoding='utf-8') as f:
	        json.dump(files, f, indent=4)

	    print(f"File structure saved to {output_file}")