"""Insert HuggingFace token setup into project Python files that need it.

The script scans every ``*.py`` file under the parent of this script's
directory.  Files that use HuggingFace tooling but do not yet read
``HF_TOKEN`` get an ``HF_TOKEN = os.getenv("HF_TOKEN")`` line inserted after
their import section.  A ``.bak`` copy of each modified file is kept.
"""
import ast
import glob
import os
import re
import shutil
import warnings

# Directory names that hold third-party or generated code; files below them
# are never rewritten.
_EXCLUDED_DIR_NAMES = frozenset(
    {'site-packages', 'node_modules', '__pycache__', 'venv'}
)

# Lines inserted after the import section (an ``import os`` line is put in
# front of them when the file does not already bind ``os``).
_TOKEN_SNIPPET = (
    '\n# Initialize HuggingFace token from environment',
    'HF_TOKEN = os.getenv("HF_TOKEN")',
    '',  # Empty line for spacing
)


def needs_hf_token(content):
    """Return True if *content* matches any HuggingFace usage pattern."""
    hf_related_patterns = [
        r'from\s+huggingface_hub\s+import',
        r'import\s+huggingface_hub',
        r'hf://datasets',
        r'HfApi',
        r'HfFileSystem',
        r'from\s+transformers\s+import'
    ]
    return any(re.search(pattern, content) for pattern in hf_related_patterns)


def has_token_setup(content):
    """Return True if *content* already contains a token setup pattern."""
    token_patterns = [
        r'HF_TOKEN\s*=\s*os\.getenv',
        r'os\.environ\[["\']\s*HF_TOKEN\s*["\']\]',
        r'get_hf_token',  # Matches helpers such as config.get_hf_token_cached()
    ]
    return any(re.search(pattern, content) for pattern in token_patterns)


def _scan_imports_by_line(content, lines):
    """Locate the import section with line-prefix matching (fallback path)."""
    insert_at = 0
    for index, line in enumerate(lines):
        if line.startswith(('import ', 'from ')):
            insert_at = index + 1
    return insert_at, 'import os' in content


def _scan_imports(content, lines):
    """Return ``(insert_at, os_imported)`` for the module text *content*.

    ``insert_at`` is the index in *lines* just past the last top-level import
    statement, so multi-line (parenthesized) imports are kept intact and
    import-looking text inside strings is ignored.  Without any import it
    points just past the module docstring, or past the leading comment lines
    (shebang, coding cookie, header) when there is no docstring either.
    ``os_imported`` tells whether a top-level ``import os`` / ``import os.x``
    binds the name ``os``.

    Source that cannot be parsed falls back to line-prefix matching.
    """
    try:
        with warnings.catch_warnings():
            # Scanned files may trigger SyntaxWarning (e.g. bad escapes);
            # that is not this tool's concern.
            warnings.simplefilter('ignore')
            tree = ast.parse(content)
    except (SyntaxError, ValueError):
        return _scan_imports_by_line(content, lines)

    insert_at = 0
    os_imported = False

    if tree.body and ast.get_docstring(tree, clean=False) is not None:
        insert_at = tree.body[0].end_lineno

    for node in tree.body:
        if not isinstance(node, (ast.Import, ast.ImportFrom)):
            continue
        # end_lineno is 1-based, so it is also the 0-based index of the line
        # that follows the statement.
        insert_at = node.end_lineno
        if isinstance(node, ast.Import):
            for alias in node.names:
                if alias.asname is None and alias.name.split('.')[0] == 'os':
                    os_imported = True

    if insert_at == 0:
        # Nothing to anchor on: at least stay below the shebang / coding
        # cookie / header comments.
        while insert_at < len(lines) and lines[insert_at].startswith('#'):
            insert_at += 1

    return insert_at, os_imported


def add_token_setup(file_path):
    """Add HF token setup to the file at *file_path* if it needs one.

    The file is left untouched when it shows no HuggingFace usage or already
    has token setup.  Otherwise the original is copied to
    ``file_path + '.bak'`` and the file is rewritten in place with the setup
    snippet inserted after its import section.

    Returns:
        True if the file was modified, False if it was skipped.

    Raises:
        OSError: if the file cannot be read, backed up or written.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    if not needs_hf_token(content) or has_token_setup(content):
        return False

    # split('\n') (not splitlines) keeps indices aligned with ast line numbers.
    lines = content.split('\n')
    insert_at, os_imported = _scan_imports(content, lines)

    snippet = [] if os_imported else ['import os']
    snippet.extend(_TOKEN_SNIPPET)
    lines[insert_at:insert_at] = snippet

    # Copy instead of rename: the original keeps its permissions, an existing
    # .bak is overwritten on every platform, and a failed write never leaves
    # the path without a file.
    backup_path = file_path + '.bak'
    shutil.copy2(file_path, backup_path)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))

    print(f"Added HF token setup to {file_path}")
    return True


def _is_excluded(file_path, root_dir):
    """Return True if *file_path* lies below an excluded directory."""
    parts = os.path.relpath(file_path, root_dir).split(os.sep)
    return any(part in _EXCLUDED_DIR_NAMES for part in parts[:-1])


def main():
    """Scan the project tree, patch files that need it and print a summary."""
    root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    python_files = [
        path
        for path in glob.glob(os.path.join(root_dir, '**/*.py'), recursive=True)
        if not _is_excluded(path, root_dir)
    ]

    modified_files = []
    skipped_files = []
    failed_files = []

    print("Scanning Python files for HuggingFace usage...")
    for file_path in python_files:
        try:
            modified = add_token_setup(file_path)
        except Exception as e:
            # Top-level boundary: one unreadable file must not stop the scan.
            print(f"Error processing {file_path}: {e}")
            failed_files.append(file_path)
        else:
            if modified:
                modified_files.append(file_path)
            else:
                skipped_files.append(file_path)

    print("\nSummary:")
    print(f"Modified {len(modified_files)} files:")
    for file in modified_files:
        print(f"  - {os.path.relpath(file, root_dir)}")
    print(f"\nSkipped {len(skipped_files)} files (no HF usage or already configured)")
    if failed_files:
        print(f"\nFailed to process {len(failed_files)} files (see errors above)")
    print("\nBackup files (.bak) were created for modified files.")


if __name__ == "__main__":
    main()