|
|
import os
|
|
|
import re
|
|
|
import glob
|
|
|
|
|
|
def needs_hf_token(content):
    """Return True when *content* appears to use the HuggingFace ecosystem.

    Detection is heuristic: we look for import statements and API names
    associated with huggingface_hub / transformers, plus hf:// dataset URLs.

    Args:
        content: Full text of a Python source file.

    Returns:
        bool: True if any HuggingFace marker is found.
    """
    # Each marker is a regex; first hit wins.
    for marker in (
        r'from\s+huggingface_hub\s+import',
        r'import\s+huggingface_hub',
        r'hf://datasets',
        r'HfApi',
        r'HfFileSystem',
        r'from\s+transformers\s+import',
    ):
        if re.search(marker, content):
            return True
    return False
|
|
|
|
|
|
def has_token_setup(content):
    """Return True when *content* already wires up a HuggingFace token.

    Recognizes the common forms: ``HF_TOKEN = os.getenv(...)``, direct
    ``os.environ["HF_TOKEN"]`` access, and calls to a ``get_hf_token``
    helper.

    Args:
        content: Full text of a Python source file.

    Returns:
        bool: True if token initialization is already present.
    """
    return any(
        re.search(signature, content)
        for signature in (
            r'HF_TOKEN\s*=\s*os\.getenv',
            r'os\.environ\[["\']\s*HF_TOKEN\s*["\']\]',
            r'get_hf_token',
        )
    )
|
|
|
|
|
|
def add_token_setup(file_path):
    """Insert HF_TOKEN initialization into *file_path* if it needs one.

    The file is rewritten with ``HF_TOKEN = os.getenv("HF_TOKEN")`` placed
    just after its import section, and a ``.bak`` copy of the original
    content is written alongside it.

    Args:
        file_path: Path to the Python file to inspect and patch.

    Returns:
        bool: True when the file was modified; False when it was skipped
        (no HuggingFace usage detected, or token setup already present).
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    if not needs_hf_token(content) or has_token_setup(content):
        return False

    lines = content.split('\n')

    # Find the line just past the LAST top-level import so the token
    # setup lands at the end of the import section.
    import_section_end = 0
    for i, line in enumerate(lines):
        if line.startswith(('import ', 'from ')):
            import_section_end = i + 1

    if import_section_end == 0:
        # No imports found: never insert code above a shebang or a
        # PEP 263 coding declaration, or the file would stop working.
        if lines and lines[0].startswith('#!'):
            import_section_end = 1
        if (len(lines) > import_section_end
                and re.match(r'#.*coding[:=]', lines[import_section_end])):
            import_section_end += 1

    # Use a real regex rather than a substring test: a plain
    # `'import os' in content` is fooled by names like "import osmnx",
    # which would leave the inserted os.getenv call without `os` in scope.
    if not re.search(r'^\s*import\s+os\b', content, flags=re.MULTILINE):
        lines.insert(import_section_end, 'import os')
        import_section_end += 1

    lines.insert(import_section_end, '\n# Initialize HuggingFace token from environment')
    lines.insert(import_section_end + 1, 'HF_TOKEN = os.getenv("HF_TOKEN")')
    lines.insert(import_section_end + 2, '')

    # Write the backup as a COPY instead of renaming the original away:
    # if the rewrite below fails, file_path still holds the original text.
    backup_path = file_path + '.bak'
    with open(backup_path, 'w', encoding='utf-8') as f:
        f.write(content)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))

    print(f"Added HF token setup to {file_path}")
    return True
|
|
|
|
|
|
def main():
    """Walk the repository, patching HF token setup into Python files.

    Scans every ``*.py`` under the parent of this script's directory,
    applies :func:`add_token_setup` to each, and prints a summary of
    which files were modified and which were skipped.
    """
    # Repository root is one level above the directory holding this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir = os.path.dirname(script_dir)
    python_files = glob.glob(os.path.join(root_dir, '**/*.py'), recursive=True)

    modified_files = []
    skipped_files = []

    print("Scanning Python files for HuggingFace usage...")
    for file_path in python_files:
        try:
            changed = add_token_setup(file_path)
        except Exception as e:
            # Keep going: one unreadable file should not abort the scan.
            print(f"Error processing {file_path}: {e}")
            continue
        (modified_files if changed else skipped_files).append(file_path)

    print("\nSummary:")
    print(f"Modified {len(modified_files)} files:")
    for file in modified_files:
        print(f" - {os.path.relpath(file, root_dir)}")

    print(f"\nSkipped {len(skipped_files)} files (no HF usage or already configured)")
    print("\nBackup files (.bak) were created for modified files.")
|
|
|
|
|
|
# Run the scanner only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
|
|
|