File size: 3,016 Bytes
7a0c684
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
import re
import glob

def needs_hf_token(content):
    """Return True if *content* appears to use HuggingFace APIs or datasets."""
    # Signals that a module talks to the HuggingFace ecosystem.
    indicators = (
        r'from\s+huggingface_hub\s+import',
        r'import\s+huggingface_hub',
        r'hf://datasets',
        r'HfApi',
        r'HfFileSystem',
        r'from\s+transformers\s+import',
    )
    for indicator in indicators:
        if re.search(indicator, content):
            return True
    return False

def has_token_setup(content):
    """Return True if *content* already wires up an HF token somewhere."""
    # Any of: reading HF_TOKEN from the environment, setting it in
    # os.environ, or using the project's get_hf_token_cached() helper.
    combined = (
        r'HF_TOKEN\s*=\s*os\.getenv'
        r'|os\.environ\[["\']\s*HF_TOKEN\s*["\']\]'
        r'|get_hf_token'
    )
    return re.search(combined, content) is not None

def add_token_setup(file_path):
    """Add HF token setup to *file_path* if it uses HuggingFace and lacks one.

    Returns True when the file was rewritten (a ``.bak`` backup of the
    original is kept next to it), False when no change was needed.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    if not needs_hf_token(content) or has_token_setup(content):
        return False

    # Find the end of the imports section: track the last top-level
    # import/from line so the setup lands after all imports.
    lines = content.split('\n')
    import_section_end = 0
    for i, line in enumerate(lines):
        if line.startswith(('import ', 'from ')):
            import_section_end = i + 1

    # Add `import os` if not already present.  A word-boundary regex avoids
    # the false positives of the original substring test (`'import os' in
    # content` also matched e.g. `import osmnx` or a mention in a comment).
    if not re.search(r'^\s*import\s+os\b', content, re.MULTILINE):
        lines.insert(import_section_end, 'import os')
        import_section_end += 1

    # Insert the token setup after the imports.  Each inserted element is a
    # single physical line (the original embedded a '\n' inside the comment
    # string, producing a two-line list element).
    lines.insert(import_section_end, '')
    lines.insert(import_section_end + 1, '# Initialize HuggingFace token from environment')
    lines.insert(import_section_end + 2, 'HF_TOKEN = os.getenv("HF_TOKEN")')
    lines.insert(import_section_end + 3, '')  # Empty line for spacing

    # Keep a backup of the original.  os.replace overwrites a stale .bak;
    # os.rename would raise on Windows when the target already exists.
    backup_path = file_path + '.bak'
    os.replace(file_path, backup_path)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))

    print(f"Added HF token setup to {file_path}")
    return True

def main():
    """Walk the repository, injecting HF token setup where needed."""
    # Repository root = parent of the directory containing this script.
    root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    python_files = glob.glob(os.path.join(root_dir, '**/*.py'), recursive=True)

    modified_files = []
    skipped_files = []

    print("Scanning Python files for HuggingFace usage...")
    for path in python_files:
        try:
            bucket = modified_files if add_token_setup(path) else skipped_files
            bucket.append(path)
        except Exception as exc:
            # Best-effort tool: report the failure and keep scanning.
            print(f"Error processing {path}: {exc}")

    print("\nSummary:")
    print(f"Modified {len(modified_files)} files:")
    for modified in modified_files:
        print(f"  - {os.path.relpath(modified, root_dir)}")

    print(f"\nSkipped {len(skipped_files)} files (no HF usage or already configured)")
    print("\nBackup files (.bak) were created for modified files.")

# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()