import os
import re

# Initialize HuggingFace token from environment
# NOTE(review): HF_TOKEN is read but never used anywhere in this file —
# presumably kept for future authenticated HF API calls; confirm before removing.
HF_TOKEN = os.getenv("HF_TOKEN")

def process_file(file_path):
    """Standardize a single Python source file in place.

    Removes a configured secret/token line (if one is set) and rewrites
    every ``hf://datasets/...`` URL to the canonical storage URL.  When a
    change is made, the original file is kept as ``<file_path>.bak`` and
    the updated content is written back to ``file_path``; unchanged files
    are left untouched.

    Args:
        file_path: Path to the file to standardize.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    modified = False

    # Remove the specific token line.  The value is redacted (empty) here;
    # the truthiness guard is essential because '' is a substring of every
    # string, and replacing '' + '\n' would strip EVERY newline from the
    # file and flag every file as modified (the original bug).
    token_line = ''
    if token_line and token_line in content:
        content = content.replace(token_line + '\n', '')  # With newline
        content = content.replace(token_line, '')  # Without newline
        modified = True

    # Rewrite any HF dataset URL that is not already the canonical one.
    # re.sub with a replacement callable avoids mutating `content` while
    # iterating matches found in the old string.
    hf_pattern = r'hf://datasets/[^/"\s]+/[^/"\s]+/[^"\s]+'
    canonical = "hf://datasets/Fred808/helium/storage.json"

    def _canonicalize(match):
        nonlocal modified
        url = match.group()
        if url != canonical:
            modified = True
            return canonical
        return url

    content = re.sub(hf_pattern, _canonicalize, content)

    if modified:
        # Preserve the original as a backup before overwriting.
        backup_path = file_path + '.bak'
        os.rename(file_path, backup_path)

        # Write updated content
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"Updated {file_path}")
    
def main():
    """Walk the project tree and standardize every Python source file.

    The root is the parent of the directory containing this script; each
    ``.py`` file found is passed to :func:`process_file`.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)

    for dirpath, _, filenames in os.walk(project_root):
        # Only genuine Python sources; backups carry a .bak suffix.
        targets = [n for n in filenames if n.endswith('.py') and not n.endswith('.bak')]
        for name in targets:
            process_file(os.path.join(dirpath, name))

if __name__ == "__main__":
    # Run the standardization pass, then remind the operator that .bak
    # backups were left behind for manual review before deletion.
    main()
    print("Standardization complete. Review changes and delete .bak files if satisfied.")