import os
import re

# Initialize HuggingFace token from environment (not used in this script's
# logic; kept so importers relying on the constant keep working).
HF_TOKEN = os.getenv("HF_TOKEN")

# Canonical dataset URL every hf:// reference is normalized to.
CANONICAL_HF_URL = "hf://datasets/Fred808/helium/storage.json"

# Matches hf://datasets/<org>/<dataset>/<path> style URLs; compiled once at
# module level since it is applied to every file.
HF_URL_PATTERN = re.compile(r'hf://datasets/[^/"\s]+/[^/"\s]+/[^"\s]+')


def process_file(file_path):
    """Rewrite *file_path* in place, backing up the original to '<path>.bak'.

    Two edits are applied:
      1. Remove a specific secret token line (when one is configured).
      2. Normalize every hf://datasets/... URL to CANONICAL_HF_URL.

    The file is only rewritten (and backed up) when its content actually
    changed.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    modified = False

    # Remove the specific token line.
    # BUG FIX: the original tested `if token_line in content:` with
    # token_line = '' — '' is a substring of every string, so the branch
    # always ran and replace('' + '\n', '') stripped EVERY newline from the
    # file. Guard on a non-empty token so an unset token is a no-op.
    token_line = ''
    if token_line and token_line in content:
        content = content.replace(token_line + '\n', '')  # with newline
        content = content.replace(token_line, '')         # without newline
        modified = True

    # Normalize HF dataset URLs in a single pass. re.subn replaces the
    # original finditer + repeated full-string replace() loop; comparing the
    # result to the input preserves the original semantics of only flagging
    # `modified` when a non-canonical URL was actually present.
    normalized = HF_URL_PATTERN.sub(CANONICAL_HF_URL, content)
    if normalized != content:
        content = normalized
        modified = True

    if not modified:
        return

    # Keep a backup of the original before writing. os.replace (unlike
    # os.rename) also overwrites a stale .bak on Windows instead of raising.
    backup_path = file_path + '.bak'
    os.replace(file_path, backup_path)

    # Write updated content.
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(content)
    print(f"Updated {file_path}")


def main():
    """Walk the repo root (parent of this script's directory) and process
    every Python source file found."""
    root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    for dirpath, _, filenames in os.walk(root_dir):
        for name in filenames:
            # The '.bak' check is redundant after endswith('.py') (a name
            # cannot end in both), but is kept as a belt-and-braces guard
            # against ever reprocessing backup files.
            if name.endswith('.py') and not name.endswith('.bak'):
                process_file(os.path.join(dirpath, name))


if __name__ == "__main__":
    main()
    print("Standardization complete. Review changes and delete .bak files if satisfied.")