# INV/scripts/standardize_hf_config.py
# Uploaded by Fred808 ("Upload 256 files", commit 7a0c684, verified).
import os
import re
# Hugging Face access token, read once at import time from the HF_TOKEN
# environment variable; None when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
def process_file(file_path, token_line=''):
    """Standardize Hugging Face dataset URLs (and drop a token line) in one file.

    Every ``hf://datasets/<owner>/<repo>/<path>`` URL found in the file is
    rewritten to ``hf://datasets/Fred808/helium/storage.json``.  If
    ``token_line`` is non-empty, every occurrence of it (with or without its
    trailing newline) is removed as well.

    The file is only touched when its content actually changes.  In that case
    the original is first moved to ``<file_path>.bak`` (replacing any stale
    backup) and the updated text is written to ``file_path``.

    Args:
        file_path: Path of the UTF-8 text file to process.
        token_line: Exact line of text to strip from the file.  Defaults to
            the empty string, which disables the removal step.
            NOTE(review): the literal was empty in the original script --
            presumably redacted; pass the intended line explicitly if the
            removal step is still needed.

    Raises:
        OSError: If the file cannot be read, moved, or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    target_url = "hf://datasets/Fred808/helium/storage.json"

    with open(file_path, 'r', encoding='utf-8') as f:
        original = f.read()
    content = original

    # Remove the specific token line.  The emptiness guard matters: '' is a
    # substring of every string, and replacing '' + '\n' would strip every
    # newline from the file.
    if token_line and token_line in content:
        content = content.replace(token_line + '\n', '')  # With newline
        content = content.replace(token_line, '')  # Without newline

    # Replace HF dataset URLs.  A URL ends at whitespace or at either quote
    # character, so single-quoted literals keep their closing quote.  A single
    # re.sub pass rewrites each match in place, so a URL that is a prefix of a
    # longer URL cannot corrupt the longer one.
    hf_pattern = r'hf://datasets/[^/"\'\s]+/[^/"\'\s]+/[^"\'\s]+'
    content = re.sub(hf_pattern, target_url, content)

    if content == original:
        return

    # Create backup; os.replace overwrites a leftover .bak on every platform,
    # whereas os.rename raises on Windows when the destination exists.
    backup_path = file_path + '.bak'
    os.replace(file_path, backup_path)

    # Write updated content
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(content)
    print(f"Updated {file_path}")
def main():
    """Run ``process_file`` on every ``.py`` file under the project root.

    The project root is taken to be the parent of the directory that
    contains this script; the whole tree below it is walked recursively.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir = os.path.dirname(script_dir)

    for current_dir, _subdirs, names in os.walk(root_dir):
        for name in names:
            # Only Python sources are candidates; backup files are excluded.
            if not name.endswith('.py') or name.endswith('.bak'):
                continue
            process_file(os.path.join(current_dir, name))
# Script entry point: process the tree, then remind the operator that the
# generated .bak backups are left in place for manual review.
if __name__ == "__main__":
    main()
    print("Standardization complete. Review changes and delete .bak files if satisfied.")