|
|
import os
|
|
|
import re
|
|
|
|
|
|
|
|
|
# Token read from the HF_TOKEN environment variable; None when it is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
|
|
|
|
|
|
|
def process_file(file_path):
    """Rewrite one source file so it uses the standard storage URL.

    Two transformations are applied to the file's text:

    1. Every occurrence of ``token_line`` (and its trailing newline, when
       present) is removed.
    2. Every ``hf://datasets/<owner>/<repo>/<path>`` URL is replaced with
       ``hf://datasets/Fred808/helium/storage.json``.

    When the text changes, the original file is renamed to
    ``file_path + '.bak'`` and the new text is written to ``file_path``.
    Files whose text does not change are left untouched.

    Args:
        file_path: Path of the UTF-8 text file to process.

    Raises:
        OSError: If the file cannot be read, renamed, or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        original = f.read()

    content = original

    # NOTE(review): this literal is empty in the file as it stands --
    # presumably the real line was redacted; fill it in before relying on
    # the removal step.
    token_line = ''

    # An empty string is "in" every string, and replacing '' + '\n' would
    # strip every newline from the file, so only act on a non-empty line.
    if token_line and token_line in content:
        content = content.replace(token_line + '\n', '')
        content = content.replace(token_line, '')

    canonical_url = "hf://datasets/Fred808/helium/storage.json"

    # Both quote characters terminate a URL; otherwise a URL inside a
    # single-quoted literal would match together with its closing quote and
    # the replacement would delete that quote.
    hf_pattern = r'''hf://datasets/[^/"'\s]+/[^/"'\s]+/[^"'\s]+'''

    # A single substitution pass replaces each match exactly where it was
    # found, so a URL that is a prefix of a longer URL cannot corrupt it.
    content = re.sub(hf_pattern, canonical_url, content)

    if content == original:
        return

    # Keep the original next to the rewritten file; if the write below fails
    # the untouched source is still available as the .bak file.
    backup_path = file_path + '.bak'
    os.rename(file_path, backup_path)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"Updated {file_path}")
|
|
|
|
|
|
def main():
    """Run ``process_file`` on every ``.py`` file under the project root.

    The root is the parent of the directory that contains this script; the
    whole tree below it is walked.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir = os.path.dirname(script_dir)

    for current_dir, _subdirs, filenames in os.walk(root_dir):
        # A name ending in '.py' can never end in '.bak', so the suffix test
        # alone selects the same files.
        python_files = [name for name in filenames if name.endswith('.py')]
        for name in python_files:
            process_file(os.path.join(current_dir, name))
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: process the tree, then remind the operator that the
    # .bak backups are left in place for review.
    main()
    completion_notice = "Standardization complete. Review changes and delete .bak files if satisfied."
    print(completion_notice)
|
|
|
|