score-graph-ae-training / anonymize_mlflow.py
hroth's picture
Upload 3 files
362e18c verified
#!/usr/bin/env python3
import os
import re
import glob
def anonymize_file(file_path):
    """Anonymize personal data in a single text file, rewriting it in place.

    Each line is checked against a small set of rules; the first rule whose
    key appears in the line is applied (later rules are not tried for that
    line):

    * ``artifact_uri:`` / ``artifact_location:`` -- the part of a ``file://``
      URI that precedes ``/mlruns/`` is replaced with ``<placeholder>``.
    * ``user_id:`` -- the value is replaced with ``<user_placeholder>``.
    * ``data_dir:`` / ``csv_path:`` -- everything between the colon and
      ``/pdmx/`` is dropped, leaving a path that starts at ``pdmx/``.

    The file is only rewritten (and a message printed) when at least one
    line actually changed, so running the function twice is harmless.

    Args:
        file_path: Path of the file to process.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # (keys that trigger the rule, regex pattern, replacement)
    rules = (
        (
            ('artifact_uri:', 'artifact_location:'),
            r'file://(.+?)(/mlruns/)',
            r'file://<placeholder>\2',
        ),
        (
            ('user_id:',),
            r'user_id:\s*.+',
            'user_id: <user_placeholder>',
        ),
        (
            ('data_dir:', 'csv_path:'),
            r':\s*(.+?)/pdmx/',
            r': pdmx/',
        ),
    )

    # Explicit encoding: without it, open() falls back to the locale's
    # default codec, which makes the result platform-dependent and can fail
    # or corrupt non-ASCII text on non-UTF-8 locales.
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    modified = False
    for i, line in enumerate(lines):
        for keys, pattern, replacement in rules:
            if not any(key in line for key in keys):
                continue
            new_line = re.sub(pattern, replacement, line)
            if new_line != line:
                lines[i] = new_line
                modified = True
            # Only the first rule whose key matches is applied to a line.
            break

    if modified:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.writelines(lines)
        print(f"Anonymized: {file_path}")
def main():
    """Anonymize every matching file below the current working directory.

    First, each ``*.yaml`` file found recursively is passed to
    ``anonymize_file``. Then every ``tags/mlflow.user`` file found
    recursively is overwritten with a placeholder user name, and a message
    is printed for each one. A final message is printed when done.
    """
    # Pass 1: YAML files are scrubbed line by line.
    for yaml_path in glob.glob('**/*.yaml', recursive=True):
        anonymize_file(yaml_path)

    # Pass 2: mlflow.user tag files are replaced wholesale.
    for tag_path in glob.glob('**/tags/mlflow.user', recursive=True):
        with open(tag_path, 'w') as handle:
            handle.write('<user_placeholder>\n')
        print(f"Anonymized: {tag_path}")

    print("Anonymization complete.")
# Run the anonymization only when executed as a script, not when imported.
if __name__ == "__main__":
    main()