| |
| import os |
| import re |
| import glob |
|
|
def _anonymize_line(line):
    """Return *line* with any personal data it carries replaced by placeholders.

    Exactly one rule is applied, selected by the first key found in the line;
    lines that contain none of the known keys are returned unchanged.
    """
    if 'artifact_uri:' in line or 'artifact_location:' in line:
        # Hide everything between the scheme and the mlruns directory.
        return re.sub(r'file://(.+?)(/mlruns/)', r'file://<placeholder>\2', line)

    if 'user_id:' in line:
        # `.` never matches the newline, so the line ending is preserved.
        return re.sub(r'user_id:\s*.+', 'user_id: <user_placeholder>', line)

    if 'data_dir:' in line or 'csv_path:' in line:
        # Drop the path prefix in front of pdmx/ while keeping an optional
        # opening quote, so quoted values stay balanced and remain valid YAML.
        return re.sub(r':\s*(["\']?).*?/pdmx/', r': \1pdmx/', line)

    return line


def anonymize_file(file_path):
    """Anonymize personal data in the text file at *file_path*, in place.

    Three kinds of lines are rewritten:

    * ``artifact_uri:`` / ``artifact_location:`` -- the part of a ``file://``
      URI that precedes ``/mlruns/`` becomes ``<placeholder>``.
    * ``user_id:`` -- the value becomes ``<user_placeholder>``.
    * ``data_dir:`` / ``csv_path:`` -- everything in front of ``pdmx/`` is
      removed from the value.

    The file is read and written as UTF-8. It is only rewritten, and
    ``Anonymized: <path>`` is only printed, when at least one line changed.

    Args:
        file_path: Path of the file to process.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Explicit encoding: the locale default would otherwise decide how
    # non-ASCII content is decoded and written back.
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    anonymized = [_anonymize_line(line) for line in lines]

    # Leave untouched files alone so that re-running the script is a no-op.
    if anonymized != lines:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.writelines(anonymized)
        print(f"Anonymized: {file_path}")
|
|
def main():
    """Anonymize YAML files and ``mlflow.user`` tag files under the current directory.

    Every ``*.yaml`` file found recursively below the working directory is
    passed to ``anonymize_file``. Every ``tags/mlflow.user`` file found the
    same way has its content replaced by ``<user_placeholder>`` followed by a
    newline. Files that already hold exactly that content are skipped, so
    ``Anonymized: <path>`` is printed only for files that actually changed.
    """
    for file_path in glob.glob('**/*.yaml', recursive=True):
        anonymize_file(file_path)

    placeholder = '<user_placeholder>\n'
    for file_path in glob.glob('**/tags/mlflow.user', recursive=True):
        # errors='replace' keeps the comparison from failing on a user name
        # that is not valid UTF-8; such content simply counts as "different".
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            already_anonymized = f.read() == placeholder
        if already_anonymized:
            continue

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(placeholder)
        print(f"Anonymized: {file_path}")

    print("Anonymization complete.")
|
|
# Run the anonymization only when executed as a script, not on import.
if __name__ == "__main__":
    main()