hroth committed on
Commit
362e18c
·
verified ·
1 Parent(s): 0da975c

Upload 3 files

Browse files
Files changed (3) hide show
  1. anonymize_mlflow.py +53 -0
  2. mlruns.zip +3 -0
  3. restore_mlflow_paths.py +58 -0
anonymize_mlflow.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import os
3
+ import re
4
+ import glob
5
+
6
def anonymize_file(file_path):
    """Anonymize personal data in a YAML-style text file, rewriting it in place.

    The file is processed line by line. A line is rewritten when it contains
    one of the following keys:

    * ``artifact_uri:`` / ``artifact_location:`` -- the part of a ``file://``
      URI that precedes the first ``/mlruns/`` is replaced by
      ``<placeholder>``.
    * ``user_id:`` -- the value is replaced by ``<user_placeholder>``.
    * ``data_dir:`` / ``csv_path:`` -- everything between the colon and the
      first ``/pdmx/`` is dropped, leaving a relative ``pdmx/...`` path.

    The file is only written back (and ``Anonymized: <path>`` printed) when at
    least one line actually changed, so running it twice is harmless.

    Args:
        file_path: Path of the file to process.
    """
    # Explicit UTF-8 so the result does not depend on the platform's default
    # locale encoding.
    # NOTE(review): assumes the metadata/config files are UTF-8 -- confirm for
    # hand-written config files.
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    modified = False
    for i, line in enumerate(lines):
        if 'artifact_uri:' in line or 'artifact_location:' in line:
            # Non-greedy match: only the prefix up to the FIRST /mlruns/ goes.
            new_line = re.sub(r'file://(.+?)(/mlruns/)', r'file://<placeholder>\2', line)
        elif 'user_id:' in line:
            # '.+' stops before the newline, so the line ending is preserved.
            new_line = re.sub(r'user_id:\s*.+', 'user_id: <user_placeholder>', line)
        elif 'data_dir:' in line or 'csv_path:' in line:
            new_line = re.sub(r':\s*(.+?)/pdmx/', r': pdmx/', line)
        else:
            continue

        if new_line != line:
            lines[i] = new_line
            modified = True

    if modified:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.writelines(lines)
        print(f"Anonymized: {file_path}")
36
+
37
def _find_files(name_matches, parent=None):
    """Return the sorted relative paths of matching files below the CWD.

    ``os.walk`` is used rather than ``glob.glob('**/...', recursive=True)``
    because recursive glob patterns skip dot-directories, which would leave
    metadata in hidden folders (e.g. MLflow's ``.trash`` folder for deleted
    experiments) un-anonymized.

    Args:
        name_matches: Callable taking a file name and returning True to keep it.
        parent: If given, only files whose containing directory has exactly
            this name are returned.
    """
    return sorted(
        os.path.normpath(os.path.join(dirpath, name))
        for dirpath, _dirnames, names in os.walk('.')
        if parent is None or os.path.basename(dirpath) == parent
        for name in names
        if name_matches(name)
    )


def main():
    """Anonymize every MLflow metadata file below the current directory.

    All ``*.yaml`` files are passed through ``anonymize_file``; every
    ``tags/mlflow.user`` file is overwritten with ``<user_placeholder>``.
    Hidden directories are included in the search.
    """
    # Find all relevant files recursively
    for file_path in _find_files(lambda name: name.endswith('.yaml')):
        anonymize_file(file_path)

    # Also anonymize mlflow.user tag files
    for file_path in _find_files(lambda name: name == 'mlflow.user', parent='tags'):
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write('<user_placeholder>\n')
        print(f"Anonymized: {file_path}")

    print("Anonymization complete.")


if __name__ == "__main__":
    main()
mlruns.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b7e943d91f708ea6291d208a3f97705c7851961fa3efecb456d95637f07e511
3
+ size 1558271080
restore_mlflow_paths.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import glob
3
+ import sys
4
+
5
def restore_file(file_path, absolute_path, user_id):
    """Replace anonymization placeholders in one file, rewriting it in place.

    The file is processed line by line:

    * On ``artifact_uri:`` / ``artifact_location:`` lines,
      ``file://<placeholder>/mlruns/`` becomes
      ``file://<absolute_path>/mlruns/``.
    * On ``user_id:`` lines that contain ``<user_placeholder>``, the
      placeholder becomes ``user_id``.

    The file is only written back (and ``Restored: <path>`` printed) when at
    least one line actually changed.

    Args:
        file_path: Path of the file to process.
        absolute_path: Text inserted directly in front of ``/mlruns/``.
        user_id: Text that replaces ``<user_placeholder>``.
    """
    # Explicit UTF-8 so the result does not depend on the platform's default
    # locale encoding.
    # NOTE(review): assumes the metadata files are UTF-8 -- confirm.
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    modified = False
    for i, line in enumerate(lines):
        # Callable replacements are used on purpose: a replacement *string* is
        # parsed as a template, so a backslash in absolute_path or user_id
        # (e.g. "CORP\bob") would be read as an escape or group reference and
        # either corrupt the output or raise re.error.
        if 'artifact_uri:' in line or 'artifact_location:' in line:
            new_line = re.sub(
                r'file://<placeholder>(/mlruns/)',
                lambda m: f'file://{absolute_path}{m.group(1)}',
                line,
            )
        elif 'user_id:' in line and '<user_placeholder>' in line:
            new_line = re.sub(
                r'user_id:\s*<user_placeholder>',
                lambda m: f'user_id: {user_id}',
                line,
            )
        else:
            continue

        if new_line != line:
            lines[i] = new_line
            modified = True

    if modified:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.writelines(lines)
        print(f"Restored: {file_path}")
29
+
30
def _find_files(name_matches, parent=None):
    """Return the sorted relative paths of matching files below the CWD.

    ``os.walk`` is used rather than ``glob.glob('**/...', recursive=True)``
    because recursive glob patterns skip dot-directories, so placeholders in
    hidden folders (e.g. MLflow's ``.trash`` folder for deleted experiments)
    would never be restored.

    Args:
        name_matches: Callable taking a file name and returning True to keep it.
        parent: If given, only files whose containing directory has exactly
            this name are returned.
    """
    import os  # local import: this script's top-level imports do not include os

    return sorted(
        os.path.normpath(os.path.join(dirpath, name))
        for dirpath, _dirnames, names in os.walk('.')
        if parent is None or os.path.basename(dirpath) == parent
        for name in names
        if name_matches(name)
    )


def main():
    """Restore anonymized MLflow metadata below the current directory.

    Expects exactly two command-line arguments, ``<absolute_path>`` and
    ``<user_id>``; otherwise prints a usage message and exits with status 1.
    All ``*.yaml`` files are passed through ``restore_file``; every
    ``tags/mlflow.user`` file that still holds ``<user_placeholder>`` is
    rewritten with ``user_id``. Hidden directories are included in the search.
    """
    if len(sys.argv) != 3:
        print("Usage: python restore_mlflow_paths.py <absolute_path> <user_id>")
        print("Example: python restore_mlflow_paths.py /home/user/path/to/mlruns hendrik-roth")
        sys.exit(1)

    absolute_path = sys.argv[1]
    user_id = sys.argv[2]
    # Ensure the path starts with / and does not end with /
    # NOTE(review): restore_file inserts this value directly in front of
    # "/mlruns/", so it presumably has to name the directory that CONTAINS
    # mlruns -- confirm against the usage example above.
    if not absolute_path.startswith('/'):
        absolute_path = '/' + absolute_path
    absolute_path = absolute_path.rstrip('/')

    # Find all yaml files recursively
    for file_path in _find_files(lambda name: name.endswith('.yaml')):
        restore_file(file_path, absolute_path, user_id)

    # Also restore mlflow.user tag files. Only files that still contain the
    # placeholder are rewritten, matching restore_file, so tag files that were
    # never anonymized keep their original content.
    for file_path in _find_files(lambda name: name == 'mlflow.user', parent='tags'):
        with open(file_path, 'r', encoding='utf-8') as f:
            current = f.read()
        if current.strip() != '<user_placeholder>':
            continue
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(f'{user_id}\n')
        print(f"Restored: {file_path}")

    print("Path restoration complete.")


if __name__ == "__main__":
    main()