stefanches7 committed on
Commit
51f795f
·
1 Parent(s): 177d033

extract XML structure script

Browse files
Files changed (1) hide show
  1. extract_dir_structure_xml.py +66 -0
extract_dir_structure_xml.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+ import xml.etree.ElementTree as ET
4
+
5
# Default directory scanned by main() when no path is passed on the command line.
# NOTE: this is a machine-specific Windows path; pass a directory as the first
# command-line argument to scan a different location.
FOLDER_LOCATION = r"C:\src\AI-assisted-Neuroimaging-harmonization"
6
+
7
def build_xml(dir_path: Path, root_level: bool = True) -> ET.Element:
    """
    Recursively build an XML tree mirroring the directory at `dir_path`.

    Root:       <dataset>              (when `root_level` is true)
    Subfolders: <folder name="...">
    Files:      <file name="...">

    Within every directory the files come first, then the subfolders, each
    group in sorted path order. A symlinked directory that points back at one
    of its own ancestors is emitted as an empty <folder> instead of being
    descended into, so a link cycle cannot cause unbounded recursion.

    Args:
        dir_path: Directory to describe.
        root_level: True for the top-level call; selects the <dataset> tag
            (which carries no name attribute) instead of <folder>.

    Returns:
        The element describing `dir_path` and everything found below it.
    """
    return _build_xml_node(dir_path, root_level, frozenset())


def _build_xml_node(dir_path: Path, root_level: bool, ancestors: frozenset) -> ET.Element:
    """Build the element for one directory; `ancestors` holds the resolved paths above it."""
    if root_level:
        node = ET.Element("dataset")
    else:
        node = ET.Element("folder", {"name": dir_path.name})

    # is_dir() follows symlinks, so a link to an ancestor would recurse forever.
    real_path = dir_path.resolve()
    if real_path in ancestors:
        return node
    ancestors = ancestors | {real_path}

    # List and sort the directory once; both passes below reuse the result.
    entries = sorted(dir_path.iterdir())

    # Files first ...
    for entry in entries:
        if entry.is_file():
            node.append(ET.Element("file", {"name": entry.name}))

    # ... then subdirectories, each expanded recursively.
    for entry in entries:
        if entry.is_dir():
            node.append(_build_xml_node(entry, False, ancestors))

    return node
30
+
31
def indent(elem: ET.Element, level: int = 0, *, space: str = " ") -> None:
    """
    In-place pretty printer: insert newline/indent whitespace into the tree.

    Only `text`/`tail` values that are empty or whitespace-only are
    overwritten; real text content is left untouched.

    Args:
        elem: Element whose subtree is re-indented in place.
        level: Nesting depth of `elem`; 0 for the root, whose own tail is
            never modified.
        space: Whitespace used for one indentation step (default: one space).
    """
    pad = "\n" + (space * level)
    if len(elem):
        # The first child starts on a new line, one step deeper than elem.
        if not elem.text or not elem.text.strip():
            elem.text = pad + space
        child = None
        for child in elem:
            indent(child, level + 1, space=space)
        # After the loop `child` is the last child: pull its tail back to
        # elem's own depth so the closing tag lines up with the opening tag.
        if not child.tail or not child.tail.strip():
            child.tail = pad
    # Any non-root element is followed by a newline at its own depth; a last
    # child gets this overwritten by its parent as described above.
    if level and (not elem.tail or not elem.tail.strip()):
        elem.tail = pad
45
+
46
def main(directory: str = FOLDER_LOCATION) -> None:
    """
    Print the structure of `directory` as an indented XML document on stdout.

    The document starts with an XML declaration naming UTF-8, and the bytes
    written to stdout are UTF-8 regardless of the console/locale encoding, so
    the declaration stays truthful when the output is redirected to a file.

    Args:
        directory: Path of the directory to describe.

    Exits with status 1, after an error message on stderr, when `directory`
    is not an existing directory.
    """
    path = Path(directory)
    if not path.is_dir():
        print(f"Error: '{directory}' is not a directory.", file=sys.stderr)
        sys.exit(1)

    root_elem = build_xml(path, root_level=True)
    indent(root_elem)

    # Serialise to UTF-8 bytes and prepend the XML declaration by hand.
    xml_bytes = b'<?xml version="1.0" encoding="UTF-8"?>\n' + ET.tostring(
        root_elem, encoding="utf-8"
    )

    raw_stdout = getattr(sys.stdout, "buffer", None)
    if raw_stdout is None:
        # stdout was replaced by a text-only stream; fall back to plain print.
        print(xml_bytes.decode("utf-8"))
        return

    # Write the bytes directly so file names outside the console encoding
    # neither crash the script nor get re-encoded behind the declaration.
    sys.stdout.flush()
    raw_stdout.write(xml_bytes + b"\n")
    raw_stdout.flush()
60
+
61
if __name__ == "__main__":
    # Optional: a custom path may be given as the first command-line argument.
    # The slice is empty when no argument was passed, so main() then falls
    # back to its default directory.
    main(*sys.argv[1:2])