Ekain Arrieta committed on
Commit
57f68aa
·
2 Parent(s): 9e17db1 ba7c80f

Merge branch 'main' into testing

Browse files
.gitignore CHANGED
@@ -2,3 +2,4 @@
2
  __pycache__/
3
  .venv
4
  testing_structure.xml
 
 
2
  __pycache__/
3
  .venv
4
  testing_structure.xml
5
+ /Non_Bids_Dataset
LM_Studio_chat.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+
4
+ url = "http://localhost:1234/v1/chat/completions"
5
+ headers = {
6
+ "Content-Type": "application/json"
7
+ }
8
+
9
+ # Initialize conversation history with the system message
10
+ conversation_history = [
11
+ {
12
+ "role": "system",
13
+ "content": "Be a helpful assistant. Be concise."
14
+ }
15
+ ]
16
+
17
+ while True:
18
+ # Get user input
19
+ user_input = input("You: ")
20
+
21
+ # Exit the loop if the user types 'exit'
22
+ if user_input.lower() == 'exit':
23
+ print("Ending conversation...")
24
+ break
25
+
26
+ # Add user's message to the conversation history
27
+ conversation_history.append({
28
+ "role": "user",
29
+ "content": user_input
30
+ })
31
+
32
+ # Prepare the data for the API call
33
+ data = {
34
+ "model": "deepseek/deepseek-r1-0528-qwen3-8b",
35
+ "messages": conversation_history,
36
+ "temperature": 0.7,
37
+ "max_tokens": -1,
38
+ "stream": False
39
+ }
40
+
41
+ # Make the POST request to the API
42
+ response = requests.post(url, headers=headers, data=json.dumps(data))
43
+
44
+ # Get the model's response
45
+ model_response = response.json()
46
+
47
+ # Extract and print the last model response (the assistant's content)
48
+ last_message = model_response['choices'][0]['message']['content']
49
+ print(f"Model: {last_message}")
50
+
51
+ # Add model's response to the conversation history for the next round
52
+ conversation_history.append({
53
+ "role": "assistant",
54
+ "content": last_message
55
+ })
__pycache__/agent.cpython-314.pyc ADDED
Binary file (4.11 kB). View file
 
agent.py CHANGED
@@ -3,7 +3,7 @@ import os
3
  from dotenv import load_dotenv
4
  import dspy
5
 
6
- import prompts as prompts_mod
7
 
8
 
9
  class BIDSifierAgent:
@@ -13,14 +13,21 @@ class BIDSifierAgent:
13
  load_dotenv()
14
 
15
  if provider=="openai":
16
- lm = dspy.LM(f"{provider}/{model}", api_key=os.getenv("OPENAI_API_KEY"))
 
 
 
 
17
  else:
18
- lm = dspy.LM(f"{provider}/{model}", api_key="")
 
 
 
19
  dspy.configure(lm=lm)
20
  self.llm = lm
21
  self.model = model or os.getenv("BIDSIFIER_MODEL", "gpt-4o-mini")
22
  self.temperature = temperature
23
-
24
  def _build_user_prompt(self, step: str, context: Dict[str, Any]) -> str:
25
  dataset_xml = context.get("dataset_xml")
26
  readme_text = context.get("readme_text")
@@ -28,34 +35,27 @@ class BIDSifierAgent:
28
  output_root = context.get("output_root", "./bids_output")
29
 
30
  if step == "summary":
31
- return prompts_mod.summarize_dataset_prompt(
32
- dataset_xml=dataset_xml,
33
- readme_text=readme_text,
34
- publication_text=publication_text,
35
- )
36
- if step == "create_root":
37
- return prompts_mod.create_root_prompt(
38
- output_root=output_root,
39
  dataset_xml=dataset_xml,
40
  readme_text=readme_text,
41
  publication_text=publication_text,
42
  )
43
  if step == "create_metadata":
44
- return prompts_mod.create_metadata_prompt(
45
  output_root=output_root,
46
  dataset_xml=dataset_xml,
47
  readme_text=readme_text,
48
  publication_text=publication_text,
49
  )
50
  if step == "create_structure":
51
- return prompts_mod.create_structure_prompt(
52
  output_root=output_root,
53
  dataset_xml=dataset_xml,
54
  readme_text=readme_text,
55
  publication_text=publication_text,
56
  )
57
  if step == "rename_move":
58
- return prompts_mod.rename_and_move_prompt(
59
  output_root=output_root,
60
  dataset_xml=dataset_xml,
61
  readme_text=readme_text,
@@ -64,7 +64,7 @@ class BIDSifierAgent:
64
  raise ValueError(f"Unknown step: {step}")
65
 
66
  def run_step(self, step: str, context: Dict[str, Any]) -> str:
67
- system_msg = prompts_mod.system_prompt()
68
  user_msg = self._build_user_prompt(step, context)
69
  resp = self.llm(
70
  messages=[
@@ -75,6 +75,17 @@ class BIDSifierAgent:
75
  )
76
  return resp[0]
77
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  __all__ = ["BIDSifierAgent"]
80
 
 
3
  from dotenv import load_dotenv
4
  import dspy
5
 
6
+ import prompts
7
 
8
 
9
  class BIDSifierAgent:
 
13
  load_dotenv()
14
 
15
  if provider=="openai":
16
+ if model == "gpt-5": #reasoning model that requires special handling
17
+ temperature = 1.0
18
+ lm = dspy.LM(f"{provider}/{model}", api_key=os.getenv("OPENAI_API_KEY"), temperature = temperature, max_tokens = 40000)
19
+ else:
20
+ lm = dspy.LM(f"{provider}/{model}", api_key=os.getenv("OPENAI_API_KEY"), temperature = temperature, max_tokens = 10000)
21
  else:
22
+ lm = dspy.LM(f"{provider}/{model}", api_key="", max_tokens=10000)
23
+
24
+
25
+
26
  dspy.configure(lm=lm)
27
  self.llm = lm
28
  self.model = model or os.getenv("BIDSIFIER_MODEL", "gpt-4o-mini")
29
  self.temperature = temperature
30
+
31
  def _build_user_prompt(self, step: str, context: Dict[str, Any]) -> str:
32
  dataset_xml = context.get("dataset_xml")
33
  readme_text = context.get("readme_text")
 
35
  output_root = context.get("output_root", "./bids_output")
36
 
37
  if step == "summary":
38
+ return prompts.summarize_dataset_prompt(
 
 
 
 
 
 
 
39
  dataset_xml=dataset_xml,
40
  readme_text=readme_text,
41
  publication_text=publication_text,
42
  )
43
  if step == "create_metadata":
44
+ return prompts.create_metadata_prompt(
45
  output_root=output_root,
46
  dataset_xml=dataset_xml,
47
  readme_text=readme_text,
48
  publication_text=publication_text,
49
  )
50
  if step == "create_structure":
51
+ return prompts.create_structure_prompt(
52
  output_root=output_root,
53
  dataset_xml=dataset_xml,
54
  readme_text=readme_text,
55
  publication_text=publication_text,
56
  )
57
  if step == "rename_move":
58
+ return prompts.rename_and_move_prompt(
59
  output_root=output_root,
60
  dataset_xml=dataset_xml,
61
  readme_text=readme_text,
 
64
  raise ValueError(f"Unknown step: {step}")
65
 
66
  def run_step(self, step: str, context: Dict[str, Any]) -> str:
67
+ system_msg = prompts.system_prompt()
68
  user_msg = self._build_user_prompt(step, context)
69
  resp = self.llm(
70
  messages=[
 
75
  )
76
  return resp[0]
77
 
78
+ def run_query(self, query: str) -> str:
79
+ system_msg = prompts.system_prompt()
80
+ resp = self.llm(
81
+ messages=[
82
+ {"role": "system", "content": system_msg},
83
+ {"role": "user", "content": query},
84
+ ],
85
+ temperature=self.temperature,
86
+ )
87
+ return resp[0]
88
+
89
 
90
  __all__ = ["BIDSifierAgent"]
91
 
cli.py CHANGED
@@ -60,7 +60,11 @@ def _print_commands(commands: List[str]) -> None:
60
  if not commands:
61
  print("(No commands detected in fenced bash block.)")
62
  return
63
- print("\nProposed commands (NOT executed):")
 
 
 
 
64
  for c in commands:
65
  print(f" {c}")
66
 
@@ -77,7 +81,15 @@ def short_divider(title: str) -> None:
77
  print("\n" + "=" * 80)
78
  print(title)
79
  print("=" * 80 + "\n")
80
-
 
 
 
 
 
 
 
 
81
 
82
  def main(argv: Optional[List[str]] = None) -> int:
83
  parser = argparse.ArgumentParser(
@@ -106,6 +118,7 @@ def main(argv: Optional[List[str]] = None) -> int:
106
  "readme_text": readme_text,
107
  "publication_text": publication_text,
108
  "output_root": args.output_root,
 
109
  }
110
 
111
  command_env = {
@@ -123,40 +136,36 @@ def main(argv: Optional[List[str]] = None) -> int:
123
  short_divider("Step 1: Understand dataset")
124
  summary = agent.run_step("summary", context)
125
  print(summary)
 
126
  if not prompt_yes_no("Proceed to create BIDS root?", default=True):
127
  return 0
128
-
129
- short_divider("Step 2: Propose commands to create BIDS root")
130
- root_plan = agent.run_step("create_root", context)
131
- print(root_plan)
132
- cmds = parse_commands_from_markdown(root_plan)
133
- _print_commands(cmds)
134
- if not prompt_yes_no("Proceed to create metadata files?", default=True):
135
- return 0
136
-
137
- short_divider("Step 3: Propose commands to create metadata files")
138
  meta_plan = agent.run_step("create_metadata", context)
139
  print(meta_plan)
140
  cmds = parse_commands_from_markdown(meta_plan)
141
  _print_commands(cmds)
 
142
  if not prompt_yes_no("Proceed to create empty BIDS structure?", default=True):
143
  return 0
144
 
145
- short_divider("Step 4: Propose commands to create dataset structure")
146
  struct_plan = agent.run_step("create_structure", context)
147
  print(struct_plan)
148
  cmds = parse_commands_from_markdown(struct_plan)
149
  _print_commands(cmds)
 
150
  if not prompt_yes_no("Proceed to propose renaming/moving?", default=True):
151
  return 0
152
 
153
- short_divider("Step 5: Propose commands to rename/move files")
154
  move_plan = agent.run_step("rename_move", context)
155
  print(move_plan)
156
  cmds = parse_commands_from_markdown(move_plan)
157
  _print_commands(cmds)
 
158
 
159
- print("\nAll steps completed. Commands were only displayed (never executed). Use them manually or in a future Gradio/HF Space interface.")
160
  return 0
161
 
162
 
 
60
  if not commands:
61
  print("(No commands detected in fenced bash block.)")
62
  return
63
+ print("-----"*10)
64
+
65
+ print("COMMANDS TO EXECUTE:")
66
+
67
+ print("-----"*10)
68
  for c in commands:
69
  print(f" {c}")
70
 
 
81
  print("\n" + "=" * 80)
82
  print(title)
83
  print("=" * 80 + "\n")
84
+
85
+ def enter_feedback_loop(agent: BIDSifierAgent, context: dict) -> dict:
86
+ feedback = input("\nAny comments or corrections to the summary? (press Enter to skip): ").strip()
87
+ while feedback:
88
+ context["user_feedback"] += feedback
89
+ agent_response = agent.run_query(feedback)
90
+ print(agent_response)
91
+ feedback = input("\nAny additional comments or corrections? (press Enter to skip): ").strip()
92
+ return context
93
 
94
  def main(argv: Optional[List[str]] = None) -> int:
95
  parser = argparse.ArgumentParser(
 
118
  "readme_text": readme_text,
119
  "publication_text": publication_text,
120
  "output_root": args.output_root,
121
+ "user_feedback": "",
122
  }
123
 
124
  command_env = {
 
136
  short_divider("Step 1: Understand dataset")
137
  summary = agent.run_step("summary", context)
138
  print(summary)
139
+ context = enter_feedback_loop(agent, context)
140
  if not prompt_yes_no("Proceed to create BIDS root?", default=True):
141
  return 0
142
+
143
+ short_divider("Step 2: Propose commands to create metadata files")
 
 
 
 
 
 
 
 
144
  meta_plan = agent.run_step("create_metadata", context)
145
  print(meta_plan)
146
  cmds = parse_commands_from_markdown(meta_plan)
147
  _print_commands(cmds)
148
+ context = enter_feedback_loop(agent, context)
149
  if not prompt_yes_no("Proceed to create empty BIDS structure?", default=True):
150
  return 0
151
 
152
+ short_divider("Step 3: Propose commands to create dataset structure")
153
  struct_plan = agent.run_step("create_structure", context)
154
  print(struct_plan)
155
  cmds = parse_commands_from_markdown(struct_plan)
156
  _print_commands(cmds)
157
+ context = enter_feedback_loop(agent, context)
158
  if not prompt_yes_no("Proceed to propose renaming/moving?", default=True):
159
  return 0
160
 
161
+ short_divider("Step 4: Propose commands to rename/move files")
162
  move_plan = agent.run_step("rename_move", context)
163
  print(move_plan)
164
  cmds = parse_commands_from_markdown(move_plan)
165
  _print_commands(cmds)
166
+ context = enter_feedback_loop(agent, context)
167
 
168
+ print("\nAll steps completed. Commands were only displayed - use them manually")
169
  return 0
170
 
171
 
extract_dir_structure_xml.py CHANGED
@@ -2,8 +2,6 @@ import sys
2
  from pathlib import Path
3
  import xml.etree.ElementTree as ET
4
 
5
- FOLDER_LOCATION = sys.argv[1] if len(sys.argv) > 1 else "sample_dataset"
6
-
7
  def build_xml(dir_path: Path, root_level: bool = True) -> ET.Element:
8
  """
9
  Recursively build the XML tree.
@@ -43,7 +41,7 @@ def indent(elem: ET.Element, level: int = 0) -> None:
43
  if level and (not elem.tail or not elem.tail.strip()):
44
  elem.tail = i
45
 
46
- def main(directory: str = FOLDER_LOCATION) -> None:
47
  path = Path(directory)
48
  if not path.is_dir():
49
  print(f"Error: '{directory}' is not a directory.", file=sys.stderr)
 
2
  from pathlib import Path
3
  import xml.etree.ElementTree as ET
4
 
 
 
5
  def build_xml(dir_path: Path, root_level: bool = True) -> ET.Element:
6
  """
7
  Recursively build the XML tree.
 
41
  if level and (not elem.tail or not elem.tail.strip()):
42
  elem.tail = i
43
 
44
+ def main(directory: str = "sample_dataset") -> None:
45
  path = Path(directory)
46
  if not path.is_dir():
47
  print(f"Error: '{directory}' is not a directory.", file=sys.stderr)
prompts.py CHANGED
@@ -33,7 +33,7 @@ def system_prompt() -> str:
33
 
34
  def summarize_dataset_prompt(*, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
35
  return f"""
36
- Step 1/5 — Understand the dataset and produce a short summary.
37
 
38
  Requirements:
39
  - 8–15 concise bullets covering subjects/sessions, modalities (T1w/T2w/DWI/fMRI/etc.), tasks, naming patterns, id conventions.
@@ -47,25 +47,11 @@ Output:
47
  """
48
 
49
 
50
- def create_root_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
51
- return f"""
52
- Step 2/5 — Propose commands to create a new BIDS root directory.
53
-
54
- Constraints:
55
- - Use $OUTPUT_ROOT if present, otherwise use: {output_root}
56
- - Use mkdir -p; don't overwrite existing files.
57
- - Optionally create a minimal skeleton (.bidsignore, empty dirs if helpful).
58
-
59
- Context:\n{_ctx(dataset_xml, readme_text, publication_text)}
60
-
61
- Output:
62
- - A brief plan (2–5 bullets) followed by exactly one fenced bash block with commands only.
63
- """
64
 
65
 
66
  def create_metadata_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
67
  return f"""
68
- Step 3/5 — Propose commands to create required BIDS metadata files.
69
 
70
  Must include:
71
  - dataset_description.json (Name, BIDSVersion, License if known)
@@ -86,7 +72,7 @@ Output:
86
 
87
  def create_structure_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
88
  return f"""
89
- Step 4/5 — Propose commands to create the BIDS directory structure.
90
 
91
  Goals:
92
  - Infer subjects, sessions, and modalities; create sub-<label>/, optional ses-<label>/, and modality folders (anat, dwi, func, fmap, etc.).
@@ -105,7 +91,7 @@ Output:
105
 
106
  def rename_and_move_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
107
  return f"""
108
- Step 5/5 — Propose commands to rename and move files into the BIDS structure.
109
 
110
  Requirements:
111
  - Map original names to BIDS filenames; demonstrate patterns (e.g., with find/xargs) carefully.
 
33
 
34
  def summarize_dataset_prompt(*, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
35
  return f"""
36
+ Step 1/4 — Understand the dataset and produce a short summary.
37
 
38
  Requirements:
39
  - 8–15 concise bullets covering subjects/sessions, modalities (T1w/T2w/DWI/fMRI/etc.), tasks, naming patterns, id conventions.
 
47
  """
48
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
 
52
  def create_metadata_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
53
  return f"""
54
+ Step 2/4 — Propose commands to create required BIDS metadata files.
55
 
56
  Must include:
57
  - dataset_description.json (Name, BIDSVersion, License if known)
 
72
 
73
  def create_structure_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
74
  return f"""
75
+ Step 3/4 — Propose commands to create the BIDS directory structure.
76
 
77
  Goals:
78
  - Infer subjects, sessions, and modalities; create sub-<label>/, optional ses-<label>/, and modality folders (anat, dwi, func, fmap, etc.).
 
91
 
92
  def rename_and_move_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
93
  return f"""
94
+ Step 4/4 — Propose commands to rename and move files into the BIDS structure.
95
 
96
  Requirements:
97
  - Map original names to BIDS filenames; demonstrate patterns (e.g., with find/xargs) carefully.
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  openai>=1.52.0
2
  python-dotenv>=1.0.1
 
 
1
  openai>=1.52.0
2
  python-dotenv>=1.0.1
3
+ dspy