earrieta committed on
Commit
4379b25
·
1 Parent(s): 7428353

vibe coded the cli and the llm

Browse files
Files changed (6) hide show
  1. __main__.py +4 -0
  2. agent.py +73 -4
  3. cli.py +139 -0
  4. prompts.py +124 -0
  5. requirements.txt +2 -18
  6. tools.py +0 -0
__main__.py CHANGED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from cli import main
2
+
3
+ if __name__ == "__main__":
4
+ raise SystemExit(main())
agent.py CHANGED
@@ -1,10 +1,79 @@
1
- import openai
2
  import os
3
  from dotenv import load_dotenv
4
 
5
- load_dotenv()
6
- openai.api_key = os.getenv("OPENAI_API_KEY")
7
 
8
- client = OpenAI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
 
1
+ from typing import Optional, Dict, Any
2
  import os
3
  from dotenv import load_dotenv
4
 
5
+ from openai import OpenAI
 
6
 
7
+ import prompts as prompts_mod
8
+
9
+
10
class BIDSifierAgent:
    """Wrapper around OpenAI chat API for step-wise BIDSification.

    Each step name maps to one prompt builder in the ``prompts`` module; the
    agent sends the fixed system prompt plus the step's user prompt and
    returns the model's text reply.
    """

    # step name -> (prompt builder attribute on the prompts module,
    #               whether that builder takes an ``output_root`` keyword)
    _STEP_BUILDERS = {
        "summary": ("summarize_dataset_prompt", False),
        "create_root": ("create_root_prompt", True),
        "create_metadata": ("create_metadata_prompt", True),
        "create_structure": ("create_structure_prompt", True),
        "rename_move": ("rename_and_move_prompt", True),
    }

    def __init__(self, *, model: Optional[str] = None, temperature: float = 0.2):
        """Create the API client.

        Args:
            model: Model name; falls back to the ``BIDSIFIER_MODEL``
                environment variable, then to ``"gpt-4o-mini"``.
            temperature: Sampling temperature passed on every request.

        Raises:
            RuntimeError: If ``OPENAI_API_KEY`` is not set after loading ``.env``.
        """
        load_dotenv()
        if not os.getenv("OPENAI_API_KEY"):
            raise RuntimeError("OPENAI_API_KEY not set in environment.")
        self.client = OpenAI()
        self.model = model or os.getenv("BIDSIFIER_MODEL", "gpt-4o-mini")
        self.temperature = temperature

    def _build_user_prompt(self, step: str, context: Dict[str, Any]) -> str:
        """Return the user prompt for *step*, filled in from *context*.

        Args:
            step: One of the keys of ``_STEP_BUILDERS``.
            context: Mapping that may contain ``dataset_xml``, ``readme_text``,
                ``publication_text`` and ``output_root``. Missing text entries
                are passed as ``None``; a missing ``output_root`` defaults to
                ``"./bids_output"``.

        Raises:
            ValueError: If *step* is not a known step name.
        """
        try:
            builder_name, takes_output_root = self._STEP_BUILDERS[step]
        except KeyError:
            raise ValueError(f"Unknown step: {step}") from None

        kwargs: Dict[str, Any] = {
            "dataset_xml": context.get("dataset_xml"),
            "readme_text": context.get("readme_text"),
            "publication_text": context.get("publication_text"),
        }
        if takes_output_root:
            kwargs["output_root"] = context.get("output_root", "./bids_output")
        return getattr(prompts_mod, builder_name)(**kwargs)

    def run_step(self, step: str, context: Dict[str, Any]) -> str:
        """Run one step against the chat API and return the reply text.

        The user prompt is built before the request is sent, so an unknown
        *step* raises ``ValueError`` without making an API call.

        Returns:
            The first choice's message content, or ``""`` when the API
            returns no text content, so callers always receive a ``str``.
        """
        system_msg = prompts_mod.system_prompt()
        user_msg = self._build_user_prompt(step, context)
        resp = self.client.chat.completions.create(
            model=self.model,
            temperature=self.temperature,
            messages=[
                {"role": "system", "content": system_msg},
                {"role": "user", "content": user_msg},
            ],
        )
        # message.content is nullable in the API; downstream code runs a regex
        # over the reply and would fail on None.
        return resp.choices[0].message.content or ""
75
+
76
+
77
+ __all__ = ["BIDSifierAgent"]
78
 
79
 
cli.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import re
4
+ import sys
5
+ from typing import List, Optional
6
+
7
+ from agent import BIDSifierAgent
8
+
9
+
10
def _read_optional(path: Optional[str]) -> Optional[str]:
    """Read an optional text file.

    Args:
        path: File path, or ``None``/empty string when the input was not given.

    Returns:
        The file's text, or ``None`` when *path* is falsy.

    Raises:
        FileNotFoundError: If *path* is given but is not an existing regular file.
    """
    if not path:
        return None
    if not os.path.isfile(path):
        raise FileNotFoundError(f"File not found: {path}")
    # utf-8-sig strips a leading byte-order mark so it does not end up in the
    # prompt text; undecodable bytes are still dropped (best-effort read).
    with open(path, "r", encoding="utf-8-sig", errors="ignore") as f:
        return f.read()
17
+
18
+
19
def parse_commands_from_markdown(markdown: str) -> List[str]:
    """Extract the first shell fenced code block and return its commands.

    The fence tag may be ``bash``, ``sh``, ``shell`` or ``zsh`` (any case),
    may be followed by trailing spaces, and the block may use LF or CRLF
    line endings. Blank lines and lines starting with ``#`` are skipped.
    A line ending in an unescaped backslash is joined with the following
    line into a single command. Here-document bodies get no special
    treatment: each body line is handled like any other line.

    Args:
        markdown: The model's reply text.

    Returns:
        One string per command, or an empty list if no shell block is found.
    """
    pattern = re.compile(
        r"```[ \t]*(?:bash|sh|shell|zsh)[ \t]*\r?\n(.*?)```",
        re.DOTALL | re.IGNORECASE,
    )
    m = pattern.search(markdown)
    if not m:
        return []

    commands: List[str] = []
    pending: List[str] = []  # pieces of a command continued with a trailing backslash
    for raw in m.group(1).splitlines():
        line = raw.strip()
        if not pending and (not line or line.startswith("#")):
            continue
        # An odd number of trailing backslashes means the last one escapes the
        # newline (continuation); an even number is just literal backslashes.
        trailing = len(raw) - len(raw.rstrip("\\"))
        if trailing % 2 == 1:
            pending.append(raw[:-1].strip())
            continue
        pending.append(line)
        joined = " ".join(piece for piece in pending if piece)
        if joined:
            commands.append(joined)
        pending = []

    # A dangling continuation on the block's last line still counts as a command.
    leftover = " ".join(piece for piece in pending if piece)
    if leftover:
        commands.append(leftover)
    return commands
33
+
34
+
35
def _print_commands(commands: List[str]) -> None:
    """Print the proposed commands, or a notice when the list is empty.

    The commands are only displayed here; nothing is executed.
    """
    if commands:
        print("\nProposed commands (NOT executed):")
        for command in commands:
            print(f" {command}")
    else:
        print("(No commands detected in fenced bash block.)")
42
+
43
+
44
def prompt_yes_no(question: str, default: bool = False) -> bool:
    """Ask a yes/no question on the terminal.

    Args:
        question: Text shown before the ``[Y/n]`` / ``[y/N]`` hint.
        default: Value returned when the user just presses Enter.

    Returns:
        ``True`` for ``y``/``yes`` (case-insensitive), *default* for an empty
        answer, ``False`` for anything else. If stdin is closed or exhausted
        (non-interactive run), the question counts as declined and ``False``
        is returned instead of raising ``EOFError``.
    """
    suffix = "[Y/n]" if default else "[y/N]"
    try:
        ans = input(f"{question} {suffix} ").strip().lower()
    except EOFError:
        # Nobody is there to confirm; finish the prompt line and stop.
        print()
        return False
    if not ans:
        return default
    return ans in {"y", "yes"}
50
+
51
+
52
def short_divider(title: str) -> None:
    """Print *title* as a banner between two 80-character rules of ``=``.

    A blank line precedes the top rule and follows the bottom rule.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print(title)
    print(f"{rule}\n")
56
+
57
+
58
def main(argv: Optional[List[str]] = None) -> int:
    """Run the interactive, display-only BIDSifier session.

    Parses the command line, reads the optional context files, then walks
    through five steps. Each step asks the agent for a plan, prints it,
    lists the commands found in its shell block (steps 2-5), and asks
    whether to continue. Commands are never executed.

    Args:
        argv: Argument list without the program name; ``None`` lets argparse
            read ``sys.argv``.

    Returns:
        ``0`` when the session finishes or the user stops early; ``1`` when
        an input file cannot be read or the agent cannot be created (the
        reason is printed to stderr).
    """
    parser = argparse.ArgumentParser(
        prog="bidsifier",
        description="Interactive LLM assistant to convert a dataset into BIDS via stepwise shell commands.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("--dataset-xml", dest="dataset_xml_path", help="Path to dataset structure XML", required=False)
    parser.add_argument("--readme", dest="readme_path", help="Path to dataset README file", required=False)
    parser.add_argument("--publication", dest="publication_path", help="Path to a publication/notes file", required=False)
    parser.add_argument("--output-root", dest="output_root", help="Target BIDS root directory", required=True)
    parser.add_argument("--model", dest="model", help="OpenAI model name", default=os.getenv("BIDSIFIER_MODEL", "gpt-4o-mini"))
    # Execution is intentionally disabled; we only display commands.
    # Keeping --dry-run for backward compatibility (no effect other than display).
    parser.add_argument("--dry-run", dest="dry_run", help="Display-only (default behavior)", action="store_true")

    args = parser.parse_args(argv)

    # Setup failures (bad path, missing API key) are user errors: report them
    # as a one-line message and a non-zero exit code rather than a traceback.
    try:
        context = {
            "dataset_xml": _read_optional(args.dataset_xml_path),
            "readme_text": _read_optional(args.readme_path),
            "publication_text": _read_optional(args.publication_path),
            "output_root": args.output_root,
        }
        agent = BIDSifierAgent(model=args.model)
    except (OSError, RuntimeError) as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1

    # (banner title, agent step, list parsed commands?, question before next step)
    steps = [
        ("Step 1: Understand dataset", "summary", False, "Proceed to create BIDS root?"),
        ("Step 2: Propose commands to create BIDS root", "create_root", True, "Proceed to create metadata files?"),
        ("Step 3: Propose commands to create metadata files", "create_metadata", True, "Proceed to create empty BIDS structure?"),
        ("Step 4: Propose commands to create dataset structure", "create_structure", True, "Proceed to propose renaming/moving?"),
        ("Step 5: Propose commands to rename/move files", "rename_move", True, None),
    ]

    for title, step, list_commands, next_question in steps:
        short_divider(title)
        reply = agent.run_step(step, context)
        print(reply)
        if list_commands:
            _print_commands(parse_commands_from_markdown(reply))
        # The last step has no follow-up question.
        if next_question is not None and not prompt_yes_no(next_question, default=True):
            return 0

    print("\nAll steps completed. Commands were only displayed (never executed). Use them manually or in a future Gradio/HF Space interface.")
    return 0
136
+
137
+
138
+ if __name__ == "__main__":
139
+ sys.exit(main())
prompts.py CHANGED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Prompt templates for the BIDSifier assistant.
3
+
4
+ Contract expected by the CLI:
5
+ - Exactly one bash code block (```bash ... ```), one command per line, no inline comments.
6
+ - Prefer safe operations: mkdir -p, cp -n; avoid destructive actions unless explicitly stated.
7
+ - Use env vars when present: $OUTPUT_ROOT, $DATASET_XML_PATH, $README_PATH, $PUBLICATION_PATH.
8
+ """
9
+
10
+ from typing import Optional
11
+
12
+
13
+ SYSTEM_PROMPT = (
14
+ "You are BIDSifier, an LLM assistant that proposes careful, incremental shell commands "
15
+ "to convert non-standard neuroimaging datasets into BIDS. Be conservative and explicit."
16
+ )
17
+
18
+
19
def _ctx(dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
    """Assemble the labelled context section embedded in every prompt.

    Each non-empty input becomes a ``[Label]`` header followed by its
    stripped text; sections are separated by a blank line. Inputs that are
    ``None``, empty, or whitespace-only are left out so no header appears
    with an empty body.

    Returns:
        The joined sections, or ``"[No additional context provided]"`` when
        nothing usable was supplied.
    """
    labelled = (
        ("[Dataset XML]", dataset_xml),
        ("[README]", readme_text),
        ("[Publication]", publication_text),
    )
    parts = []
    for label, text in labelled:
        body = text.strip() if text else ""
        if body:
            parts.append(label + "\n" + body)
    return "\n\n".join(parts) if parts else "[No additional context provided]"
28
+
29
+
30
+ def system_prompt() -> str:
31
+ return SYSTEM_PROMPT
32
+
33
+
34
+ def summarize_dataset_prompt(*, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
35
+ return f"""
36
+ Step 1/5 — Understand the dataset and produce a short summary.
37
+
38
+ Requirements:
39
+ - 8–15 concise bullets covering subjects/sessions, modalities (T1w/T2w/DWI/fMRI/etc.), tasks, naming patterns, id conventions.
40
+ - Call out uncertainties or missing info explicitly.
41
+ - Do not propose any commands in this step.
42
+
43
+ Context:\n{_ctx(dataset_xml, readme_text, publication_text)}
44
+
45
+ Output:
46
+ - One short paragraph (<=4 sentences) then bullets. End with open questions for the user if any.
47
+ """
48
+
49
+
50
+ def create_root_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
51
+ return f"""
52
+ Step 2/5 — Propose commands to create a new BIDS root directory.
53
+
54
+ Constraints:
55
+ - Use $OUTPUT_ROOT if present, otherwise use: {output_root}
56
+ - Use mkdir -p; don't overwrite existing files.
57
+ - Optionally create a minimal skeleton (.bidsignore, empty dirs if helpful).
58
+
59
+ Context:\n{_ctx(dataset_xml, readme_text, publication_text)}
60
+
61
+ Output:
62
+ - A brief plan (2–5 bullets) followed by exactly one fenced bash block with commands only.
63
+ """
64
+
65
+
66
+ def create_metadata_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
67
+ return f"""
68
+ Step 3/5 — Propose commands to create required BIDS metadata files.
69
+
70
+ Must include:
71
+ - dataset_description.json (Name, BIDSVersion, License if known)
72
+ - participants.tsv and participants.json (headers and column descriptions; can be placeholders)
73
+ - README and LICENSE (best guess or TODO)
74
+ - Task/event placeholders if task fMRI is suspected
75
+
76
+ Constraints:
77
+ - Use $OUTPUT_ROOT if present, else {output_root}
78
+ - Create without overwriting existing content; use here-docs or echo safely. If unsure, add TODO markers.
79
+
80
+ Context:\n{_ctx(dataset_xml, readme_text, publication_text)}
81
+
82
+ Output:
83
+ - Short rationale bullets, then a single fenced bash block with commands only.
84
+ """
85
+
86
+
87
+ def create_structure_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
88
+ return f"""
89
+ Step 4/5 — Propose commands to create the BIDS directory structure.
90
+
91
+ Goals:
92
+ - Infer subjects, sessions, and modalities; create sub-<label>/, optional ses-<label>/, and modality folders (anat, dwi, func, fmap, etc.).
93
+ - Do not move/copy raw files yet; create empty structure only.
94
+
95
+ Constraints:
96
+ - Use $OUTPUT_ROOT if present, else {output_root}
97
+ - Use mkdir -p.
98
+
99
+ Context:\n{_ctx(dataset_xml, readme_text, publication_text)}
100
+
101
+ Output:
102
+ - One plan then a single fenced bash block with commands.
103
+ """
104
+
105
+
106
+ def rename_and_move_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
107
+ return f"""
108
+ Step 5/5 — Propose commands to rename and move files into the BIDS structure.
109
+
110
+ Requirements:
111
+ - Map original names to BIDS filenames; demonstrate patterns (e.g., with find/xargs) carefully.
112
+ - Prefer non-destructive copy (cp -n). Use mv only if explicitly stated by the user.
113
+ - Include TODOs for ambiguous mappings; split into small chunks to facilitate review.
114
+
115
+ Constraints:
116
+ - Target $OUTPUT_ROOT (or {output_root}).
117
+ - Reference inputs via env vars when possible.
118
+
119
+ Context:\n{_ctx(dataset_xml, readme_text, publication_text)}
120
+
121
+ Output:
122
+ - A brief mapping summary (text) followed by a single fenced bash block with commands only.
123
+ """
124
+
requirements.txt CHANGED
@@ -1,18 +1,2 @@
1
- annotated-types==0.7.0
2
- anyio==4.11.0
3
- certifi==2025.11.12
4
- distro==1.9.0
5
- dotenv==0.9.9
6
- h11==0.16.0
7
- httpcore==1.0.9
8
- httpx==0.28.1
9
- idna==3.11
10
- jiter==0.12.0
11
- openai==2.8.1
12
- pydantic==2.12.4
13
- pydantic_core==2.41.5
14
- python-dotenv==1.2.1
15
- sniffio==1.3.1
16
- tqdm==4.67.1
17
- typing-inspection==0.4.2
18
- typing_extensions==4.15.0
 
1
+ openai>=1.52.0
2
+ python-dotenv>=1.0.1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools.py DELETED
File without changes