Spaces:

stefanches
/

OpenBIDSifier

Sleeping

App Files Files Community

earrieta committed on Nov 20, 2025

Commit

4379b25

1 Parent(s): 7428353

vibe coded the cli and the llm

Browse files

Files changed (6) hide show

__main__.py +4 -0
agent.py +73 -4
cli.py +139 -0
prompts.py +124 -0
requirements.txt +2 -18
tools.py +0 -0

__main__.py CHANGED Viewed

	@@ -0,0 +1,4 @@

+from cli import main
+if __name__ == "__main__":
+	raise SystemExit(main())

agent.py CHANGED Viewed

@@ -1,10 +1,79 @@
-import openai
 import os
 from dotenv import load_dotenv
-load_dotenv()
-openai.api_key = os.getenv("OPENAI_API_KEY")
-client = OpenAI()

+from typing import Optional, Dict, Any
 import os
 from dotenv import load_dotenv
+from openai import OpenAI
+import prompts as prompts_mod
+class BIDSifierAgent:
+	"""Wrapper around OpenAI chat API for step-wise BIDSification."""
+	def __init__(self, *, model: Optional[str] = None, temperature: float = 0.2):
+		load_dotenv()
+		if not os.getenv("OPENAI_API_KEY"):
+			raise RuntimeError("OPENAI_API_KEY not set in environment.")
+		self.client = OpenAI()
+		self.model = model or os.getenv("BIDSIFIER_MODEL", "gpt-4o-mini")
+		self.temperature = temperature
+	def _build_user_prompt(self, step: str, context: Dict[str, Any]) -> str:
+		dataset_xml = context.get("dataset_xml")
+		readme_text = context.get("readme_text")
+		publication_text = context.get("publication_text")
+		output_root = context.get("output_root", "./bids_output")
+		if step == "summary":
+			return prompts_mod.summarize_dataset_prompt(
+				dataset_xml=dataset_xml,
+				readme_text=readme_text,
+				publication_text=publication_text,
+			)
+		if step == "create_root":
+			return prompts_mod.create_root_prompt(
+				output_root=output_root,
+				dataset_xml=dataset_xml,
+				readme_text=readme_text,
+				publication_text=publication_text,
+			)
+		if step == "create_metadata":
+			return prompts_mod.create_metadata_prompt(
+				output_root=output_root,
+				dataset_xml=dataset_xml,
+				readme_text=readme_text,
+				publication_text=publication_text,
+			)
+		if step == "create_structure":
+			return prompts_mod.create_structure_prompt(
+				output_root=output_root,
+				dataset_xml=dataset_xml,
+				readme_text=readme_text,
+				publication_text=publication_text,
+			)
+		if step == "rename_move":
+			return prompts_mod.rename_and_move_prompt(
+				output_root=output_root,
+				dataset_xml=dataset_xml,
+				readme_text=readme_text,
+				publication_text=publication_text,
+			)
+		raise ValueError(f"Unknown step: {step}")
+	def run_step(self, step: str, context: Dict[str, Any]) -> str:
+		system_msg = prompts_mod.system_prompt()
+		user_msg = self._build_user_prompt(step, context)
+		resp = self.client.chat.completions.create(
+			model=self.model,
+			temperature=self.temperature,
+			messages=[
+				{"role": "system", "content": system_msg},
+				{"role": "user", "content": user_msg},
+			],
+		)
+		return resp.choices[0].message.content
+__all__ = ["BIDSifierAgent"]

cli.py ADDED Viewed

	@@ -0,0 +1,139 @@

+import argparse
+import os
+import re
+import sys
+from typing import List, Optional
+from agent import BIDSifierAgent
+def _read_optional(path: Optional[str]) -> Optional[str]:
+    if not path:
+        return None
+    if not os.path.isfile(path):
+        raise FileNotFoundError(f"File not found: {path}")
+    with open(path, "r", encoding="utf-8", errors="ignore") as f:
+        return f.read()
+def parse_commands_from_markdown(markdown: str) -> List[str]:
+    """Extract the first bash/sh fenced code block and return one command per line."""
+    pattern = re.compile(r"```(?:bash|sh)\n(.*?)```", re.DOTALL | re.IGNORECASE)
+    m = pattern.search(markdown)
+    if not m:
+        return []
+    block = m.group(1)
+    commands: List[str] = []
+    for raw in block.splitlines():
+        line = raw.strip()
+        if not line or line.startswith("#"):
+            continue
+        commands.append(line)
+    return commands
+def _print_commands(commands: List[str]) -> None:
+    if not commands:
+        print("(No commands detected in fenced bash block.)")
+        return
+    print("\nProposed commands (NOT executed):")
+    for c in commands:
+        print(f"  {c}")
+def prompt_yes_no(question: str, default: bool = False) -> bool:
+    suffix = "[Y/n]" if default else "[y/N]"
+    ans = input(f"{question} {suffix} ").strip().lower()
+    if not ans:
+        return default
+    return ans in {"y", "yes"}
+def short_divider(title: str) -> None:
+    print("\n" + "=" * 80)
+    print(title)
+    print("=" * 80 + "\n")
+def main(argv: Optional[List[str]] = None) -> int:
+    parser = argparse.ArgumentParser(
+        prog="bidsifier",
+        description="Interactive LLM assistant to convert a dataset into BIDS via stepwise shell commands.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument("--dataset-xml", dest="dataset_xml_path", help="Path to dataset structure XML", required=False)
+    parser.add_argument("--readme", dest="readme_path", help="Path to dataset README file", required=False)
+    parser.add_argument("--publication", dest="publication_path", help="Path to a publication/notes file", required=False)
+    parser.add_argument("--output-root", dest="output_root", help="Target BIDS root directory", required=True)
+    parser.add_argument("--model", dest="model", help="OpenAI model name", default=os.getenv("BIDSIFIER_MODEL", "gpt-4o-mini"))
+    # Execution is intentionally disabled; we only display commands.
+    # Keeping --dry-run for backward compatibility (no effect other than display).
+    parser.add_argument("--dry-run", dest="dry_run", help="Display-only (default behavior)", action="store_true")
+    args = parser.parse_args(argv)
+    dataset_xml = _read_optional(args.dataset_xml_path)
+    readme_text = _read_optional(args.readme_path)
+    publication_text = _read_optional(args.publication_path)
+    context = {
+        "dataset_xml": dataset_xml,
+        "readme_text": readme_text,
+        "publication_text": publication_text,
+        "output_root": args.output_root,
+    }
+    command_env = {
+        "OUTPUT_ROOT": args.output_root,
+    }
+    if args.dataset_xml_path:
+        command_env["DATASET_XML_PATH"] = os.path.abspath(args.dataset_xml_path)
+    if args.readme_path:
+        command_env["README_PATH"] = os.path.abspath(args.readme_path)
+    if args.publication_path:
+        command_env["PUBLICATION_PATH"] = os.path.abspath(args.publication_path)
+    agent = BIDSifierAgent(model=args.model)
+    short_divider("Step 1: Understand dataset")
+    summary = agent.run_step("summary", context)
+    print(summary)
+    if not prompt_yes_no("Proceed to create BIDS root?", default=True):
+        return 0
+    short_divider("Step 2: Propose commands to create BIDS root")
+    root_plan = agent.run_step("create_root", context)
+    print(root_plan)
+    cmds = parse_commands_from_markdown(root_plan)
+    _print_commands(cmds)
+    if not prompt_yes_no("Proceed to create metadata files?", default=True):
+        return 0
+    short_divider("Step 3: Propose commands to create metadata files")
+    meta_plan = agent.run_step("create_metadata", context)
+    print(meta_plan)
+    cmds = parse_commands_from_markdown(meta_plan)
+    _print_commands(cmds)
+    if not prompt_yes_no("Proceed to create empty BIDS structure?", default=True):
+        return 0
+    short_divider("Step 4: Propose commands to create dataset structure")
+    struct_plan = agent.run_step("create_structure", context)
+    print(struct_plan)
+    cmds = parse_commands_from_markdown(struct_plan)
+    _print_commands(cmds)
+    if not prompt_yes_no("Proceed to propose renaming/moving?", default=True):
+        return 0
+    short_divider("Step 5: Propose commands to rename/move files")
+    move_plan = agent.run_step("rename_move", context)
+    print(move_plan)
+    cmds = parse_commands_from_markdown(move_plan)
+    _print_commands(cmds)
+    print("\nAll steps completed. Commands were only displayed (never executed). Use them manually or in a future Gradio/HF Space interface.")
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())

prompts.py CHANGED Viewed

	@@ -0,0 +1,124 @@

+"""
+Prompt templates for the BIDSifier assistant.
+Contract expected by the CLI:
+- Exactly one bash code block (```bash ... ```), one command per line, no inline comments.
+- Prefer safe operations: mkdir -p, cp -n; avoid destructive actions unless explicitly stated.
+- Use env vars when present: $OUTPUT_ROOT, $DATASET_XML_PATH, $README_PATH, $PUBLICATION_PATH.
+"""
+from typing import Optional
+# Fixed system message describing the assistant's role; returned by system_prompt().
+SYSTEM_PROMPT = (
+	"You are BIDSifier, an LLM assistant that proposes careful, incremental shell commands "
+	"to convert non-standard neuroimaging datasets into BIDS. Be conservative and explicit."
+)
+def _ctx(dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
+	parts = []
+	if dataset_xml:
+		parts.append("[Dataset XML]\n" + dataset_xml.strip())
+	if readme_text:
+		parts.append("[README]\n" + readme_text.strip())
+	if publication_text:
+		parts.append("[Publication]\n" + publication_text.strip())
+	return "\n\n".join(parts) if parts else "[No additional context provided]"
+def system_prompt() -> str:
+	"""Return the module-level ``SYSTEM_PROMPT`` string."""
+	return SYSTEM_PROMPT
+def summarize_dataset_prompt(*, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
+	"""Build the step 1/5 user prompt, which asks for a dataset summary and no commands.
+	All parameters are keyword-only; the three context texts are rendered by ``_ctx``.
+	"""
+	return f"""
+Step 1/5 — Understand the dataset and produce a short summary.
+Requirements:
+- 8–15 concise bullets covering subjects/sessions, modalities (T1w/T2w/DWI/fMRI/etc.), tasks, naming patterns, id conventions.
+- Call out uncertainties or missing info explicitly.
+- Do not propose any commands in this step.
+Context:\n{_ctx(dataset_xml, readme_text, publication_text)}
+Output:
+- One short paragraph (<=4 sentences) then bullets. End with open questions for the user if any.
+"""
+def create_root_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
+	"""Build the step 2/5 user prompt, which asks for commands that create the BIDS root.
+	``output_root`` is written into the prompt as the fallback for ``$OUTPUT_ROOT``;
+	the three context texts are rendered by ``_ctx``. All parameters are keyword-only.
+	"""
+	return f"""
+Step 2/5 — Propose commands to create a new BIDS root directory.
+Constraints:
+- Use $OUTPUT_ROOT if present, otherwise use: {output_root}
+- Use mkdir -p; don't overwrite existing files.
+- Optionally create a minimal skeleton (.bidsignore, empty dirs if helpful).
+Context:\n{_ctx(dataset_xml, readme_text, publication_text)}
+Output:
+- A brief plan (2–5 bullets) followed by exactly one fenced bash block with commands only.
+"""
+def create_metadata_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
+	"""Build the step 3/5 user prompt, which asks for commands that create BIDS metadata files.
+	``output_root`` is written into the prompt as the fallback for ``$OUTPUT_ROOT``;
+	the three context texts are rendered by ``_ctx``. All parameters are keyword-only.
+	"""
+	return f"""
+Step 3/5 — Propose commands to create required BIDS metadata files.
+Must include:
+- dataset_description.json (Name, BIDSVersion, License if known)
+- participants.tsv and participants.json (headers and column descriptions; can be placeholders)
+- README and LICENSE (best guess or TODO)
+- Task/event placeholders if task fMRI is suspected
+Constraints:
+- Use $OUTPUT_ROOT if present, else {output_root}
+- Create without overwriting existing content; use here-docs or echo safely. If unsure, add TODO markers.
+Context:\n{_ctx(dataset_xml, readme_text, publication_text)}
+Output:
+- Short rationale bullets, then a single fenced bash block with commands only.
+"""
+def create_structure_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
+	"""Build the step 4/5 user prompt, which asks for commands that create an empty BIDS directory tree.
+	``output_root`` is written into the prompt as the fallback for ``$OUTPUT_ROOT``;
+	the three context texts are rendered by ``_ctx``. All parameters are keyword-only.
+	"""
+	return f"""
+Step 4/5 — Propose commands to create the BIDS directory structure.
+Goals:
+- Infer subjects, sessions, and modalities; create sub-<label>/, optional ses-<label>/, and modality folders (anat, dwi, func, fmap, etc.).
+- Do not move/copy raw files yet; create empty structure only.
+Constraints:
+- Use $OUTPUT_ROOT if present, else {output_root}
+- Use mkdir -p.
+Context:\n{_ctx(dataset_xml, readme_text, publication_text)}
+Output:
+- One plan then a single fenced bash block with commands.
+"""
+def rename_and_move_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
+	"""Build the step 5/5 user prompt, which asks for commands that rename and move files into BIDS.
+	``output_root`` is written into the prompt as the alternative to ``$OUTPUT_ROOT``;
+	the three context texts are rendered by ``_ctx``. All parameters are keyword-only.
+	"""
+	return f"""
+Step 5/5 — Propose commands to rename and move files into the BIDS structure.
+Requirements:
+- Map original names to BIDS filenames; demonstrate patterns (e.g., with find/xargs) carefully.
+- Prefer non-destructive copy (cp -n). Use mv only if explicitly stated by the user.
+- Include TODOs for ambiguous mappings; split into small chunks to facilitate review.
+Constraints:
+- Target $OUTPUT_ROOT (or {output_root}).
+- Reference inputs via env vars when possible.
+Context:\n{_ctx(dataset_xml, readme_text, publication_text)}
+Output:
+- A brief mapping summary (text) followed by a single fenced bash block with commands only.
+"""

requirements.txt CHANGED Viewed

@@ -1,18 +1,2 @@
-annotated-types==0.7.0
-anyio==4.11.0
-certifi==2025.11.12
-distro==1.9.0
-dotenv==0.9.9
-h11==0.16.0
-httpcore==1.0.9
-httpx==0.28.1
-idna==3.11
-jiter==0.12.0
-openai==2.8.1
-pydantic==2.12.4
-pydantic_core==2.41.5
-python-dotenv==1.2.1
-sniffio==1.3.1
-tqdm==4.67.1
-typing-inspection==0.4.2
-typing_extensions==4.15.0


1	+ openai>=1.52.0
2	+ python-dotenv>=1.0.1

tools.py DELETED Viewed

File without changes