Spaces:

stefanches
/

OpenBIDSifier

Sleeping

App Files Files Community

Ekain Arrieta committed on Nov 21, 2025

Commit

57f68aa

2 Parent(s): 9e17db1 ba7c80f

Merge branch 'main' into testing

Browse files

Files changed (8) hide show

.gitignore +1 -0
LM_Studio_chat.py +55 -0
__pycache__/agent.cpython-314.pyc +0 -0
agent.py +27 -16
cli.py +24 -15
extract_dir_structure_xml.py +1 -3
prompts.py +4 -18
requirements.txt +1 -0

.gitignore CHANGED Viewed

@@ -2,3 +2,4 @@
 __pycache__/
 .venv
 testing_structure.xml

 __pycache__/
 .venv
 testing_structure.xml
+/Non_Bids_Dataset

LM_Studio_chat.py ADDED Viewed

	@@ -0,0 +1,55 @@

+# Interactive console chat against the chat-completions endpoint at `url`.
+# Keeps the whole conversation in memory and resends it on every turn.
+# Type 'exit' (or send EOF / Ctrl-C) to quit.
+import requests
+import json
+url = "http://localhost:1234/v1/chat/completions"
+headers = {
+    "Content-Type": "application/json"
+}
+# (connect, read) timeouts in seconds: fail fast when the server is not
+# reachable, but leave generous time for the model to finish generating.
+REQUEST_TIMEOUT = (10, 600)
+# Initialize conversation history with the system message
+conversation_history = [
+    {
+        "role": "system",
+        "content": "Be a helpful assistant. Be concise."
+    }
+]
+while True:
+    # Get user input; treat EOF (Ctrl-D) and Ctrl-C like an explicit 'exit'
+    # instead of dying with a traceback.
+    try:
+        user_input = input("You: ")
+    except (EOFError, KeyboardInterrupt):
+        print("\nEnding conversation...")
+        break
+    # Exit the loop if the user types 'exit'
+    if user_input.lower() == 'exit':
+        print("Ending conversation...")
+        break
+    # Add user's message to the conversation history
+    conversation_history.append({
+        "role": "user",
+        "content": user_input
+    })
+    # Prepare the data for the API call
+    data = {
+        "model": "deepseek/deepseek-r1-0528-qwen3-8b",
+        "messages": conversation_history,
+        "temperature": 0.7,
+        "max_tokens": -1,
+        "stream": False
+    }
+    try:
+        # Make the POST request to the API (bounded by REQUEST_TIMEOUT)
+        response = requests.post(url, headers=headers, data=json.dumps(data), timeout=REQUEST_TIMEOUT)
+        # Turn 4xx/5xx replies into an exception rather than parsing an error body
+        response.raise_for_status()
+        # Get the model's response
+        model_response = response.json()
+        # Extract the last model response (the assistant's content)
+        last_message = model_response['choices'][0]['message']['content']
+    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:
+        print(f"Request failed: {err}")
+        # Drop the unanswered user turn so the history sent on the next
+        # request does not contain a user message without a reply.
+        conversation_history.pop()
+        continue
+    print(f"Model: {last_message}")
+    # Add model's response to the conversation history for the next round
+    conversation_history.append({
+        "role": "assistant",
+        "content": last_message
+    })

__pycache__/agent.cpython-314.pyc ADDED Viewed

Binary file (4.11 kB). View file

agent.py CHANGED Viewed

@@ -3,7 +3,7 @@ import os
 from dotenv import load_dotenv
 import dspy
-import prompts as prompts_mod
 class BIDSifierAgent:
@@ -13,14 +13,21 @@ class BIDSifierAgent:
 		load_dotenv()
 		if provider=="openai":
-			lm = dspy.LM(f"{provider}/{model}", api_key=os.getenv("OPENAI_API_KEY"))
 		else:
-			lm = dspy.LM(f"{provider}/{model}", api_key="")
 		dspy.configure(lm=lm)
 		self.llm = lm
 		self.model = model or os.getenv("BIDSIFIER_MODEL", "gpt-4o-mini")
 		self.temperature = temperature
 	def _build_user_prompt(self, step: str, context: Dict[str, Any]) -> str:
 		dataset_xml = context.get("dataset_xml")
 		readme_text = context.get("readme_text")
@@ -28,34 +35,27 @@ class BIDSifierAgent:
 		output_root = context.get("output_root", "./bids_output")
 		if step == "summary":
-			return prompts_mod.summarize_dataset_prompt(
-				dataset_xml=dataset_xml,
-				readme_text=readme_text,
-				publication_text=publication_text,
-			)
-		if step == "create_root":
-			return prompts_mod.create_root_prompt(
-				output_root=output_root,
 				dataset_xml=dataset_xml,
 				readme_text=readme_text,
 				publication_text=publication_text,
 			)
 		if step == "create_metadata":
-			return prompts_mod.create_metadata_prompt(
 				output_root=output_root,
 				dataset_xml=dataset_xml,
 				readme_text=readme_text,
 				publication_text=publication_text,
 			)
 		if step == "create_structure":
-			return prompts_mod.create_structure_prompt(
 				output_root=output_root,
 				dataset_xml=dataset_xml,
 				readme_text=readme_text,
 				publication_text=publication_text,
 			)
 		if step == "rename_move":
-			return prompts_mod.rename_and_move_prompt(
 				output_root=output_root,
 				dataset_xml=dataset_xml,
 				readme_text=readme_text,
@@ -64,7 +64,7 @@ class BIDSifierAgent:
 		raise ValueError(f"Unknown step: {step}")
 	def run_step(self, step: str, context: Dict[str, Any]) -> str:
-		system_msg = prompts_mod.system_prompt()
 		user_msg = self._build_user_prompt(step, context)
 		resp = self.llm(
 			messages=[
@@ -75,6 +75,17 @@ class BIDSifierAgent:
 		)
 		return resp[0]
 __all__ = ["BIDSifierAgent"]

 from dotenv import load_dotenv
 import dspy
+import prompts
 class BIDSifierAgent:
 		load_dotenv()
 		if provider=="openai":
+			if model == "gpt-5": #reasoning model that requires special handling
+				temperature = 1.0
+				lm = dspy.LM(f"{provider}/{model}", api_key=os.getenv("OPENAI_API_KEY"), temperature = temperature, max_tokens = 40000)
+			else:
+				lm = dspy.LM(f"{provider}/{model}", api_key=os.getenv("OPENAI_API_KEY"), temperature = temperature, max_tokens = 10000)
 		else:
+			lm = dspy.LM(f"{provider}/{model}", api_key="", max_tokens=10000)
 		dspy.configure(lm=lm)
 		self.llm = lm
 		self.model = model or os.getenv("BIDSIFIER_MODEL", "gpt-4o-mini")
 		self.temperature = temperature
 	def _build_user_prompt(self, step: str, context: Dict[str, Any]) -> str:
 		dataset_xml = context.get("dataset_xml")
 		readme_text = context.get("readme_text")
 		output_root = context.get("output_root", "./bids_output")
 		if step == "summary":
+			return prompts.summarize_dataset_prompt(
 				dataset_xml=dataset_xml,
 				readme_text=readme_text,
 				publication_text=publication_text,
 			)
 		if step == "create_metadata":
+			return prompts.create_metadata_prompt(
 				output_root=output_root,
 				dataset_xml=dataset_xml,
 				readme_text=readme_text,
 				publication_text=publication_text,
 			)
 		if step == "create_structure":
+			return prompts.create_structure_prompt(
 				output_root=output_root,
 				dataset_xml=dataset_xml,
 				readme_text=readme_text,
 				publication_text=publication_text,
 			)
 		if step == "rename_move":
+			return prompts.rename_and_move_prompt(
 				output_root=output_root,
 				dataset_xml=dataset_xml,
 				readme_text=readme_text,
 		raise ValueError(f"Unknown step: {step}")
 	def run_step(self, step: str, context: Dict[str, Any]) -> str:
+		system_msg = prompts.system_prompt()
 		user_msg = self._build_user_prompt(step, context)
 		resp = self.llm(
 			messages=[
 		)
 		return resp[0]
+	def run_query(self, query: str) -> str:
+		"""Send a free-form user query to the LM and return its first completion.
+
+		The request consists of the shared system prompt followed by `query`
+		as the only user message, sampled at `self.temperature`.
+		"""
+		conversation = [
+			{"role": "system", "content": prompts.system_prompt()},
+			{"role": "user", "content": query},
+		]
+		completions = self.llm(messages=conversation, temperature=self.temperature)
+		return completions[0]
 __all__ = ["BIDSifierAgent"]

cli.py CHANGED Viewed

@@ -60,7 +60,11 @@ def _print_commands(commands: List[str]) -> None:
     if not commands:
         print("(No commands detected in fenced bash block.)")
         return
-    print("\nProposed commands (NOT executed):")
     for c in commands:
         print(f"  {c}")
@@ -77,7 +81,15 @@ def short_divider(title: str) -> None:
     print("\n" + "=" * 80)
     print(title)
     print("=" * 80 + "\n")
 def main(argv: Optional[List[str]] = None) -> int:
     parser = argparse.ArgumentParser(
@@ -106,6 +118,7 @@ def main(argv: Optional[List[str]] = None) -> int:
         "readme_text": readme_text,
         "publication_text": publication_text,
         "output_root": args.output_root,
     }
     command_env = {
@@ -123,40 +136,36 @@ def main(argv: Optional[List[str]] = None) -> int:
     short_divider("Step 1: Understand dataset")
     summary = agent.run_step("summary", context)
     print(summary)
     if not prompt_yes_no("Proceed to create BIDS root?", default=True):
         return 0
-    short_divider("Step 2: Propose commands to create BIDS root")
-    root_plan = agent.run_step("create_root", context)
-    print(root_plan)
-    cmds = parse_commands_from_markdown(root_plan)
-    _print_commands(cmds)
-    if not prompt_yes_no("Proceed to create metadata files?", default=True):
-        return 0
-    short_divider("Step 3: Propose commands to create metadata files")
     meta_plan = agent.run_step("create_metadata", context)
     print(meta_plan)
     cmds = parse_commands_from_markdown(meta_plan)
     _print_commands(cmds)
     if not prompt_yes_no("Proceed to create empty BIDS structure?", default=True):
         return 0
-    short_divider("Step 4: Propose commands to create dataset structure")
     struct_plan = agent.run_step("create_structure", context)
     print(struct_plan)
     cmds = parse_commands_from_markdown(struct_plan)
     _print_commands(cmds)
     if not prompt_yes_no("Proceed to propose renaming/moving?", default=True):
         return 0
-    short_divider("Step 5: Propose commands to rename/move files")
     move_plan = agent.run_step("rename_move", context)
     print(move_plan)
     cmds = parse_commands_from_markdown(move_plan)
     _print_commands(cmds)
-    print("\nAll steps completed. Commands were only displayed (never executed). Use them manually or in a future Gradio/HF Space interface.")
     return 0

     if not commands:
         print("(No commands detected in fenced bash block.)")
         return
+    print("-----"*10)
+    print("COMMANDS TO EXECUTE:")
+    print("-----"*10)
     for c in commands:
         print(f"  {c}")
     print("\n" + "=" * 80)
     print(title)
     print("=" * 80 + "\n")
+def enter_feedback_loop(agent: BIDSifierAgent, context: dict) -> dict:
+    """Collect free-text feedback from the user until an empty line is entered.
+
+    Each non-empty entry is appended to context["user_feedback"], sent to the
+    agent via agent.run_query(), and the agent's reply is printed.
+
+    Args:
+        agent: Object whose run_query(str) returns the reply to print.
+        context: Step context; mutated in place and also returned.
+
+    Returns:
+        The same context dict with the accumulated feedback.
+    """
+    feedback = input("\nAny comments or corrections to the summary? (press Enter to skip): ").strip()
+    while feedback:
+        # Newline-terminate each entry so successive comments do not run
+        # together; .get() tolerates a context built without the key.
+        context["user_feedback"] = context.get("user_feedback", "") + feedback + "\n"
+        # NOTE(review): only the feedback text is sent here, without the
+        # dataset context - confirm that is intended.
+        agent_response = agent.run_query(feedback)
+        print(agent_response)
+        feedback = input("\nAny additional comments or corrections? (press Enter to skip): ").strip()
+    return context
 def main(argv: Optional[List[str]] = None) -> int:
     parser = argparse.ArgumentParser(
         "readme_text": readme_text,
         "publication_text": publication_text,
         "output_root": args.output_root,
+        "user_feedback": "",
     }
     command_env = {
     short_divider("Step 1: Understand dataset")
     summary = agent.run_step("summary", context)
     print(summary)
+    context = enter_feedback_loop(agent, context)
     if not prompt_yes_no("Proceed to create BIDS root?", default=True):
         return 0
+    short_divider("Step 2: Propose commands to create metadata files")
     meta_plan = agent.run_step("create_metadata", context)
     print(meta_plan)
     cmds = parse_commands_from_markdown(meta_plan)
     _print_commands(cmds)
+    context = enter_feedback_loop(agent, context)
     if not prompt_yes_no("Proceed to create empty BIDS structure?", default=True):
         return 0
+    short_divider("Step 3: Propose commands to create dataset structure")
     struct_plan = agent.run_step("create_structure", context)
     print(struct_plan)
     cmds = parse_commands_from_markdown(struct_plan)
     _print_commands(cmds)
+    context = enter_feedback_loop(agent, context)
     if not prompt_yes_no("Proceed to propose renaming/moving?", default=True):
         return 0
+    short_divider("Step 4: Propose commands to rename/move files")
     move_plan = agent.run_step("rename_move", context)
     print(move_plan)
     cmds = parse_commands_from_markdown(move_plan)
     _print_commands(cmds)
+    context = enter_feedback_loop(agent, context)
+    print("\nAll steps completed. Commands were only displayed - use them manually")
     return 0

extract_dir_structure_xml.py CHANGED Viewed

@@ -2,8 +2,6 @@ import sys
 from pathlib import Path
 import xml.etree.ElementTree as ET
-FOLDER_LOCATION = sys.argv[1] if len(sys.argv) > 1 else "sample_dataset"
 def build_xml(dir_path: Path, root_level: bool = True) -> ET.Element:
     """
     Recursively build the XML tree.
@@ -43,7 +41,7 @@ def indent(elem: ET.Element, level: int = 0) -> None:
     if level and (not elem.tail or not elem.tail.strip()):
         elem.tail = i
-def main(directory: str = FOLDER_LOCATION) -> None:
     path = Path(directory)
     if not path.is_dir():
         print(f"Error: '{directory}' is not a directory.", file=sys.stderr)

 from pathlib import Path
 import xml.etree.ElementTree as ET
 def build_xml(dir_path: Path, root_level: bool = True) -> ET.Element:
     """
     Recursively build the XML tree.
     if level and (not elem.tail or not elem.tail.strip()):
         elem.tail = i
+def main(directory: str = "sample_dataset") -> None:
     path = Path(directory)
     if not path.is_dir():
         print(f"Error: '{directory}' is not a directory.", file=sys.stderr)

prompts.py CHANGED Viewed

@@ -33,7 +33,7 @@ def system_prompt() -> str:
 def summarize_dataset_prompt(*, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
 	return f"""
-Step 1/5 — Understand the dataset and produce a short summary.
 Requirements:
 - 8–15 concise bullets covering subjects/sessions, modalities (T1w/T2w/DWI/fMRI/etc.), tasks, naming patterns, id conventions.
@@ -47,25 +47,11 @@ Output:
 """
-def create_root_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
-	return f"""
-Step 2/5 — Propose commands to create a new BIDS root directory.
-Constraints:
-- Use $OUTPUT_ROOT if present, otherwise use: {output_root}
-- Use mkdir -p; don't overwrite existing files.
-- Optionally create a minimal skeleton (.bidsignore, empty dirs if helpful).
-Context:\n{_ctx(dataset_xml, readme_text, publication_text)}
-Output:
-- A brief plan (2–5 bullets) followed by exactly one fenced bash block with commands only.
-"""
 def create_metadata_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
 	return f"""
-Step 3/5 — Propose commands to create required BIDS metadata files.
 Must include:
 - dataset_description.json (Name, BIDSVersion, License if known)
@@ -86,7 +72,7 @@ Output:
 def create_structure_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
 	return f"""
-Step 4/5 — Propose commands to create the BIDS directory structure.
 Goals:
 - Infer subjects, sessions, and modalities; create sub-<label>/, optional ses-<label>/, and modality folders (anat, dwi, func, fmap, etc.).
@@ -105,7 +91,7 @@ Output:
 def rename_and_move_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
 	return f"""
-Step 5/5 — Propose commands to rename and move files into the BIDS structure.
 Requirements:
 - Map original names to BIDS filenames; demonstrate patterns (e.g., with find/xargs) carefully.

 def summarize_dataset_prompt(*, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
 	return f"""
+Step 1/4 — Understand the dataset and produce a short summary.
 Requirements:
 - 8–15 concise bullets covering subjects/sessions, modalities (T1w/T2w/DWI/fMRI/etc.), tasks, naming patterns, id conventions.
 """
 def create_metadata_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
 	return f"""
+Step 2/4 — Propose commands to create required BIDS metadata files.
 Must include:
 - dataset_description.json (Name, BIDSVersion, License if known)
 def create_structure_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
 	return f"""
+Step 3/4 — Propose commands to create the BIDS directory structure.
 Goals:
 - Infer subjects, sessions, and modalities; create sub-<label>/, optional ses-<label>/, and modality folders (anat, dwi, func, fmap, etc.).
 def rename_and_move_prompt(*, output_root: str, dataset_xml: Optional[str], readme_text: Optional[str], publication_text: Optional[str]) -> str:
 	return f"""
+Step 4/4 — Propose commands to rename and move files into the BIDS structure.
 Requirements:
 - Map original names to BIDS filenames; demonstrate patterns (e.g., with find/xargs) carefully.

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 openai>=1.52.0
 python-dotenv>=1.0.1

 openai>=1.52.0
 python-dotenv>=1.0.1
+dspy