Spaces:
Sleeping
Sleeping
| import requests | |
| import json | |
| import os | |
# Endpoint of a local OpenAI-compatible chat-completions server
# (e.g. LM Studio / llama.cpp server listening on port 1234).
url = "http://localhost:1234/v1/chat/completions"
# Requests are sent as JSON payloads.
headers = {
    "Content-Type": "application/json"
}
# System instruction
# Module-level chat transcript shared (and mutated) by process_and_build_json():
# user batches and assistant replies are appended so the model keeps context
# across the 100-item batches.
conversation_history = [
    {
        "role": "system",
        "content": "You are an assistant that helps build a BIDS dataset_description.json file. "
        "Because the file tree may be extremely large, you will receive only 100 items at a time."
    }
]
# Step 1: Collect basic dataset info
def get_basic_dataset_info():
    """Interactively prompt the user for the dataset's core metadata.

    Returns a dict with keys ``name``, ``version``, ``description``,
    ``authors`` and ``references``. The last two are lists of stripped
    strings, or ``None`` when the user answers 'None' (any case).
    """
    def parse_list(raw):
        # A literal 'None' (case-insensitive) means the value is unknown.
        cleaned = raw.strip()
        if cleaned.lower() == "none":
            return None
        return [part.strip() for part in cleaned.split(",")]

    print("Please provide the basic details of your dataset:")
    name = input("Dataset Name: ")
    version = input("Dataset Version: ")
    description = input("Dataset Description: ")
    authors = parse_list(input("Authors (comma separated, type 'None' if unknown): "))
    references = parse_list(input("References and Citations (comma separated, type 'None' if unknown): "))
    return {
        "name": name,
        "version": version,
        "description": description,
        "authors": authors,
        "references": references,
    }
# Step 2: Root folder
def get_root_folder():
    """Prompt until the user supplies a path to an existing directory."""
    while True:
        folder = input("Please provide the root folder containing the dataset files: ")
        if os.path.isdir(folder):
            return folder
        print("Invalid folder. Try again.")
# Step 3: Load folder content in batches of 100
def get_folder_batches(folder, batch_size=100):
    """Yield ``(paths, batch_number)`` tuples over the folder's entries.

    Paths are full (joined) paths of the folder's direct children;
    batch numbers start at 1 and each batch holds at most *batch_size*
    items.
    """
    paths = [os.path.join(folder, name) for name in os.listdir(folder)]
    for number, start in enumerate(range(0, len(paths), batch_size), start=1):
        yield paths[start:start + batch_size], number
# Step 4: LLM interaction + JSON building
def process_and_build_json(batches, basic_info):
    """Send each batch of file paths to the LLM and collect its replies.

    Builds a BIDS-style ``dataset_description`` dict from *basic_info*,
    then for every ``(items, number)`` pair in *batches* sends the item
    list to the chat-completions endpoint (appending to the module-level
    ``conversation_history``) and stores the assistant's reply under a
    ``batch_<n>`` key.

    Raises requests.HTTPError on a non-2xx response and
    requests.Timeout if the server does not answer in time.
    """
    dataset_description = {
        "Name": basic_info["name"],
        "BIDSVersion": "1.0.0",
        "DatasetType": "raw",
        "License": "CC0",
        "Authors": basic_info["authors"] if basic_info["authors"] else "None",
        "Acknowledgements": "None",
        "HowToAcknowledge": "None",
        "ReferencesAndLinks": basic_info["references"] if basic_info["references"] else "None",
        "DatasetDescription": basic_info["description"]
    }
    for batch_items, batch_number in batches:
        # Describe what is being sent
        message = (
            f"Batch {batch_number}: Here are up to 100 items from the dataset root.\n"
            f"Total items in this batch: {len(batch_items)}\n"
            f"Items:\n" + "\n".join(batch_items)
        )
        conversation_history.append({"role": "user", "content": message})
        payload = {
            "model": "deepseek/deepseek-r1-0528-qwen3-8b",
            "messages": conversation_history,
            "temperature": 0.7,
            "max_tokens": 500,
            "stream": False
        }
        # json= serializes the payload and sets the body in one step;
        # a timeout prevents the script from hanging forever on a dead server.
        response = requests.post(url, headers=headers, json=payload, timeout=300)
        # Fail fast with a clear HTTP error instead of a KeyError on 'choices'.
        response.raise_for_status()
        model_response = response.json()
        last_message = model_response['choices'][0]['message']['content']
        print("\n--- LLM Response for Batch", batch_number, "---")
        print(last_message)
        conversation_history.append({"role": "assistant", "content": last_message})
        # Store response under the batch key
        dataset_description[f"batch_{batch_number}"] = last_message
    return dataset_description
# Step 5: Save JSON
def save_json(dataset_description):
    """Write *dataset_description* to ./dataset_description.json.

    The file is written as UTF-8; ensure_ascii=False keeps non-ASCII
    author names and descriptions human-readable instead of \\u-escaped.
    """
    out = "dataset_description.json"
    with open(out, "w", encoding="utf-8") as f:
        json.dump(dataset_description, f, indent=4, ensure_ascii=False)
    print(f"\nSaved: {out}")
# Main
def main():
    """Drive the interactive dataset_description.json builder end to end."""
    basic_info = get_basic_dataset_info()
    # Infer authors from citations if possible
    if basic_info["authors"] is None and basic_info["references"]:
        print("Attempting to infer authors from citations...")
        basic_info["authors"] = [
            "Author inferred from reference: " + ref
            for ref in basic_info["references"]
        ]
    root_folder = get_root_folder()
    batch_stream = get_folder_batches(root_folder, batch_size=100)
    save_json(process_and_build_json(batch_stream, basic_info))


if __name__ == "__main__":
    main()