{ "bomFormat": "CycloneDX", "specVersion": "1.6", "serialNumber": "urn:uuid:e6ef2cd0-b2c1-4b2e-ba7c-adb01c441786", "version": 1, "metadata": { "timestamp": "2025-06-05T09:34:41.455909+00:00", "component": { "type": "machine-learning-model", "bom-ref": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B-6784ad22-545f-555e-aac7-abd08a7c4fc0", "name": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B", "externalReferences": [ { "url": "https://huggingface.co/cognitivecomputations/Dolphin3.0-R1-Mistral-24B", "type": "documentation" } ], "modelCard": { "modelParameters": { "task": "text-generation", "architectureFamily": "mistral", "modelArchitecture": "MistralForCausalLM", "datasets": [ { "ref": "cognitivecomputations/dolphin-r1-e86781ae-297a-5171-8a0d-61460dd40738" }, { "ref": "OpenCoder-LLM/opc-sft-stage1-f0572249-17d5-5847-8e5a-72583934eca0" }, { "ref": "OpenCoder-LLM/opc-sft-stage2-d7e9795c-e343-5ebc-b785-9718e7d737e8" }, { "ref": "microsoft/orca-agentinstruct-1M-v1-bbc92138-5aa0-5737-8ce6-93043b04b4dd" }, { "ref": "microsoft/orca-math-word-problems-200k-611afa9f-b6db-5b9f-9a51-598e4ce79d0e" }, { "ref": "NousResearch/hermes-function-calling-v1-a6d53a4d-e191-5d88-867f-4472e0bdc9f6" }, { "ref": "AI-MO/NuminaMath-CoT-3aa976f5-9ca4-5435-8542-1a123856aafb" }, { "ref": "AI-MO/NuminaMath-TIR-7930ec33-be02-5b4c-9fd0-00effafbc6ce" }, { "ref": "allenai/tulu-3-sft-mixture-4f86da52-fbf0-52de-9b77-716bafb7e098" }, { "ref": "cognitivecomputations/dolphin-coder-69688d29-ae99-5d6e-828c-cfc37b7221b1" }, { "ref": "HuggingFaceTB/smoltalk-0cc0e162-0f38-50bd-b5b0-169bcd97515b" }, { "ref": "cognitivecomputations/samantha-data-9c52f41f-feb0-51dd-921a-2a581d9f2fc7" }, { "ref": "m-a-p/CodeFeedback-Filtered-Instruction-9012249d-db87-5b91-a4e9-2e2bd74e6053" }, { "ref": "m-a-p/Code-Feedback-f4d189a1-046d-5a43-8007-a1eec34e9a7f" } ] }, "properties": [ { "name": "library_name", "value": "transformers" }, { "name": "base_model", "value": "mistralai/Mistral-Small-24B-Base-2501" } ] }, "authors": [ { "name": "cognitivecomputations" } ], "tags": [ "transformers", "safetensors", "mistral", "text-generation", "conversational", "en", "dataset:cognitivecomputations/dolphin-r1", "dataset:OpenCoder-LLM/opc-sft-stage1", "dataset:OpenCoder-LLM/opc-sft-stage2", "dataset:microsoft/orca-agentinstruct-1M-v1", "dataset:microsoft/orca-math-word-problems-200k", "dataset:NousResearch/hermes-function-calling-v1", "dataset:AI-MO/NuminaMath-CoT", "dataset:AI-MO/NuminaMath-TIR", "dataset:allenai/tulu-3-sft-mixture", "dataset:cognitivecomputations/dolphin-coder", "dataset:HuggingFaceTB/smoltalk", "dataset:cognitivecomputations/samantha-data", "dataset:m-a-p/CodeFeedback-Filtered-Instruction", "dataset:m-a-p/Code-Feedback", "base_model:mistralai/Mistral-Small-24B-Base-2501", "base_model:finetune:mistralai/Mistral-Small-24B-Base-2501", "autotrain_compatible", "text-generation-inference", "endpoints_compatible", "region:us" ] } }, "components": [ { "type": "data", "bom-ref": "cognitivecomputations/dolphin-r1-e86781ae-297a-5171-8a0d-61460dd40738", "name": "cognitivecomputations/dolphin-r1", "data": [ { "type": "dataset", "bom-ref": "cognitivecomputations/dolphin-r1-e86781ae-297a-5171-8a0d-61460dd40738", "name": "cognitivecomputations/dolphin-r1", "contents": { "url": "https://huggingface.co/datasets/cognitivecomputations/dolphin-r1", "properties": [ { "name": "configs", "value": "Name of the dataset subset: nonreasoning {\"split\": \"train\", \"path\": \"dolphin-r1-nonreasoning.jsonl\"}" }, { "name": "configs", "value": "Name of the dataset subset: reasoning-deepseek {\"split\": \"train\", \"path\": \"dolphin-r1-reasoning-deepseek.jsonl\"}" }, { "name": "configs", "value": "Name of the dataset subset: reasoning-flash {\"split\": \"train\", \"path\": \"dolphin-r1-reasoning-flash.jsonl\"}" }, { "name": "license", "value": "apache-2.0" } ] }, "governance": { "owners": [ { "organization": { "name": "cognitivecomputations", "url": "https://huggingface.co/cognitivecomputations" } } ] }, "description": "\n\t\n\t\t\n\t\tDolphin R1 \ud83d\udc2c\n\t\n\nAn Apache-2.0 dataset curated by Eric Hartford and Cognitive Computations\n\nDiscord: https://discord.gg/cognitivecomputations\n\n\n\n\t\n\t\t\n\t\n\t\n\t\tSponsors\n\t\n\nOur appreciation for the generous sponsors of Dolphin R1 - Without whom this dataset could not exist.\n\nDria https://x.com/driaforall - Inference Sponsor (DeepSeek)\nChutes https://x.com/rayon_labs - Inference Sponsor (Flash)\nCrusoe Cloud - Compute Sponsor\nAndreessen Horowitz - provided the grant that originally launched\u2026 See the full description on the dataset page: https://huggingface.co/datasets/cognitivecomputations/dolphin-r1." } ] }, { "type": "data", "bom-ref": "OpenCoder-LLM/opc-sft-stage1-f0572249-17d5-5847-8e5a-72583934eca0", "name": "OpenCoder-LLM/opc-sft-stage1", "data": [ { "type": "dataset", "bom-ref": "OpenCoder-LLM/opc-sft-stage1-f0572249-17d5-5847-8e5a-72583934eca0", "name": "OpenCoder-LLM/opc-sft-stage1", "contents": { "url": "https://huggingface.co/datasets/OpenCoder-LLM/opc-sft-stage1", "properties": [ { "name": "configs", "value": "Name of the dataset subset: filtered_infinity_instruct {\"split\": \"train\", \"path\": \"data/filtered_infinity_instruct-*\"}" }, { "name": "configs", "value": "Name of the dataset subset: largescale_diverse_instruct {\"split\": \"train\", \"path\": \"data/largescale_diverse_instruct-*\"}" }, { "name": "configs", "value": "Name of the dataset subset: realuser_instruct {\"split\": \"train\", \"path\": \"data/realuser_instruct-*\"}" }, { "name": "license", "value": "mit" } ] }, "governance": { "owners": [ { "organization": { "name": "OpenCoder-LLM", "url": "https://huggingface.co/OpenCoder-LLM" } } ] }, "description": "\n\n\t\n\t\t\n\t\tOpenCoder Dataset\n\t\n\nThe OpenCoder dataset is composed of the following datasets:\n\nopc-sft-stage1: the sft data used for opencoder sft-stage1 <-- you are here\nopc-sft-stage2: the sft data used for opencoder sft-stage2\nopc-annealing-corpus: the synthetic data & algorithmic corpus used for opencoder annealing\nopc-fineweb-code-corpus: the code-related page recalled from fineweb\nopc-fineweb-math-corpus: the math-related page recalled from finewebrefineCode-code-corpus-meta: the meta-data\u2026 See the full description on the dataset page: https://huggingface.co/datasets/OpenCoder-LLM/opc-sft-stage1." } ] }, { "type": "data", "bom-ref": "OpenCoder-LLM/opc-sft-stage2-d7e9795c-e343-5ebc-b785-9718e7d737e8", "name": "OpenCoder-LLM/opc-sft-stage2", "data": [ { "type": "dataset", "bom-ref": "OpenCoder-LLM/opc-sft-stage2-d7e9795c-e343-5ebc-b785-9718e7d737e8", "name": "OpenCoder-LLM/opc-sft-stage2", "contents": { "url": "https://huggingface.co/datasets/OpenCoder-LLM/opc-sft-stage2", "properties": [ { "name": "configs", "value": "Name of the dataset subset: educational_instruct {\"split\": \"train\", \"path\": \"educational_instruct/train-*\"}" }, { "name": "configs", "value": "Name of the dataset subset: evol_instruct {\"split\": \"train\", \"path\": \"evol_instruct/train-*\"}" }, { "name": "configs", "value": "Name of the dataset subset: mceval_instruct {\"split\": \"train\", \"path\": \"mceval_instruct/train-*\"}" }, { "name": "configs", "value": "Name of the dataset subset: package_instruct {\"split\": \"train\", \"path\": \"package_instruct/train-*\"}" }, { "name": "license", "value": "mit" } ] }, "governance": { "owners": [ { "organization": { "name": "OpenCoder-LLM", "url": "https://huggingface.co/OpenCoder-LLM" } } ] }, "description": "\n\n\t\n\t\t\n\t\tOpenCoder Dataset\n\t\n\nThe OpenCoder dataset is composed of the following datasets:\n\nopc-sft-stage1: the sft data used for opencoder sft-stage1\nopc-sft-stage2: the sft data used for opencoder sft-stage2 <-- you are here\nopc-annealing-corpus: the synthetic data & algorithmic corpus used for opencoder annealing\nopc-fineweb-code-corpus: the code-related page recalled from fineweb\nopc-fineweb-math-corpus: the math-related page recalled from finewebrefineCode-code-corpus-meta: the meta-data\u2026 See the full description on the dataset page: https://huggingface.co/datasets/OpenCoder-LLM/opc-sft-stage2." } ] }, { "type": "data", "bom-ref": "microsoft/orca-agentinstruct-1M-v1-bbc92138-5aa0-5737-8ce6-93043b04b4dd", "name": "microsoft/orca-agentinstruct-1M-v1", "data": [ { "type": "dataset", "bom-ref": "microsoft/orca-agentinstruct-1M-v1-bbc92138-5aa0-5737-8ce6-93043b04b4dd", "name": "microsoft/orca-agentinstruct-1M-v1", "contents": { "url": "https://huggingface.co/datasets/microsoft/orca-agentinstruct-1M-v1", "properties": [ { "name": "task_categories", "value": "question-answering" }, { "name": "language", "value": "en" }, { "name": "size_categories", "value": "1M