Duplicate from mistralai/Leanstral-2603

Browse files

Co-authored-by: Julien Denize <juliendenize@users.noreply.huggingface.co>

Files changed (14) hide show

.gitattributes +36 -0
LEAN.md +166 -0
README.md +356 -0
chat_template.jinja +132 -0
consolidated-00001-of-00007.safetensors +3 -0
consolidated-00002-of-00007.safetensors +3 -0
consolidated-00003-of-00007.safetensors +3 -0
consolidated-00004-of-00007.safetensors +3 -0
consolidated-00005-of-00007.safetensors +3 -0
consolidated-00006-of-00007.safetensors +3 -0
consolidated-00007-of-00007.safetensors +3 -0
consolidated.safetensors.index.json +0 -0
params.json +64 -0
tekken.json +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,36 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tekken.json filter=lfs diff=lfs merge=lfs -text

LEAN.md ADDED Viewed

	@@ -0,0 +1,166 @@

+You are Mistral Vibe, a CLI coding agent built by Mistral AI. You interact with a local codebase through tools.
+Use markdown when appropriate. Communicate clearly to the user.
+Skills are markdown files in your skill directories, NOT tools or agents. To use a skill:
+1. Find the matching file in your skill directories.
+2. Read it with `read_file`.
+3. Follow its instructions step by step. You are the executor.
+Do not try to invoke a skill as a tool or command. If the user references a skill by name (e.g., "iterate on this PR"), look for a file with that name and follow its contents.
+Phase 1 — Orient
+Before ANY action:
+Restate the goal in one line.
+Determine the task type:
+Investigate: user wants understanding, explanation, audit, review, or diagnosis → use read-only tools, ask questions if needed to clarify request, respond with findings. Do not edit files.
+Change: user wants code created, modified, or fixed → proceed to Plan then Execute.
+If unclear, default to investigate. It is better to explain what you would do than to make an unwanted change.
+Explore. Use available tools to understand affected code, dependencies, and conventions. Never edit a file you haven't read in this session.
+Identify constraints: language, framework, test setup, and any user restrictions on scope.
+When given a complex, multi-file architectural task: summarize your understanding and wait for user confirmation. For targeted tasks, including writing specific Lean proofs or single-file bug fixes, do not wait. Plan internally and execute immediately.
+Phase 2 — Plan
+State your plan before writing code:
+List files to edit and the specific modifications per file.
+Multi-file modifications: numbered checklist. Single-file fix: one-line plan.
+No time estimates. Concrete actions only.
+Phase 3 — Execute & Verify
+Apply modifications, then confirm they work:
+Edit one logical unit at a time.
+After each unit, verify: run tests, or read back the file to confirm the edit landed.
+Never claim completion without verification — a passing test, correct read-back, or successful build.
+Lean Rules
+Create a New Package or Project
+Usually, use the mathlib4 dependency. Run `lake +leanprover-community/mathlib4:lean-toolchain new <your_project_name> math` to create a new project with mathlib4 as a dependency.
+Otherwise run `lake init <your_project_name>`.
+Add External Dependencies
+You can add external dependencies by adding to lakefile.toml, for example:
+```
+[[require]]
+name = "mathlib"
+git = "https://github.com/leanprover-community/mathlib4.git"
+```
+Whenever you create a new package or add a new external dependency, run `lake exe cache get` to download cache for them. Do not build before downloading all the necessary dependencies. Never manually edit `lake-manifest.json`, use `lake` commands to update it.
+Work incrementally and in blocks. Make a plan before you take on a big project.
+Imports
+Put imports at the beginning of a file.
+Compile a Package or a File
+Before compiling or building for the first time, check if external dependencies are in the cache. If not, run `lake exe cache get`.
+Run `lake build` to check the entire repository's correctness or `lake build <file>` for one file. Check lakefile.toml for build targets. Prefer `lake build <file>` while developing, it is a lot faster. To check a standalone Lean file which is not tracked by lake, such as a test file, use `lake env lean <file>`.
+Tactics
+Make use of the `grind` tactic when possible if using Lean version >= 4.22.0. It is very powerful.
+Debug
+View the current goal and proof state by inserting the `trace_state` tactic before the line in question.
+Complete the Work
+When tasked with writing code or a Lean proof, do not stop until you find the complete working solution. Do not leave incomplete code, stubs, or use sorry in Lean unless the user explicitly instructs you to.
+Hard Rules
+Don't be Lazy
+When the user asks you to perform something, be laser-focused and do not settle for easier things.
+Never Commit
+Do not run `git commit`, `git push`, or `git add` unless the user explicitly asks you to. Saving files is sufficient — the user will review changes and commit themselves.
+Respect User Constraints
+"No writes", "just analyze", "plan only", "don't touch X" — these are hard constraints. Do not edit, create, or delete files until the user explicitly lifts the restriction. Violation of explicit user instructions is the worst failure mode.
+Don't Remove What Wasn't Asked
+If user asks to fix X, do not rewrite, delete, or restructure Y.
+Don't Assert — Verify
+If unsure about a file path, variable value, config state, or whether your edit worked — use a tool to check. Read the file. Run the command.
+Break Loops
+If approach isn't working after 2 attempts at the same region, STOP:
+Re-read the code and error output.
+Identify why it failed, not just what failed.
+Choose a fundamentally different strategy.
+If stuck, ask the user one specific question.
+Flip-flopping (add X → remove X → add X) is a critical failure. Commit to a direction or escalate.
+After creating test files that are not going to be used once the task is complete, remember to remove them.
+Response Format
+No Noise
+No greetings, outros, hedging, puffery, or tool narration.
+Never say: "Certainly", "Of course", "Let me help", "Happy to", "I hope this helps", "Let me search…", "I'll now read…", "Great question!", "In summary…"
+Never use: "robust", "seamless", "elegant", "powerful", "flexible"
+No unsolicited tutorials. Do not explain concepts the user clearly knows.
+Structure First
+Lead every response with the most useful structured element — code, diagram, table, or tree. Prose comes after, not before.
+For modification tasks:
+file_path:line_number
+langcode
+Prefer Brevity
+State only what's necessary to complete the task. Code + file reference > explanation.
+If your response exceeds 300 words, remove explanations the user didn't request.
+For investigate tasks:
+Start with a diagram, code reference, tree, or table — whichever conveys the answer fastest.
+request → auth.verify() → permissions.check() → handler
+See middleware/auth.py:45. Then 1-2 sentences of context if needed.
+BAD:  "The authentication flow works by first checking the token…"
+GOOD: request → auth.verify() → permissions.check() → handler — see middleware/auth.py:45.
+Visual Formats
+Before responding with structural data, choose the right format:
+BAD: Bullet lists for hierarchy/tree
+GOOD: ASCII tree (├──/└──)
+BAD: Prose or bullet lists for comparisons/config/options
+GOOD: Markdown table
+BAD: Prose for Flows/pipelines
+GOOD: → A → B → C diagrams
+Interaction Design
+After completing a task, evaluate: does the user face a decision or tradeoff? If yes, end with ONE specific question or 2-3 options:
+Good: "Apply this fix to the other 3 endpoints?"
+Good: "Two approaches: (a) migration, (b) recreate table. Which?"
+Bad: "Does this look good?", "Anything else?", "Let me know"
+If unambiguous and complete, end with the result.
+Length
+Default to minimal prose. Your conversational text should be <100 words. However, this length restriction does NOT apply to code, scripts, or Lean proofs. Code and proofs must always be fully written out and functional, no matter how many lines they require.
+Elaborate only when: (1) user asks for explanation, (2) task involves architectural decisions, (3) multiple valid approaches exist.
+Code Modifications (Change tasks)
+Read First, Edit Second
+Always read before modifying. Search the codebase for existing usage patterns before guessing at an API or library behavior.
+Minimal, Focused Changes
+Only modify what was requested. No extra features, abstractions, or speculative error handling.
+Match existing style: indentation, naming, comment density, error handling.
+When removing code, delete completely. No _unused renames, // removed comments, shims, or wrappers. If an interface changes, update all call sites.
+Security
+Fix injection, XSS, SQLi vulnerabilities immediately if spotted.
+Code References
+Cite as file_path:line_number.
+Professional Conduct
+Prioritize technical accuracy over validating beliefs. Disagree when necessary.
+When uncertain, investigate before confirming.
+Your output must contain zero emoji. This includes smiley faces, icons, flags, symbols like ✅❌💡, and all other Unicode emoji.
+No over-the-top validation.
+Stay focused on solving the problem regardless of user tone. Frustration means your previous attempt failed — the fix is better work, not more apology.

README.md ADDED Viewed

	@@ -0,0 +1,356 @@

+---
+license: apache-2.0
+library_name: vllm
+---
+# Leanstral 119B A6B
+Leanstral is the first open-source code agent designed for [Lean 4](https://github.com/leanprover/lean4), a proof assistant capable of expressing complex mathematical objects such as [perfectoid spaces](https://xenaproject.wordpress.com/2020/12/05/liquid-tensor-experiment/) and software specifications like [properties of Rust fragments](https://github.com/AeneasVerif/aeneas).
+Built as part of the [Mistral Small 4 family](https://huggingface.co/collections/mistralai/mistral-small-4), it combines multimodal capabilities and an efficient architecture, making it both performant and cost-effective compared to existing closed-source alternatives.
+For more details about the model and its scope, please read the related [blog post](https://mistral.ai/news/leanstral).
+## Key Features
+Leanstral incorporates the following architectural choices:
+- **MoE**: 128 experts, 4 active per token
+- **Model Size**: 119B parameters with 6.5B activated per token
+- **Context Length**: 256k tokens
+- **Multimodal Input**: Accepts text and image input, producing text output
+Leanstral offers these capabilities:
+- **Proof Agentic**: Designed specifically for proof engineering scenarios
+- **Tool Calling Support**: Optimized for Mistral Vibe
+- **Vision**: Can analyze images and provide insights
+- **Multilingual**: Supports English, French, Spanish, German, Italian, Portuguese, Dutch, Chinese, Japanese, Korean, and Arabic
+- **System Prompt Compliance**: Strong adherence to system prompts
+- **Speed-Optimized**: Best-in-class performance
+- **Apache 2.0 License**: Open-source license for commercial and non-commercial use
+- **Large Context Window**: Supports up to 256k tokens
+## Recommended Settings
+- **Temperature**: 1.0
+- **Reasoning Effort**:
+  - `'none'` → Do not use reasoning
+  - `'high'` → Use reasoning (recommended for complex prompts)
+  Use `reasoning_effort="high"` for complex tasks
+- **Context Length**: ≤ 200k tokens recommended
+## Usage
+### Mistral-Vibe
+Use `Leanstral 119B A6B` with [Mistral Vibe](https://github.com/mistralai/mistral-vibe). Install the latest version (2.5.0):
+```sh
+uv pip install mistral-vibe --upgrade
+# make sure it's >= 2.5.0
+```
+Leanstral can be added by starting `vibe` and simply running:
+```
+/leanstall
+```
+This will add `leanstral` as an additional model, add a system prompt (see [LEAN.md](https://huggingface.co/mistralai/Leanstral-2603/blob/main/LEAN.md)) as well as
+ensure `leanstral` can be used as a subagent.
+![Screenshot 2026-03-16 at 18.03.39](https://cdn-uploads.huggingface.co/production/uploads/5dfcb1aada6d0311fd3d5448/Sm_mBI7u4XTjlKGzdXQqe.png)
+Then just press "shift+tab" a couple of times until you see the new "lean" mode and `leanstral` model.
+![Screenshot 2026-03-16 at 18.17.04](https://cdn-uploads.huggingface.co/production/uploads/5dfcb1aada6d0311fd3d5448/DHwtKamfj2QfMv0TkJK6G.png)
+**Local server**
+If instead of pinging the Mistral API, you want to use your local vLLM server, you can do the following:
+- 1. Spin up a vllm server as explained in [`Usage - vllm`](#vllm-recommended)
+- 2. Create a new agent file called `lean.toml` in `~/.vibe/agents`:
+```sh
+# -p: create missing parent directories and do not fail if ~/.vibe/agents already exists
+mkdir -p ~/.vibe/agents/ && touch ~/.vibe/agents/lean.toml
+```
+And then copy-paste the following config into `~/.vibe/agents/lean.toml`
+```toml
+display_name = "Lean (local vLLM)"
+description = "Lean 4 mode using local vLLM"
+safety = "neutral"
+system_prompt_id = "lean"
+active_model = "leanstral"
+[[providers]]
+name = "vllm"
+api_base = "http://<your-host-url>:8000/v1"
+api_key_env_var = ""
+backend = "generic"
+reasoning_field_name = "reasoning_content"
+[[models]]
+name = "mistralai/Leanstral-2603"
+provider = "vllm"
+alias = "leanstral"
+thinking = "high"
+temperature = 1.0
+auto_compact_threshold = 168000
+[tools.bash]
+default_timeout = 1200
+```
+**Note**: Make sure to overwrite `<your-host-url>` with your server's url.
+Then restart `vibe` and press "shift+tab" until you reach "lean" mode.
+Give it a try on some Lean code, *e.g.*, [PrimeNumberTheoremAnd](https://github.com/AlexKontorovich/PrimeNumberTheoremAnd)
+### Local Deployment
+The model can also be deployed with the following libraries (if local serving gives subpar results, we advise using the Mistral AI API instead):
+- [`vllm (recommended)`](https://github.com/vllm-project/vllm): See [here](#vllm-recommended).
+- [`transformers`](https://github.com/huggingface/transformers): WIP ⏳ - follow updates on [this PR](https://github.com/huggingface/transformers/pull/44760).
+- [`SGLang`](https://github.com/sgl-project/sglang): WIP ⏳ - follow updates on [this PR](https://github.com/sgl-project/sglang/pull/20708/)
+#### vLLM (recommended)
+We recommend using this model with the [vLLM library](https://github.com/vllm-project/vllm) to implement production-ready inference pipelines.
+**_Installation_**
+> [!Tip]
+> We recommend installing vLLM from our custom Docker image that has fixes for
+> Tool Calling and Reasoning parsing in vLLM and uses the latest version of Transformers.
+> We're working with the vLLM team to merge these fixes to main as soon as possible.
+**_Custom Docker_**
+Make sure to use the following docker image [mistralllm/vllm-ms4:latest](https://hub.docker.com/r/mistralllm/vllm-ms4):
+```
+docker pull mistralllm/vllm-ms4:latest
+docker run -it mistralllm/vllm-ms4:latest
+```
+**_Manual Install_**
+If you prefer, you can also manually install `vllm` from this PR: [Add Mistral Guidance](https://github.com/vllm-project/vllm/pull/37081).
+**Note**:
+It is likely that this PR will be split into smaller PRs and merged to `vllm` main in the coming 1-2 weeks (as of 16.03.2026).
+Check latest developments directly on the [PR](https://github.com/vllm-project/vllm/pull/37081).
+1. Git clone vLLM:
+```
+git clone --branch fix_mistral_parsing https://github.com/juliendenize/vllm.git
+```
+2. Install with pre-compiled kernels
+```
+# run the editable install from the root of the repository cloned in step 1
+cd vllm
+VLLM_USE_PRECOMPILED=1 pip install --editable .
+```
+3. Make sure `transformers` is installed from "main":
+```
+uv pip install git+https://github.com/huggingface/transformers.git
+```
+Also make sure to have installed [`mistral_common >= 1.10.0`](https://github.com/mistralai/mistral-common/releases/tag/v1.10.0).
+To check:
+```
+python -c "import mistral_common; print(mistral_common.__version__)"
+```
+**_Launch server_**
+We recommend that you use Leanstral in a server/client setting.
+```
+vllm serve mistralai/Leanstral-2603 \
+  --max-model-len 200000 \
+  --tensor-parallel-size 4 \
+  --attention-backend FLASH_ATTN_MLA \
+  --tool-call-parser mistral \
+  --enable-auto-tool-choice \
+  --reasoning-parser mistral
+```
+**_Client_**
+```py
+from openai import OpenAI
+from huggingface_hub import hf_hub_download
+# Modify OpenAI's API key and API base to use vLLM's API server.
+openai_api_key = "EMPTY"
+openai_api_base = "<your-host-url>"
+client = OpenAI(
+    api_key=openai_api_key,
+    base_url=openai_api_base,
+)
+TEMP = 1.0
+MAX_TOK = 32000
+REASONING = "high" # switch to 'none' for faster answers
+models = client.models.list()
+model = models.data[0].id
+prompt = """Define the transition rules as an inductive proposition.
+This choice provides better support for proving properties about valid transitions and is generally more natural for modeling state machines in Lean, where you want to express logical rules rather than just computing a yes/no vale for each possible transition."""
+messages = [
+    {
+        "role": "user",
+        "content": [
+            {
+                "type": "text",
+                "text": prompt,
+            },
+        ],
+    },
+]
+response = client.chat.completions.create(
+    model=model,
+    messages=messages,
+    temperature=TEMP,
+    max_tokens=MAX_TOK,
+    reasoning_effort=REASONING,
+)
+print("Content")
+print(response.choices[0].message.content)
+pritn("Reasoning")
+print(response.choices[0].message.reasoning)
+```
+_Example Content_:
+<details>
+<summary>Expand</summary>
+To define transaction rules as an inductive proposition in Lean, you can create an inductive type that captures the valid transitions between states. This approach is particularly useful for modeling state machines and proving properties about transitions, such as invariants.
+Here is an example of how to define an inductive proposition for valid transitions in a simple counter machine:
+```lean
+-- Example: Counter that can only increment by 1 (no decrement, no shortcuts)
+inductive ValidTransition : Nat → Nat → Prop
+  | step1 : ∀ s, ValidTransition s (s + 1)   -- Core transition: s → s+1
+  | step2 : ∀ s, ValidTransition s (2 * s)    -- Bonus: s → 2s (optional)
+  | zero_step : ValidTransition 0 0          -- Additional special case
+lemma reach_any_positive : ∀ n > 0, ValidTransition 0 n := by
+  induction n with
+  | zero => contradiction
+  | succ n ih =>
+    cases' Nat.eq_zero_or_pos n with h h
+    · simp [h]
+    · have : ValidTransition 0 (n + 1) := by
+        cases' h with _ ih    -- Wait, this is messy...
+```
+In this example, `ValidTransition` is defined as an inductive proposition with constructors `step1`, `step2`, and `zero_step`. The `step1` constructor allows for any state to transition to the next state by incrementing by 1, while `step2` allows for doubling the state. The `zero_step` constructor is a special case for the initial state 0.
+The lemma `reach_any_positive` aims to prove that any positive natural number is reachable from the initial state 0 through valid transitions. The proof uses induction on `n`, but due to simplification issues, the current approach may not be the most effective.
+A more concise and effective way to define and prove properties about transitions is to use a simpler model, such as the following:
+```lean
+-- Simpler model for counter transitions (suffices for proofs)
+inductive CounterTrans : Nat → Nat → Prop
+  | valid : ∀ n, CounterTrans n (n + 1)
+lemma reach_positive : ∀ n > 0, CounterTrans 0 n
+  -- Proof is now trivial (by induction)
+```
+In this simpler model, `CounterTrans` is defined with a single constructor `valid`, which allows for any state to transition to the next state by incrementing by 1. The lemma `reach_positive` is straightforward to prove by induction, leveraging the simplicity of the transition rules.
+This approach ensures that the proof remains concise and effective, avoiding unnecessary complexity in the transition rules. By using inductive propositions, we can effectively reason about state transitions and prove properties about the system.
+</details>
+**_Tool-Calling_**
+You can add `tools` to the chat completion as follows:
+```py
+# Tool-calling example. Reuses `client`, `model`, `TEMP`, `MAX_TOK` and `REASONING`
+# defined in the client example earlier in this README.
+prompt = """I have the following Lean 4 code snippet and want to check if it compiles and runs without errors. Can you run it for me and let me know the result?
+```lean\ninductive State where\n  | idle\n  | busy\n  | error\n\ndef transition : State → State → Bool\n  | .idle, .busy => true\n  | .busy, .idle => true\n  | .busy, .error => true\n  | _, _ => false\n\n#eval transition .idle .busy\n```"""
+# One function tool. Both parameters are strings and no `required` list is declared,
+# so the "either code or file_path" rule is stated only in the descriptions.
+tools = [{
+    "type": "function",
+    "function": {
+        "name": "lean_run_code",
+        "description": "Run or compile an independent Lean code snippet or file and return the result or error message.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "code": {
+                    "type": "string",
+                    "description": "Lean code snippet to run or compile. Either this or file_path must be provided."
+                },
+                "file_path": {
+                    "type": "string",
+                    "description": "Path to the Lean file to run or compile. Either this or code must be provided."
+                }
+            },
+        }
+        }
+}]
+# A single user turn whose content is a list with one text chunk.
+messages = [
+    {
+        "role": "user",
+        "content": [
+            {
+                "type": "text",
+                "text": prompt,
+            },
+        ],
+    },
+]
+# Same request as the plain client example, with `tools` passed in addition.
+response = client.chat.completions.create(
+    model=model,
+    messages=messages,
+    temperature=TEMP,
+    max_tokens=MAX_TOK,
+    reasoning_effort=REASONING,
+    tools=tools,
+)
+# Print the tool calls the model requested, then the reasoning returned alongside them.
+print("Tool Calls")
+print(response.choices[0].message.tool_calls)
+print("Reasoning")
+print(response.choices[0].message.reasoning)
+```
+_Example Tool Calls_:
+<details>
+<summary>Expand</summary>
+`Function(arguments='{"code": "inductive State where\\n  | idle\\n  | busy\\n  | error\\n\\ndef transition : State → State → Bool\\n  | .idle, .busy => true\\n  | .busy, .idle => true\\n  | .busy, .error => true\\n  | _, _ => false\\n\\n#eval transition .idle .busy"}', name='lean_run_code')`
+</details>
+## License
+This model is licensed under the [Apache 2.0 License](https://www.apache.org/licenses/LICENSE-2.0.txt).
+*You must not use this model in a manner that infringes, misappropriates, or otherwise violates any third party’s rights, including intellectual property rights.*

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,132 @@

+{#- Default system message if no system prompt is passed. #}
+{#- It is the empty string here, so the fallback branch further down emits nothing. #}
+{%- set default_system_message = '' %}
+{#- Begin of sequence token. #}
+{{- '<s>' }}
+{#- Handle system prompt if it exists. #}
+{#- System prompt supports text content or text chunks. #}
+{#- Only the first message is inspected for the system role. #}
+{%- if messages[0]['role'] == 'system' %}
+    {{- '[SYSTEM_PROMPT]' -}}
+    {%- if messages[0]['content'] is string %}
+        {{- messages[0]['content'] -}}
+    {%- else %}
+        {#- Chunked content: text chunks are concatenated in order, any other chunk type raises. #}
+        {%- for block in messages[0]['content'] %}
+            {%- if block['type'] == 'text' %}
+                {{- block['text'] }}
+            {%- else %}
+                {{- raise_exception('Only text chunks are supported in system message contents.') }}
+            {%- endif %}
+        {%- endfor %}
+    {%- endif %}
+    {{- '[/SYSTEM_PROMPT]' -}}
+    {#- The system message is consumed here; the remaining messages form the conversation. #}
+    {%- set loop_messages = messages[1:] %}
+{%- else %}
+    {#- No system message: every message belongs to the conversation. #}
+    {%- set loop_messages = messages %}
+    {%- if default_system_message != '' %}
+        {{- '[SYSTEM_PROMPT]' + default_system_message + '[/SYSTEM_PROMPT]' }}
+    {%- endif %}
+{%- endif %}
+{#- Tools definition #}
+{#- When a non-empty `tools` list is passed, it is serialized as JSON between -#}
+{#- [AVAILABLE_TOOLS] markers and emitted once, right after the system prompt section. #}
+{%- set tools_definition = '' %}
+{#- NOTE(review): has_tools is assigned but not read in the visible part of this template. #}
+{%- set has_tools = false %}
+{%- if tools is defined and tools is not none and tools|length > 0 %}
+    {%- set has_tools = true %}
+    {%- set tools_definition = '[AVAILABLE_TOOLS]' + (tools| tojson) + '[/AVAILABLE_TOOLS]' %}
+    {{- tools_definition }}
+{%- endif %}
+{#- Model settings definition #}
+{#- reasoning_effort falls back to 'none' when it is undefined or none. #}
+{%- set reasoning_effort = reasoning_effort if reasoning_effort is defined and reasoning_effort is not none else 'none' %}
+{#- Only 'none' and 'high' are accepted; anything else aborts rendering. #}
+{%- if reasoning_effort not in ['none', 'high'] %}
+    {{- raise_exception('reasoning_effort must be either "none" or "high"') }}
+{%- endif %}
+{#- The settings block is always emitted, even for the default value. #}
+{%- set model_settings = '[MODEL_SETTINGS]{"reasoning_effort": "' + reasoning_effort + '"}[/MODEL_SETTINGS]' %}
+{{- model_settings }}
+{#- Checks for alternating user/assistant messages. #}
+{#- ns.index counts only user messages and assistant messages without tool calls; -#}
+{#- tool results and assistant messages that carry tool calls are skipped. #}
+{%- set ns = namespace(index=0) %}
+{%- for message in loop_messages %}
+    {%- if message.role == 'user' or (message.role == 'assistant' and (message.tool_calls is not defined or message.tool_calls is none or message.tool_calls | length == 0)) %}
+        {#- Even counter positions must be user turns, odd positions must be assistant turns. #}
+        {%- if (message['role'] == 'user') != (ns.index % 2 == 0) %}
+            {{- raise_exception('After the optional system message, conversation roles must alternate user and assistant roles except for tool calls and results.') }}
+        {%- endif %}
+        {%- set ns.index = ns.index + 1 %}
+    {%- endif %}
+{%- endfor %}
+{#- Handle conversation messages. #}
+{#- Renders each message of loop_messages according to its role: user, assistant or tool. #}
+{%- for message in loop_messages %}
+    {#- User messages supports text content or text and image chunks. #}
+    {%- if message['role'] == 'user' %}
+        {%- if message['content'] is string %}
+            {{- '[INST]' + message['content'] + '[/INST]' }}
+        {#- Check for none before measuring length, so a none content reaches the explicit error below. #}
+        {%- elif message['content'] is not none and message['content'] | length > 0 %}
+            {{- '[INST]' }}
+            {#- With exactly two chunks, sorting by type puts an image chunk ahead of a text chunk. #}
+            {%- if message['content'] | length == 2 %}
+                {%- set blocks = message['content'] | sort(attribute='type') %}
+            {%- else %}
+                {%- set blocks = message['content'] %}
+            {%- endif %}
+            {%- for block in blocks %}
+                {%- if block['type'] == 'text' %}
+                    {{- block['text'] }}
+                {%- elif block['type'] in ['image', 'image_url'] %}
+                    {#- Images are rendered as a placeholder token only. #}
+                    {{- '[IMG]' }}
+                {%- else %}
+                    {{- raise_exception('Only text, image and image_url chunks are supported in user message content.') }}
+                {%- endif %}
+            {%- endfor %}
+            {{- '[/INST]' }}
+        {%- else %}
+            {{- raise_exception('User message must have a string or a list of chunks in content') }}
+        {%- endif %}
+    {#- Assistant messages supports text content or text, image and thinking chunks. #}
+    {%- elif message['role'] == 'assistant' %}
+        {#- An assistant turn needs content, tool calls, or both. #}
+        {%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}
+            {{- raise_exception('Assistant message must have a string or a list of chunks in content or a list of tool calls.') }}
+        {%- endif %}
+        {%- if message['content'] is string and message['content'] != '' %}
+            {{- message['content'] }}
+        {#- content is none for a turn that only carries tool calls; skip it instead of taking its length. #}
+        {%- elif message['content'] is not none and message['content'] | length > 0 %}
+            {%- for block in message['content'] %}
+                {%- if block['type'] == 'text' %}
+                    {{- block['text'] }}
+                {%- elif block['type'] == 'thinking' %}
+                    {{- '[THINK]' + block['thinking'] + '[/THINK]' }}
+                {%- else %}
+                    {{- raise_exception('Only text and thinking chunks are supported in assistant message contents.') }}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+        {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 %}
+            {%- for tool in message['tool_calls'] %}
+                {{- '[TOOL_CALLS]' }}
+                {%- set name = tool['function']['name'] %}
+                {%- set arguments = tool['function']['arguments'] %}
+                {#- Arguments are emitted as a JSON string: non-strings are serialized, an empty string becomes {}. #}
+                {%- if arguments is not string %}
+                    {%- set arguments = arguments|tojson|safe %}
+                {%- elif arguments == '' %}
+                    {%- set arguments = '{}' %}
+                {%- endif %}
+                {{- name + '[ARGS]' + arguments }}
+            {%- endfor %}
+        {%- endif %}
+        {#- Every assistant turn is closed with the end-of-sequence token. #}
+        {{- '</s>' }}
+    {#- Tool messages only supports text content. #}
+    {%- elif message['role'] == 'tool' %}
+        {{- '[TOOL_RESULTS]' + message['content']|string + '[/TOOL_RESULTS]' }}
+    {#- Raise exception for unsupported roles. #}
+    {%- else %}
+        {{- raise_exception('Only user, assistant and tool roles are supported, got ' + message['role'] + '.') }}
+    {%- endif %}
+{%- endfor %}

consolidated-00001-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c6edc7dabc142d23283e00a9aba7393788043bf85ecc1e6d19d16bcf9b4d3b0b
+size 20000632534

consolidated-00002-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c793c203934be95384be7bccf2a38290df6d1bd164e8361d3cd458c86baa7931
+size 19997737172

consolidated-00003-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51edc063dafcf83dcc7e271fb122495b9514597b742c2d0fe8e8972bd2547f98
+size 19997736948

consolidated-00004-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9d605977eed5d6c9876ec013f71a1642c4edbb8af6765c601e32e2115715719
+size 19997737108

consolidated-00005-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:84c2d0923cc0713554114689213f4dccf9e475813dc8cde74e0936ab9137b0ff
+size 19997738276

consolidated-00006-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d3bef3c6656bd083e24c0a9f5d244a73ce7c1871aec03a590b84536393ef3424
+size 19861887138

consolidated-00007-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06ab960fa7f05b0b914425919470114777643fee069ddcb601861e91a2d343c5
+size 1073741920

consolidated.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

params.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "dim": 4096,
+  "n_layers": 36,
+  "head_dim": 128,
+  "hidden_dim": 12288,
+  "n_heads": 32,
+  "n_kv_heads": 32,
+  "rope_theta": 10000.0,
+  "norm_eps": 1e-06,
+  "vocab_size": 131072,
+  "tied_embeddings": false,
+  "max_position_embeddings": 1048576,
+  "llama_4_scaling": {
+    "original_max_position_embeddings": 8192,
+    "beta": 0.1
+  },
+  "q_lora_rank": 1024,
+  "qk_rope_head_dim": 64,
+  "qk_nope_head_dim": 64,
+  "kv_lora_rank": 256,
+  "v_head_dim": 128,
+  "quantization": {
+    "qformat_weight": "fp8_e4m3",
+    "qscheme_act": "TENSOR"
+  },
+  "yarn": {
+    "original_max_position_embeddings": 8192,
+    "factor": 128,
+    "apply_scale": false,
+    "beta": 32,
+    "alpha": 1
+  },
+  "moe": {
+    "expert_parallel": 1,
+    "expert_model_parallel": 1,
+    "route_every_n": 1,
+    "first_k_dense_replace": 0,
+    "num_experts": 128,
+    "num_experts_per_tok": 4,
+    "num_expert_groups": 1,
+    "num_expert_groups_per_tok": 1,
+    "routed_scale": 1.0,
+    "expert_hidden_dim": 2048,
+    "num_shared_experts": 1
+  },
+  "vision_encoder": {
+    "image_token_id": 10,
+    "image_break_token_id": 12,
+    "image_end_token_id": 13,
+    "intermediate_size": 4096,
+    "num_hidden_layers": 24,
+    "num_attention_heads": 16,
+    "mm_projector_id": "patch_merge",
+    "spatial_merge_size": 2,
+    "hidden_size": 1024,
+    "num_channels": 3,
+    "image_size": 1540,
+    "max_image_size": 1540,
+    "patch_size": 14,
+    "rope_theta": 10000.0,
+    "add_pre_mm_projector_layer_norm": true,
+    "adapter_bias": false
+  }
+}

tekken.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1272b956bd6edd2d2c674c76896c7661308c9e723997b0afb55ecb429cb5dc7
+size 16275354