Upload 11 files
Browse files- .gitattributes +4 -0
- Lmlm.md +2732 -0
- Lmlm.py +62 -0
- VoiceAuthenticationModel.pyx +36 -0
- fig_LMLM_TOFU_NPO.pdf +0 -0
- fig_LMLM_delta_loss_distribution.png +3 -0
- fig_LMLM_motivation_new.pdf +0 -0
- fig_LMLM_overview.pdf +3 -0
- fig_LMLM_perplexity.adoc +0 -0
- fig_LMLM_prefix_tree.pdf +3 -0
- fig_LMLM_results.pdf +3 -0
- lmlm4.cpp +131 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
fig_LMLM_delta_loss_distribution.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
fig_LMLM_overview.pdf filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
fig_LMLM_prefix_tree.pdf filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
fig_LMLM_results.pdf filter=lfs diff=lfs merge=lfs -text
|
Lmlm.md
ADDED
|
@@ -0,0 +1,2732 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# [DocumentationLmlm studio](file:///private/var/containers/Bundle/Application/61352102-115D-4798-934E-1E7EB868B788/stable.app/error_page_loaded.html?url=https://github.com/Web4application/lmlm/edit/main/PromptStudio.html&dontLoad=true)
|
| 2 |
+
|
| 3 |
+
Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 4 |
+
>
|
| 5 |
+
> Use this file to discover all available pages before exploring further.
|
| 6 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 7 |
+
Use this file to discover all available pages before exploring further.
|
| 8 |
+
|
| 9 |
+
# Cloud
|
| 10 |
+
|
| 11 |
+
## Cloud Models
|
| 12 |
+
|
| 13 |
+
Ollama's cloud models are a new kind of model in Ollama that can run without a powerful GPU. Instead, cloud models are automatically offloaded to Ollama's cloud service while offering the same capabilities as local models, making it possible to keep using your local tools
|
| 14 |
+
`LmlmNotebookmarks.ipynb` while running larger models that wouldn't fit on a personal computer.
|
| 15 |
+
`Ollama.cpp`
|
| 16 |
+
|
| 17 |
+
### Supported models
|
| 18 |
+
|
| 19 |
+
For a list of supported models, see Ollama's [model library](https://ollama.com/search?c=cloud).
|
| 20 |
+
|
| 21 |
+
``Pyx``
|
| 22 |
+
### Running Cloud models
|
| 23 |
+
|
| 24 |
+
Ollama's cloud models require an account on [ollama.com](https://ollama.com). To sign in or create an account, run:
|
| 25 |
+
|
| 26 |
+
```ini
|
| 27 |
+
ollama signin
|
| 28 |
+
```
|
| 29 |
+
<Tabs>
|
| 30 |
+
<Tab title="CLI">
|
| 31 |
+
To run a cloud model, open the terminal and run:
|
| 32 |
+
```pymdownx
|
| 33 |
+
ollama run gpt-oss:120b-cloud
|
| 34 |
+
```
|
| 35 |
+
</Tab>
|
| 36 |
+
<Tab title="Python">
|
| 37 |
+
First, pull a cloud model so it can be accessed:
|
| 38 |
+
```
|
| 39 |
+
```bash
|
| 40 |
+
ollama pull gpt-oss:120b-cloud
|
| 41 |
+
```
|
| 42 |
+
```
|
| 43 |
+
Next, install [Ollama's Python library](https://github.com/ollama/ollama-python):
|
| 44 |
+
pip install ollama
|
| 45 |
+
|
| 46 |
+
Next, create and run a simple Python script:
|
| 47 |
+
|
| 48 |
+
```python theme={"system"}
|
| 49 |
+
from ollama import Client
|
| 50 |
+
|
| 51 |
+
client = Client()
|
| 52 |
+
|
| 53 |
+
messages = [
|
| 54 |
+
{
|
| 55 |
+
'role': 'user',
|
| 56 |
+
'content': 'Why is the sky blue?',
|
| 57 |
+
},
|
| 58 |
+
]
|
| 59 |
+
|
| 60 |
+
for part in client.chat('gpt-oss:120b-cloud', messages=messages, stream=True):
|
| 61 |
+
print(part['message']['content'], end='', flush=True)
|
| 62 |
+
```
|
| 63 |
+
</Tab>
|
| 64 |
+
|
| 65 |
+
<Tab title="JavaScript">
|
| 66 |
+
First, pull a cloud model so it can be accessed:
|
| 67 |
+
```
|
| 68 |
+
```bash
|
| 69 |
+
ollama pull gpt-oss:120b-cloud
|
| 70 |
+
```
|
| 71 |
+
```
|
| 72 |
+
Next, install ``[Ollama's JavaScript library](https://github.com/ollama/ollama-js)``:
|
| 73 |
+
|
| 74 |
+
```nvx
|
| 75 |
+
npm i ollama
|
| 76 |
+
```
|
| 77 |
+
```
|
| 78 |
+
Then use the library to run a cloud model:
|
| 79 |
+
|
| 80 |
+
```typescript theme={"system"}
|
| 81 |
+
import { Ollama } from "ollama";
|
| 82 |
+
|
| 83 |
+
const ollama = new Ollama();
|
| 84 |
+
|
| 85 |
+
const response = await ollama.chat({
|
| 86 |
+
model: "gpt-oss:120b-cloud",
|
| 87 |
+
messages: [{ role: "user", content: "Explain quantum computing" }],
|
| 88 |
+
stream: true,
|
| 89 |
+
});
|
| 90 |
+
|
| 91 |
+
for await (const part of response) {
|
| 92 |
+
process.stdout.write(part.message.content);
|
| 93 |
+
}
|
| 94 |
+
```
|
| 95 |
+
</Tab>
|
| 96 |
+
```
|
| 97 |
+
<Tab title="cURL">
|
| 98 |
+
First, pull a cloud model so it can be accessed:
|
| 99 |
+
|
| 100 |
+
```bash
|
| 101 |
+
ollama pull gpt-oss:120b-cloud
|
| 102 |
+
```
|
| 103 |
+
```
|
| 104 |
+
Run the following cURL command to call the model via Ollama's API:
|
| 105 |
+
```curl
|
| 106 |
+
curl http://localhost:11434/api/chat -d '{
|
| 107 |
+
"model": "gpt-oss:120b-cloud",
|
| 108 |
+
"messages": [{
|
| 109 |
+
"role": "user",
|
| 110 |
+
"content": "Why is the sky blue?"
|
| 111 |
+
}],
|
| 112 |
+
"stream": false
|
| 113 |
+
}'
|
| 114 |
+
```
|
| 115 |
+
</Tab>
|
| 116 |
+
</Tabs>
|
| 117 |
+
```
|
| 118 |
+
## Cloud API access
|
| 119 |
+
|
| 120 |
+
Cloud models can also be accessed directly on ollama.com's API. In this mode, ollama.com acts as a remote Ollama host.
|
| 121 |
+
|
| 122 |
+
### Authentication
|
| 123 |
+
|
| 124 |
+
For direct access to ollama.com's API, first create an [API key](https://ollama.com/settings/keys).
|
| 125 |
+
|
| 126 |
+
Then, set the `OLLAMA_API_KEY` environment variable to your API key.
|
| 127 |
+
```
|
| 128 |
+
```bash
|
| 129 |
+
ollama pull llama3.2
|
| 130 |
+
echo "FROM llama3.2" >> Modelfile
|
| 131 |
+
echo "SYSTEM You are a friendly assistant." >> Modelfile
|
| 132 |
+
ollama create -f Modelfile lmlm/Lmkm
|
| 133 |
+
ollama push lmlm/Lmkm
|
| 134 |
+
export OLLAMA_API_KEY=your_api_key
|
| 135 |
+
```
|
| 136 |
+
|
| 137 |
+
### Listing models
|
| 138 |
+
|
| 139 |
+
For models available directly via Ollama's API, models can be listed via:
|
| 140 |
+
|
| 141 |
+
```curl
|
| 142 |
+
curl https://ollama.com/api/tags
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
### Generating a response
|
| 146 |
+
|
| 147 |
+
<Tabs>
|
| 148 |
+
<Tab title="Python">
|
| 149 |
+
First, install [Ollama's Python library](https://github.com/ollama/ollama-python)
|
| 150 |
+
|
| 151 |
+
```bash
|
| 152 |
+
pip install ollama
|
| 153 |
+
```
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
Then make a request
|
| 157 |
+
|
| 158 |
+
```python theme={"system"}
|
| 159 |
+
import os
|
| 160 |
+
from ollama import Client
|
| 161 |
+
|
| 162 |
+
client = Client(
|
| 163 |
+
host="https://ollama.com",
|
| 164 |
+
headers={'Authorization': 'Bearer ' + os.environ.get('OLLAMA_API_KEY')}
|
| 165 |
+
)
|
| 166 |
+
|
| 167 |
+
messages = [
|
| 168 |
+
{
|
| 169 |
+
'role': 'user',
|
| 170 |
+
'content': 'Why is the sky blue?',
|
| 171 |
+
},
|
| 172 |
+
]
|
| 173 |
+
|
| 174 |
+
for part in client.chat('gpt-oss:120b', messages=messages, stream=True):
|
| 175 |
+
print(part['message']['content'], end='', flush=True)
|
| 176 |
+
```
|
| 177 |
+
</Tab>
|
| 178 |
+
|
| 179 |
+
<Tab title="JavaScript">
|
| 180 |
+
First, install [Ollama's JavaScript library](https://github.com/ollama/ollama-js)
|
| 181 |
+
```
|
| 182 |
+
```bash
|
| 183 |
+
npm i ollama
|
| 184 |
+
```
|
| 185 |
+
Next, make a request to the model:
|
| 186 |
+
|
| 187 |
+
```typescript theme={"system"}
|
| 188 |
+
import { Ollama } from "ollama";
|
| 189 |
+
|
| 190 |
+
const ollama = new Ollama({
|
| 191 |
+
host: "https://ollama.com",
|
| 192 |
+
headers: {
|
| 193 |
+
Authorization: "Bearer " + process.env.OLLAMA_API_KEY,
|
| 194 |
+
},
|
| 195 |
+
});
|
| 196 |
+
|
| 197 |
+
const response = await ollama.chat({
|
| 198 |
+
model: "gpt-oss:120b",
|
| 199 |
+
messages: [{ role: "user", content: "Explain quantum computing" }],
|
| 200 |
+
stream: true,
|
| 201 |
+
});
|
| 202 |
+
|
| 203 |
+
for await (const part of response) {
|
| 204 |
+
process.stdout.write(part.message.content);
|
| 205 |
+
}
|
| 206 |
+
```
|
| 207 |
+
</Tab>
|
| 208 |
+
|
| 209 |
+
<Tab title="cURL">
|
| 210 |
+
Generate a response via Ollama's chat API:
|
| 211 |
+
|
| 212 |
+
```bash
|
| 213 |
+
curl https://ollama.com/api/chat \
|
| 214 |
+
-H "Authorization: Bearer $OLLAMA_API_KEY" \
|
| 215 |
+
-d '{
|
| 216 |
+
"model": "gpt-oss:120b",
|
| 217 |
+
"messages": [{
|
| 218 |
+
"role": "user",
|
| 219 |
+
"content": "Why is the sky blue?"
|
| 220 |
+
}],
|
| 221 |
+
"stream": false
|
| 222 |
+
}'
|
| 223 |
+
```
|
| 224 |
+
</Tab>
|
| 225 |
+
</Tabs>
|
| 226 |
+
```
|
| 227 |
+
## Local only
|
| 228 |
+
```jsx
|
| 229 |
+
Ollama can run in local-only mode by [disabling Ollama's cloud](./faq#how-do-i-disable-ollama-cloud) features.
|
| 230 |
+
import ollama from 'ollama'
|
| 231 |
+
|
| 232 |
+
const response = await ollama.chat({
|
| 233 |
+
model: 'lmlm/Lmkm',
|
| 234 |
+
messages: [{role: 'user', content: 'Hello!'}],
|
| 235 |
+
})
|
| 236 |
+
console.log(response.message.content)
|
| 237 |
+
```
|
| 238 |
+
|
| 239 |
+
> ## Documentation Index
|
| 240 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 241 |
+
> Use this file to discover all available pages before exploring further.
|
| 242 |
+
|
| 243 |
+
# Thinking
|
| 244 |
+
|
| 245 |
+
Thinking-capable models emit a `thinking` field that separates their reasoning trace from the final answer.
|
| 246 |
+
|
| 247 |
+
Use this capability to audit model steps, animate the model *thinking* in a UI, or hide the trace entirely when you only need the final response.
|
| 248 |
+
|
| 249 |
+
## Supported models
|
| 250 |
+
|
| 251 |
+
* [Qwen 3](https://ollama.com/library/qwen3)
|
| 252 |
+
* [GPT-OSS](https://ollama.com/library/gpt-oss) *(use `think` levels: `low`, `medium`, `high` — the trace cannot be fully disabled)*
|
| 253 |
+
* [DeepSeek-v3.1](https://ollama.com/library/deepseek-v3.1)
|
| 254 |
+
* [DeepSeek R1](https://ollama.com/library/deepseek-r1)
|
| 255 |
+
* Browse the latest additions under [thinking models](https://ollama.com/search?c=thinking)
|
| 256 |
+
|
| 257 |
+
## Enable thinking in API calls
|
| 258 |
+
|
| 259 |
+
Set the `think` field on chat or generate requests. Most models accept booleans (`true`/`false`).
|
| 260 |
+
|
| 261 |
+
GPT-OSS instead expects one of `low`, `medium`, or `high` to tune the trace length.
|
| 262 |
+
|
| 263 |
+
The `message.thinking` (chat endpoint) or `thinking` (generate endpoint) field contains the reasoning trace while `message.content` / `response` holds the final answer.
|
| 264 |
+
|
| 265 |
+
<Tabs>
|
| 266 |
+
<Tab title="cURL">
|
| 267 |
+
```shell theme={"system"}
|
| 268 |
+
curl http://localhost:11434/api/chat -d '{
|
| 269 |
+
"model": "qwen3",
|
| 270 |
+
"messages": [{
|
| 271 |
+
"role": "user",
|
| 272 |
+
"content": "How many letter r are in strawberry?"
|
| 273 |
+
}],
|
| 274 |
+
"think": true,
|
| 275 |
+
"stream": false
|
| 276 |
+
}'
|
| 277 |
+
```
|
| 278 |
+
</Tab>
|
| 279 |
+
|
| 280 |
+
<Tab title="Python">
|
| 281 |
+
```python theme={"system"}
|
| 282 |
+
from ollama import chat
|
| 283 |
+
|
| 284 |
+
response = chat(
|
| 285 |
+
model='qwen3',
|
| 286 |
+
messages=[{'role': 'user', 'content': 'How many letter r are in strawberry?'}],
|
| 287 |
+
think=True,
|
| 288 |
+
stream=False,
|
| 289 |
+
)
|
| 290 |
+
|
| 291 |
+
print('Thinking:\n', response.message.thinking)
|
| 292 |
+
print('Answer:\n', response.message.content)
|
| 293 |
+
</Tab>
|
| 294 |
+
<Tab title="JavaScript">
|
| 295 |
+
|
| 296 |
+
```javascript theme={"system"}
|
| 297 |
+
import ollama from 'ollama'
|
| 298 |
+
const response = await ollama.chat({
|
| 299 |
+
model: 'deepseek-r1',
|
| 300 |
+
messages: [{ role: 'user', content: 'How many letter r are in strawberry?' }],
|
| 301 |
+
think: true,
|
| 302 |
+
stream: false,
|
| 303 |
+
})
|
| 304 |
+
|
| 305 |
+
console.log('Thinking:\n', response.message.thinking)
|
| 306 |
+
console.log('Answer:\n', response.message.content)
|
| 307 |
+
```
|
| 308 |
+
</Tab>
|
| 309 |
+
</Tabs>
|
| 310 |
+
|
| 311 |
+
<Note>
|
| 312 |
+
GPT-OSS requires `think` to be set to `"low"`, `"medium"`, or `"high"`. Passing `true`/`false` is ignored for that model.
|
| 313 |
+
</Note>
|
| 314 |
+
```
|
| 315 |
+
## Stream the reasoning trace
|
| 316 |
+
|
| 317 |
+
Thinking streams interleave reasoning tokens before answer tokens. Detect the first `thinking` chunk to render a "thinking" section, then switch to the final reply once `message.content` arrives.
|
| 318 |
+
|
| 319 |
+
<Tabs>
|
| 320 |
+
<Tab title="Python">
|
| 321 |
+
```python theme={"system"}
|
| 322 |
+
from ollama import chat
|
| 323 |
+
|
| 324 |
+
stream = chat(
|
| 325 |
+
model='qwen3',
|
| 326 |
+
messages=[{'role': 'user', 'content': 'What is 17 × 23?'}],
|
| 327 |
+
think=True,
|
| 328 |
+
stream=True,
|
| 329 |
+
)
|
| 330 |
+
|
| 331 |
+
in_thinking = False
|
| 332 |
+
|
| 333 |
+
for chunk in stream:
|
| 334 |
+
if chunk.message.thinking and not in_thinking:
|
| 335 |
+
in_thinking = True
|
| 336 |
+
print('Thinking:\n', end='')
|
| 337 |
+
|
| 338 |
+
if chunk.message.thinking:
|
| 339 |
+
print(chunk.message.thinking, end='')
|
| 340 |
+
elif chunk.message.content:
|
| 341 |
+
if in_thinking:
|
| 342 |
+
print('\n\nAnswer:\n', end='')
|
| 343 |
+
in_thinking = False
|
| 344 |
+
print(chunk.message.content, end='')
|
| 345 |
+
</Tab>
|
| 346 |
+
<Tab title="JavaScript">
|
| 347 |
+
```javascript theme={"system"}
|
| 348 |
+
import ollama from 'ollama'
|
| 349 |
+
|
| 350 |
+
async function main() {
|
| 351 |
+
const stream = await ollama.chat({
|
| 352 |
+
model: 'qwen3',
|
| 353 |
+
messages: [{ role: 'user', content: 'What is 17 × 23?' }],
|
| 354 |
+
think: true,
|
| 355 |
+
stream: true,
|
| 356 |
+
})
|
| 357 |
+
|
| 358 |
+
let inThinking = false
|
| 359 |
+
|
| 360 |
+
for await (const chunk of stream) {
|
| 361 |
+
if (chunk.message.thinking && !inThinking) {
|
| 362 |
+
inThinking = true
|
| 363 |
+
process.stdout.write('Thinking:\n')
|
| 364 |
+
}
|
| 365 |
+
|
| 366 |
+
if (chunk.message.thinking) {
|
| 367 |
+
process.stdout.write(chunk.message.thinking)
|
| 368 |
+
} else if (chunk.message.content) {
|
| 369 |
+
if (inThinking) {
|
| 370 |
+
process.stdout.write('\n\nAnswer:\n')
|
| 371 |
+
inThinking = false
|
| 372 |
+
}
|
| 373 |
+
process.stdout.write(chunk.message.content)
|
| 374 |
+
}
|
| 375 |
+
}
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
main()
|
| 379 |
+
```
|
| 380 |
+
</Tab>
|
| 381 |
+
</Tabs>
|
| 382 |
+
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
## CLI quick reference
|
| 386 |
+
|
| 387 |
+
* Enable thinking for a single run: `ollama run deepseek-r1 --think "Where should I visit in Lisbon?"`
|
| 388 |
+
* Disable thinking: `ollama run deepseek-r1 --think=false "Summarize this article"`
|
| 389 |
+
* Hide the trace while still using a thinking model: `ollama run deepseek-r1 --hidethinking "Is 9.9 bigger or 9.11?"`
|
| 390 |
+
* Inside interactive sessions, toggle with `/set think` or `/set nothink`.
|
| 391 |
+
* GPT-OSS only accepts levels: `ollama run gpt-oss --think=low "Draft a headline"` (replace `low` with `medium` or `high` as needed).
|
| 392 |
+
|
| 393 |
+
<Note>Thinking is enabled by default in the CLI and API for supported models.</Note>
|
| 394 |
+
|
| 395 |
+
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
## Install
|
| 399 |
+
|
| 400 |
+
Install [marimo](https://marimo.io). You can use `pip` or `uv` for this. You
|
| 401 |
+
can also use `uv` to create a sandboxed environment for marimo by running:
|
| 402 |
+
|
| 403 |
+
```shell theme={"system"}
|
| 404 |
+
uvx marimo edit --sandbox notebook.py
|
| 405 |
+
```
|
| 406 |
+
|
| 407 |
+
## Usage with Ollama
|
| 408 |
+
|
| 409 |
+
1. In marimo, go to the user settings and go to the AI tab. From here
|
| 410 |
+
you can find and configure Ollama as an AI provider. For local use you
|
| 411 |
+
would typically point the base url to `http://localhost:11434/v1`.
|
| 412 |
+
|
| 413 |
+
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
| 414 |
+
<img src="https://mintcdn.com/ollama-9269c548/sniSFOOyehzMt2RV/images/marimo-settings.png?fit=max&auto=format&n=sniSFOOyehzMt2RV&q=85&s=33007ad4867ca8258854eab513da81ff" alt="Ollama settings in marimo" width="50%" data-path="images/marimo-settings.png" />
|
| 415 |
+
</div>
|
| 416 |
+
|
| 417 |
+
2. Once the AI provider is set up, you can turn on/off specific AI models you'd like to access.
|
| 418 |
+
|
| 419 |
+
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
| 420 |
+
<img src="https://mintcdn.com/ollama-9269c548/sniSFOOyehzMt2RV/images/marimo-models.png?fit=max&auto=format&n=sniSFOOyehzMt2RV&q=85&s=61acca69dfc3d32e1eb524095c42e4a0" alt="Selecting an Ollama model" width="50%" data-path="images/marimo-models.png" />
|
| 421 |
+
</div>
|
| 422 |
+
|
| 423 |
+
3. You can also add a model to the list of available models by scrolling to the bottom and using the UI there.
|
| 424 |
+
|
| 425 |
+
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
| 426 |
+
<img src="https://mintcdn.com/ollama-9269c548/sniSFOOyehzMt2RV/images/marimo-add-model.png?fit=max&auto=format&n=sniSFOOyehzMt2RV&q=85&s=c3a2dfa7cba1a6565cc726bbbe0ea079" alt="Adding a new Ollama model" width="50%" data-path="images/marimo-add-model.png" />
|
| 427 |
+
</div>
|
| 428 |
+
|
| 429 |
+
4. Once configured, you can now use Ollama for AI chats in marimo.
|
| 430 |
+
|
| 431 |
+
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
| 432 |
+
<img src="https://mintcdn.com/ollama-9269c548/sniSFOOyehzMt2RV/images/marimo-chat.png?fit=max&auto=format&n=sniSFOOyehzMt2RV&q=85&s=03cd217cf60765a00da87e6dc7a07f53" alt="Configure code completion" width="50%" data-path="images/marimo-chat.png" />
|
| 433 |
+
</div>
|
| 434 |
+
|
| 435 |
+
5. Alternatively, you can now use Ollama for **inline code completion** in marimo. This can be configured in the "AI Features" tab.
|
| 436 |
+
|
| 437 |
+
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
| 438 |
+
<img src="https://mintcdn.com/ollama-9269c548/sniSFOOyehzMt2RV/images/marimo-code-completion.png?fit=max&auto=format&n=sniSFOOyehzMt2RV&q=85&s=2cd6ad42b810642a90d41b7fd3515278" alt="Configure code completion" width="50%" data-path="images/marimo-code-completion.png" />
|
| 439 |
+
</div>
|
| 440 |
+
|
| 441 |
+
## Connecting to ollama.com
|
| 442 |
+
|
| 443 |
+
1. Sign in to ollama cloud via `ollama signin`
|
| 444 |
+
2. In the ollama model settings add a model that ollama hosts, like `gpt-oss:120b`.
|
| 445 |
+
3. You can now refer to this model in marimo!
|
| 446 |
+
|
| 447 |
+
> ## Documentation Index
|
| 448 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 449 |
+
> Use this file to discover all available pages before exploring further.
|
| 450 |
+
|
| 451 |
+
# Onyx
|
| 452 |
+
|
| 453 |
+
## Overview
|
| 454 |
+
Onyx is a self-hostable Chat UI that integrates with all Ollama models. Features include:
|
| 455 |
+
* Creating custom Agents
|
| 456 |
+
* Web search
|
| 457 |
+
* Deep Research
|
| 458 |
+
* RAG over uploaded documents and connected apps
|
| 459 |
+
* Connectors to applications like Google Drive, Email, Slack, etc.
|
| 460 |
+
* MCP and OpenAPI Actions support
|
| 461 |
+
* Image generation
|
| 462 |
+
* User/Groups management, RBAC, SSO, etc.
|
| 463 |
+
|
| 464 |
+
Onyx can be deployed for single users or large organizations.
|
| 465 |
+
|
| 466 |
+
## Install Onyx
|
| 467 |
+
|
| 468 |
+
Deploy Onyx with the [quickstart guide](https://docs.onyx.app/deployment/getting_started/quickstart.md).
|
| 469 |
+
<Info>
|
| 470 |
+
Resourcing/scaling docs [here](https://docs.onyx.app/deployment/getting_started/resourcing).
|
| 471 |
+
</Info>
|
| 472 |
+
|
| 473 |
+
## Usage with Ollama
|
| 474 |
+
|
| 475 |
+
1. Login to your Onyx deployment (create an account first).
|
| 476 |
+
|
| 477 |
+
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
| 478 |
+
<img src="https://mintcdn.com/ollama-9269c548/rqi257JWXmZRsZn4/images/onyx-login.png?fit=max&auto=format&n=rqi257JWXmZRsZn4&q=85&s=5850db0abbfca50c1b6eb5029648ae89" alt="Onyx Login Page" width="75%" data-path="images/onyx-login.png" />
|
| 479 |
+
</div>
|
| 480 |
+
|
| 481 |
+
2. In the set-up process select `Ollama` as the LLM provider.
|
| 482 |
+
|
| 483 |
+
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
| 484 |
+
<img src="https://mintcdn.com/ollama-9269c548/rqi257JWXmZRsZn4/images/onyx-ollama-llm.png?fit=max&auto=format&n=rqi257JWXmZRsZn4&q=85&s=399b5938d0d0d18b359845529dd9408b" alt="Onyx Set Up Form" width="75%" data-path="images/onyx-ollama-llm.png" />
|
| 485 |
+
</div>
|
| 486 |
+
|
| 487 |
+
3. Provide your **Ollama API URL** and select your models.
|
| 488 |
+
<Note>If you're running Onyx in Docker, to access your computer's local network use `http://host.docker.internal` instead of `http://127.0.0.1`.</Note>
|
| 489 |
+
|
| 490 |
+
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
| 491 |
+
<img src="https://mintcdn.com/ollama-9269c548/rqi257JWXmZRsZn4/images/onyx-ollama-form.png?fit=max&auto=format&n=rqi257JWXmZRsZn4&q=85&s=f675da3f8a399614b549f72d6adaa798" alt="Selecting Ollama Models" width="75%" data-path="images/onyx-ollama-form.png" />
|
| 492 |
+
</div>
|
| 493 |
+
|
| 494 |
+
You can also easily connect up Onyx Cloud with the `Ollama Cloud` tab of the setup.
|
| 495 |
+
|
| 496 |
+
## Send your first query
|
| 497 |
+
|
| 498 |
+
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
| 499 |
+
<img src="https://mintcdn.com/ollama-9269c548/rqi257JWXmZRsZn4/images/onyx-query.png?fit=max&auto=format&n=rqi257JWXmZRsZn4&q=85&s=3e7b6e38fb14b288d72bcd828cdd91d9" alt="Onyx Query Example" width="75%" data-path="images/onyx-query.png" />
|
| 500 |
+
</div>
|
| 501 |
+
> ## Documentation Index
|
| 502 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 503 |
+
> Use this file to discover all available pages before exploring further.
|
| 504 |
+
|
| 505 |
+
# Vision
|
| 506 |
+
|
| 507 |
+
Vision models accept images alongside text so the model can describe, classify, and answer questions about what it sees.
|
| 508 |
+
|
| 509 |
+
## Quick start
|
| 510 |
+
|
| 511 |
+
```shell theme={"system"}
|
| 512 |
+
ollama run gemma3 ./image.png "what's in this image?"
|
| 513 |
+
```
|
| 514 |
+
|
| 515 |
+
## Usage with Ollama's API
|
| 516 |
+
|
| 517 |
+
Provide an `images` array. SDKs accept file paths, URLs or raw bytes while the REST API expects base64-encoded image data.
|
| 518 |
+
|
| 519 |
+
<Tabs>
|
| 520 |
+
<Tab title="cURL">
|
| 521 |
+
```shell theme={"system"}
|
| 522 |
+
# 1. Download a sample image
|
| 523 |
+
curl -L -o test.jpg "https://upload.wikimedia.org/wikipedia/commons/3/3a/Cat03.jpg"
|
| 524 |
+
# 2. Encode the image
|
| 525 |
+
IMG=$(base64 < test.jpg | tr -d '\n')
|
| 526 |
+
|
| 527 |
+
# 3. Send it to Ollama
|
| 528 |
+
curl -X POST http://localhost:11434/api/chat \
|
| 529 |
+
-H "Content-Type: application/json" \
|
| 530 |
+
-d '{
|
| 531 |
+
"model": "gemma3",
|
| 532 |
+
"messages": [{
|
| 533 |
+
"role": "user",
|
| 534 |
+
"content": "What is in this image?",
|
| 535 |
+
"images": ["'"$IMG"'"]
|
| 536 |
+
}],
|
| 537 |
+
"stream": false
|
| 538 |
+
}'
|
| 539 |
+
```
|
| 540 |
+
</Tab>
|
| 541 |
+
|
| 542 |
+
<Tab title="Python">
|
| 543 |
+
```python theme={"system"}
|
| 544 |
+
from ollama import chat
|
| 545 |
+
# from pathlib import Path
|
| 546 |
+
|
| 547 |
+
# Pass in the path to the image
|
| 548 |
+
path = input('Please enter the path to the image: ')
|
| 549 |
+
|
| 550 |
+
# You can also pass in base64 encoded image data
|
| 551 |
+
# img = base64.b64encode(Path(path).read_bytes()).decode()
|
| 552 |
+
# or the raw bytes
|
| 553 |
+
# img = Path(path).read_bytes()
|
| 554 |
+
|
| 555 |
+
response = chat(
|
| 556 |
+
model='gemma3',
|
| 557 |
+
messages=[
|
| 558 |
+
{
|
| 559 |
+
'role': 'user',
|
| 560 |
+
'content': 'What is in this image? Be concise.',
|
| 561 |
+
'images': [path],
|
| 562 |
+
}
|
| 563 |
+
],
|
| 564 |
+
)
|
| 565 |
+
|
| 566 |
+
print(response.message.content)
|
| 567 |
+
```
</Tab>
|
| 568 |
+

|
| 569 |
+
<Tab title="JavaScript">
|
| 570 |
+
|
| 571 |
+
```javascript theme={"system"}
|
| 572 |
+
import ollama from 'ollama'
|
| 573 |
+
|
| 574 |
+
const imagePath = '/absolute/path/to/image.jpg'
|
| 575 |
+
const response = await ollama.chat({
|
| 576 |
+
model: 'gemma3',
|
| 577 |
+
messages: [
|
| 578 |
+
{ role: 'user', content: 'What is in this image?', images: [imagePath] }
|
| 579 |
+
],
|
| 580 |
+
stream: false,
|
| 581 |
+
})
|
| 582 |
+
|
| 583 |
+
console.log(response.message.content)
|
| 584 |
+
```
|
| 585 |
+
</Tab>
|
| 586 |
+
</Tabs>
|
| 587 |
+
|
| 588 |
+
|
| 589 |
+
> ## Documentation Index
|
| 590 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 591 |
+
> Use this file to discover all available pages before exploring further.
|
| 592 |
+
|
| 593 |
+
# Linux
|
| 594 |
+
|
| 595 |
+
## Install
|
| 596 |
+
|
| 597 |
+
To install Ollama, run the following command:
|
| 598 |
+
|
| 599 |
+
```shell theme={"system"}
|
| 600 |
+
curl -fsSL https://ollama.com/install.sh | sh
|
| 601 |
+
```
|
| 602 |
+
|
| 603 |
+
## Manual install
|
| 604 |
+
|
| 605 |
+
<Note>
|
| 606 |
+
If you are upgrading from a prior version, you should remove the old libraries
|
| 607 |
+
with `sudo rm -rf /usr/lib/ollama` first.
|
| 608 |
+
</Note>
|
| 609 |
+
|
| 610 |
+
Download and extract the package:
|
| 611 |
+
|
| 612 |
+
```shell theme={"system"}
|
| 613 |
+
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tar.zst \
|
| 614 |
+
| sudo tar x -C /usr
|
| 615 |
+
```
|
| 616 |
+
> ## Documentation Index
|
| 617 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 618 |
+
> Use this file to discover all available pages before exploring further.
|
| 619 |
+
|
| 620 |
+
# Tool calling
|
| 621 |
+
|
| 622 |
+
Ollama supports tool calling (also known as function calling) which allows a model to invoke tools and incorporate their results into its replies.
|
| 623 |
+
|
| 624 |
+
## Calling a single tool
|
| 625 |
+
|
| 626 |
+
Invoke a single tool and include its response in a follow-up request.
|
| 627 |
+
|
| 628 |
+
Also known as "single-shot" tool calling.
|
| 629 |
+
|
| 630 |
+
<Tabs>
|
| 631 |
+
<Tab title="cURL">
|
| 632 |
+
|
| 633 |
+
```shell theme={"system"}
|
| 634 |
+
curl -s http://localhost:11434/api/chat -H "Content-Type: application/json" -d '{
|
| 635 |
+
"model": "qwen3",
|
| 636 |
+
"messages": [{"role": "user", "content": "What is the temperature in New York?"}],
|
| 637 |
+
"stream": false,
|
| 638 |
+
"tools": [
|
| 639 |
+
{
|
| 640 |
+
"type": "function",
|
| 641 |
+
"function": {
|
| 642 |
+
"name": "get_temperature",
|
| 643 |
+
"description": "Get the current temperature for a city",
|
| 644 |
+
"parameters": {
|
| 645 |
+
"type": "object",
|
| 646 |
+
"required": ["city"],
|
| 647 |
+
"properties": {
|
| 648 |
+
"city": {"type": "string", "description": "The name of the city"}
|
| 649 |
+
}
|
| 650 |
+
}
|
| 651 |
+
}
|
| 652 |
+
}
|
| 653 |
+
]
|
| 654 |
+
}'
|
| 655 |
+
```
|
| 656 |
+
|
| 657 |
+
**Generate a response with a single tool result**
|
| 658 |
+
|
| 659 |
+
```shell theme={"system"}
|
| 660 |
+
curl -s http://localhost:11434/api/chat -H "Content-Type: application/json" -d '{
|
| 661 |
+
"model": "qwen3",
|
| 662 |
+
"messages": [
|
| 663 |
+
{"role": "user", "content": "What is the temperature in New York?"},
|
| 664 |
+
{
|
| 665 |
+
"role": "assistant",
|
| 666 |
+
"tool_calls": [
|
| 667 |
+
{
|
| 668 |
+
"type": "function",
|
| 669 |
+
"function": {
|
| 670 |
+
"index": 0,
|
| 671 |
+
"name": "get_temperature",
|
| 672 |
+
"arguments": {"city": "New York"}
|
| 673 |
+
}
|
| 674 |
+
}
|
| 675 |
+
]
|
| 676 |
+
},
|
| 677 |
+
{"role": "tool", "tool_name": "get_temperature", "content": "22°C"}
|
| 678 |
+
],
|
| 679 |
+
"stream": false
|
| 680 |
+
}'
|
| 681 |
+
```
|
| 682 |
+
</Tab>
|
| 683 |
+
|
| 684 |
+
<Tab title="Python">
|
| 685 |
+
Install the Ollama Python SDK:
|
| 686 |
+
|
| 687 |
+
```bash theme={"system"}
|
| 688 |
+
# with pip
|
| 689 |
+
pip install ollama -U
|
| 690 |
+
|
| 691 |
+
# with uv
|
| 692 |
+
uv add ollama
|
| 693 |
+
```
|
| 694 |
+
|
| 695 |
+
```python theme={"system"}
|
| 696 |
+
from ollama import chat
|
| 697 |
+
|
| 698 |
+
def get_temperature(city: str) -> str:
|
| 699 |
+
"""Get the current temperature for a city
|
| 700 |
+
|
| 701 |
+
Args:
|
| 702 |
+
city: The name of the city
|
| 703 |
+
|
| 704 |
+
Returns:
|
| 705 |
+
The current temperature for the city
|
| 706 |
+
"""
|
| 707 |
+
temperatures = {
|
| 708 |
+
"New York": "22°C",
|
| 709 |
+
"London": "15°C",
|
| 710 |
+
"Tokyo": "18°C",
|
| 711 |
+
}
|
| 712 |
+
return temperatures.get(city, "Unknown")
|
| 713 |
+
|
| 714 |
+
messages = [{"role": "user", "content": "What is the temperature in New York?"}]
|
| 715 |
+
|
| 716 |
+
# pass functions directly as tools in the tools list or as a JSON schema
|
| 717 |
+
response = chat(model="qwen3", messages=messages, tools=[get_temperature], think=True)
|
| 718 |
+
|
| 719 |
+
messages.append(response.message)
|
| 720 |
+
if response.message.tool_calls:
|
| 721 |
+
# only recommended for models which only return a single tool call
|
| 722 |
+
call = response.message.tool_calls[0]
|
| 723 |
+
result = get_temperature(**call.function.arguments)
|
| 724 |
+
# add the tool result to the messages
|
| 725 |
+
messages.append({"role": "tool", "tool_name": call.function.name, "content": str(result)})
|
| 726 |
+
|
| 727 |
+
final_response = chat(model="qwen3", messages=messages, tools=[get_temperature], think=True)
|
| 728 |
+
print(final_response.message.content)
|
| 729 |
+
```
|
| 730 |
+
</Tab>
|
| 731 |
+
|
| 732 |
+
<Tab title="JavaScript">
|
| 733 |
+
Install the Ollama JavaScript library:
|
| 734 |
+
```bash theme={"system"}
|
| 735 |
+
# with npm
|
| 736 |
+
npm i ollama
|
| 737 |
+
|
| 738 |
+
# with bun
|
| 739 |
+
bun i ollama
|
| 740 |
+
```
|
| 741 |
+
|
| 742 |
+
```typescript theme={"system"}
|
| 743 |
+
import ollama from 'ollama'
|
| 744 |
+
|
| 745 |
+
function getTemperature(city: string): string {
|
| 746 |
+
const temperatures: Record<string, string> = {
|
| 747 |
+
'New York': '22°C',
|
| 748 |
+
'London': '15°C',
|
| 749 |
+
'Tokyo': '18°C',
|
| 750 |
+
}
|
| 751 |
+
return temperatures[city] ?? 'Unknown'
|
| 752 |
+
}
|
| 753 |
+
|
| 754 |
+
const tools = [
|
| 755 |
+
{
|
| 756 |
+
type: 'function',
|
| 757 |
+
function: {
|
| 758 |
+
name: 'get_temperature',
|
| 759 |
+
description: 'Get the current temperature for a city',
|
| 760 |
+
parameters: {
|
| 761 |
+
type: 'object',
|
| 762 |
+
required: ['city'],
|
| 763 |
+
properties: {
|
| 764 |
+
city: { type: 'string', description: 'The name of the city' },
|
| 765 |
+
},
|
| 766 |
+
},
|
| 767 |
+
},
|
| 768 |
+
},
|
| 769 |
+
]
|
| 770 |
+
|
| 771 |
+
const messages = [{ role: 'user', content: "What is the temperature in New York?" }]
|
| 772 |
+
|
| 773 |
+
const response = await ollama.chat({
|
| 774 |
+
model: 'qwen3',
|
| 775 |
+
messages,
|
| 776 |
+
tools,
|
| 777 |
+
think: true,
|
| 778 |
+
})
|
| 779 |
+
|
| 780 |
+
messages.push(response.message)
|
| 781 |
+
if (response.message.tool_calls?.length) {
|
| 782 |
+
// only recommended for models which only return a single tool call
|
| 783 |
+
const call = response.message.tool_calls[0]
|
| 784 |
+
const args = call.function.arguments as { city: string }
|
| 785 |
+
const result = getTemperature(args.city)
|
| 786 |
+
// add the tool result to the messages
|
| 787 |
+
messages.push({ role: 'tool', tool_name: call.function.name, content: result })
|
| 788 |
+
|
| 789 |
+
// generate the final response
|
| 790 |
+
const finalResponse = await ollama.chat({ model: 'qwen3', messages, tools, think: true })
|
| 791 |
+
console.log(finalResponse.message.content)
|
| 792 |
+
}
|
| 793 |
+
```
|
| 794 |
+
</Tab>
|
| 795 |
+
</Tabs>
|
| 796 |
+
|
| 797 |
+
## Parallel tool calling
|
| 798 |
+
|
| 799 |
+
<Tabs>
|
| 800 |
+
<Tab title="cURL">
|
| 801 |
+
Request multiple tool calls in parallel, then send all tool responses back to the model.
|
| 802 |
+
|
| 803 |
+
```shell theme={"system"}
|
| 804 |
+
curl -s http://localhost:11434/api/chat -H "Content-Type: application/json" -d '{
|
| 805 |
+
"model": "qwen3",
|
| 806 |
+
"messages": [{"role": "user", "content": "What are the current weather conditions and temperature in New York and London?"}],
|
| 807 |
+
"stream": false,
|
| 808 |
+
"tools": [
|
| 809 |
+
{
|
| 810 |
+
"type": "function",
|
| 811 |
+
"function": {
|
| 812 |
+
"name": "get_temperature",
|
| 813 |
+
"description": "Get the current temperature for a city",
|
| 814 |
+
"parameters": {
|
| 815 |
+
"type": "object",
|
| 816 |
+
"required": ["city"],
|
| 817 |
+
"properties": {
|
| 818 |
+
"city": {"type": "string", "description": "The name of the city"}
|
| 819 |
+
}
|
| 820 |
+
}
|
| 821 |
+
}
|
| 822 |
+
},
|
| 823 |
+
{
|
| 824 |
+
"type": "function",
|
| 825 |
+
"function": {
|
| 826 |
+
"name": "get_conditions",
|
| 827 |
+
"description": "Get the current weather conditions for a city",
|
| 828 |
+
"parameters": {
|
| 829 |
+
"type": "object",
|
| 830 |
+
"required": ["city"],
|
| 831 |
+
"properties": {
|
| 832 |
+
"city": {"type": "string", "description": "The name of the city"}
|
| 833 |
+
}
|
| 834 |
+
}
|
| 835 |
+
}
|
| 836 |
+
}
|
| 837 |
+
]
|
| 838 |
+
}'
|
| 839 |
+
```
|
| 840 |
+
|
| 841 |
+
**Generate a response with multiple tool results**
|
| 842 |
+
```shell theme={"system"}
|
| 843 |
+
curl -s http://localhost:11434/api/chat -H "Content-Type: application/json" -d '{
|
| 844 |
+
"model": "qwen3",
|
| 845 |
+
"messages": [
|
| 846 |
+
{"role": "user", "content": "What are the current weather conditions and temperature in New York and London?"},
|
| 847 |
+
{
|
| 848 |
+
"role": "assistant",
|
| 849 |
+
"tool_calls": [
|
| 850 |
+
{
|
| 851 |
+
"type": "function",
|
| 852 |
+
"function": {
|
| 853 |
+
"index": 0,
|
| 854 |
+
"name": "get_temperature",
|
| 855 |
+
"arguments": {"city": "New York"}
|
| 856 |
+
}
|
| 857 |
+
},
|
| 858 |
+
{
|
| 859 |
+
"type": "function",
|
| 860 |
+
"function": {
|
| 861 |
+
"index": 1,
|
| 862 |
+
"name": "get_conditions",
|
| 863 |
+
"arguments": {"city": "New York"}
|
| 864 |
+
}
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"type": "function",
|
| 868 |
+
"function": {
|
| 869 |
+
"index": 2,
|
| 870 |
+
"name": "get_temperature",
|
| 871 |
+
"arguments": {"city": "London"}
|
| 872 |
+
}
|
| 873 |
+
},
|
| 874 |
+
{
|
| 875 |
+
"type": "function",
|
| 876 |
+
"function": {
|
| 877 |
+
"index": 3,
|
| 878 |
+
"name": "get_conditions",
|
| 879 |
+
"arguments": {"city": "London"}
|
| 880 |
+
}
|
| 881 |
+
}
|
| 882 |
+
]
|
| 883 |
+
},
|
| 884 |
+
{"role": "tool", "tool_name": "get_temperature", "content": "22°C"},
|
| 885 |
+
{"role": "tool", "tool_name": "get_conditions", "content": "Partly cloudy"},
|
| 886 |
+
{"role": "tool", "tool_name": "get_temperature", "content": "15°C"},
|
| 887 |
+
{"role": "tool", "tool_name": "get_conditions", "content": "Rainy"}
|
| 888 |
+
],
|
| 889 |
+
"stream": false
|
| 890 |
+
}'
|
| 891 |
+
```
|
| 892 |
+
</Tab>
|
| 893 |
+
|
| 894 |
+
<Tab title="Python">
|
| 895 |
+
```python theme={"system"}
|
| 896 |
+
from ollama import chat
|
| 897 |
+
|
| 898 |
+
def get_temperature(city: str) -> str:
|
| 899 |
+
"""Get the current temperature for a city
|
| 900 |
+
|
| 901 |
+
Args:
|
| 902 |
+
city: The name of the city
|
| 903 |
+
|
| 904 |
+
Returns:
|
| 905 |
+
The current temperature for the city
|
| 906 |
+
"""
|
| 907 |
+
temperatures = {
|
| 908 |
+
"New York": "22°C",
|
| 909 |
+
"London": "15°C",
|
| 910 |
+
"Tokyo": "18°C"
|
| 911 |
+
}
|
| 912 |
+
return temperatures.get(city, "Unknown")
|
| 913 |
+
|
| 914 |
+
def get_conditions(city: str) -> str:
|
| 915 |
+
"""Get the current weather conditions for a city
|
| 916 |
+
|
| 917 |
+
Args:
|
| 918 |
+
city: The name of the city
|
| 919 |
+
|
| 920 |
+
Returns:
|
| 921 |
+
The current weather conditions for the city
|
| 922 |
+
"""
|
| 923 |
+
conditions = {
|
| 924 |
+
"New York": "Partly cloudy",
|
| 925 |
+
"London": "Rainy",
|
| 926 |
+
"Tokyo": "Sunny"
|
| 927 |
+
}
|
| 928 |
+
return conditions.get(city, "Unknown")
|
| 929 |
+
|
| 930 |
+
|
| 931 |
+
messages = [{'role': 'user', 'content': 'What are the current weather conditions and temperature in New York and London?'}]
|
| 932 |
+
|
| 933 |
+
# The python client automatically parses functions as a tool schema so we can pass them directly
|
| 934 |
+
# Schemas can be passed directly in the tools list as well
|
| 935 |
+
response = chat(model='qwen3', messages=messages, tools=[get_temperature, get_conditions], think=True)
|
| 936 |
+
|
| 937 |
+
# add the assistant message to the messages
|
| 938 |
+
messages.append(response.message)
|
| 939 |
+
if response.message.tool_calls:
|
| 940 |
+
# process each tool call
|
| 941 |
+
for call in response.message.tool_calls:
|
| 942 |
+
# execute the appropriate tool
|
| 943 |
+
if call.function.name == 'get_temperature':
|
| 944 |
+
result = get_temperature(**call.function.arguments)
|
| 945 |
+
elif call.function.name == 'get_conditions':
|
| 946 |
+
result = get_conditions(**call.function.arguments)
|
| 947 |
+
else:
|
| 948 |
+
result = 'Unknown tool'
|
| 949 |
+
# add the tool result to the messages
|
| 950 |
+
messages.append({'role': 'tool', 'tool_name': call.function.name, 'content': str(result)})
|
| 951 |
+
|
| 952 |
+
# generate the final response
|
| 953 |
+
final_response = chat(model='qwen3', messages=messages, tools=[get_temperature, get_conditions], think=True)
|
| 954 |
+
print(final_response.message.content)
|
| 955 |
+
```
|
| 956 |
+
</Tab>
|
| 957 |
+
|
| 958 |
+
<Tab title="JavaScript">
|
| 959 |
+
```typescript theme={"system"}
|
| 960 |
+
import ollama from 'ollama'
|
| 961 |
+
|
| 962 |
+
function getTemperature(city: string): string {
|
| 963 |
+
const temperatures: { [key: string]: string } = {
|
| 964 |
+
"New York": "22°C",
|
| 965 |
+
"London": "15°C",
|
| 966 |
+
"Tokyo": "18°C"
|
| 967 |
+
}
|
| 968 |
+
return temperatures[city] || "Unknown"
|
| 969 |
+
}
|
| 970 |
+
|
| 971 |
+
function getConditions(city: string): string {
|
| 972 |
+
const conditions: { [key: string]: string } = {
|
| 973 |
+
"New York": "Partly cloudy",
|
| 974 |
+
"London": "Rainy",
|
| 975 |
+
"Tokyo": "Sunny"
|
| 976 |
+
}
|
| 977 |
+
return conditions[city] || "Unknown"
|
| 978 |
+
}
|
| 979 |
+
|
| 980 |
+
const tools = [
|
| 981 |
+
{
|
| 982 |
+
type: 'function',
|
| 983 |
+
function: {
|
| 984 |
+
name: 'get_temperature',
|
| 985 |
+
description: 'Get the current temperature for a city',
|
| 986 |
+
parameters: {
|
| 987 |
+
type: 'object',
|
| 988 |
+
required: ['city'],
|
| 989 |
+
properties: {
|
| 990 |
+
city: { type: 'string', description: 'The name of the city' },
|
| 991 |
+
},
|
| 992 |
+
},
|
| 993 |
+
},
|
| 994 |
+
},
|
| 995 |
+
{
|
| 996 |
+
type: 'function',
|
| 997 |
+
function: {
|
| 998 |
+
name: 'get_conditions',
|
| 999 |
+
description: 'Get the current weather conditions for a city',
|
| 1000 |
+
parameters: {
|
| 1001 |
+
type: 'object',
|
| 1002 |
+
required: ['city'],
|
| 1003 |
+
properties: {
|
| 1004 |
+
city: { type: 'string', description: 'The name of the city' },
|
| 1005 |
+
},
|
| 1006 |
+
},
|
| 1007 |
+
},
|
| 1008 |
+
}
|
| 1009 |
+
]
|
| 1010 |
+
|
| 1011 |
+
const messages = [{ role: 'user', content: 'What are the current weather conditions and temperature in New York and London?' }]
|
| 1012 |
+
|
| 1013 |
+
const response = await ollama.chat({
|
| 1014 |
+
model: 'qwen3',
|
| 1015 |
+
messages,
|
| 1016 |
+
tools,
|
| 1017 |
+
think: true
|
| 1018 |
+
})
|
| 1019 |
+
|
| 1020 |
+
// add the assistant message to the messages
|
| 1021 |
+
messages.push(response.message)
|
| 1022 |
+
if (response.message.tool_calls) {
|
| 1023 |
+
// process each tool call
|
| 1024 |
+
for (const call of response.message.tool_calls) {
|
| 1025 |
+
// execute the appropriate tool
|
| 1026 |
+
let result: string
|
| 1027 |
+
if (call.function.name === 'get_temperature') {
|
| 1028 |
+
const args = call.function.arguments as { city: string }
|
| 1029 |
+
result = getTemperature(args.city)
|
| 1030 |
+
} else if (call.function.name === 'get_conditions') {
|
| 1031 |
+
const args = call.function.arguments as { city: string }
|
| 1032 |
+
result = getConditions(args.city)
|
| 1033 |
+
} else {
|
| 1034 |
+
result = 'Unknown tool'
|
| 1035 |
+
}
|
| 1036 |
+
// add the tool result to the messages
|
| 1037 |
+
messages.push({ role: 'tool', tool_name: call.function.name, content: result })
|
| 1038 |
+
}
|
| 1039 |
+
|
| 1040 |
+
// generate the final response
|
| 1041 |
+
const finalResponse = await ollama.chat({ model: 'qwen3', messages, tools, think: true })
|
| 1042 |
+
console.log(finalResponse.message.content)
|
| 1043 |
+
}
|
| 1044 |
+
```
|
| 1045 |
+
</Tab>
|
| 1046 |
+
</Tabs>
|
| 1047 |
+
|
| 1048 |
+
## Multi-turn tool calling (Agent loop)
|
| 1049 |
+
|
| 1050 |
+
An agent loop allows the model to decide when to invoke tools and incorporate their results into its replies.
|
| 1051 |
+
|
| 1052 |
+
It also might help to tell the model that it is in a loop and can make multiple tool calls.
|
| 1053 |
+
|
| 1054 |
+
<Tabs>
|
| 1055 |
+
<Tab title="Python">
|
| 1056 |
+
```python theme={"system"}
|
| 1057 |
+
from ollama import chat, ChatResponse
|
| 1058 |
+
def add(a: int, b: int) -> int:
|
| 1059 |
+
    """Add two numbers
|
| 1060 |
+

|
| 1061 |
+
Args:
|
| 1062 |
+
a: The first number
|
| 1063 |
+
b: The second number
|
| 1064 |
+
|
| 1065 |
+
Returns:
|
| 1066 |
+
The sum of the two numbers
|
| 1067 |
+
"""
|
| 1068 |
+
return a + b
|
| 1069 |
+
|
| 1070 |
+
|
| 1071 |
+
def multiply(a: int, b: int) -> int:
|
| 1072 |
+
    """Multiply two numbers
|
| 1073 |
+

|
| 1074 |
+
Args:
|
| 1075 |
+
a: The first number
|
| 1076 |
+
b: The second number
|
| 1077 |
+
|
| 1078 |
+
Returns:
|
| 1079 |
+
The product of the two numbers
|
| 1080 |
+
"""
|
| 1081 |
+
return a * b
|
| 1082 |
+
|
| 1083 |
+
|
| 1084 |
+
available_functions = {
|
| 1085 |
+
'add': add,
|
| 1086 |
+
'multiply': multiply,
|
| 1087 |
+
}
|
| 1088 |
+
|
| 1089 |
+
messages = [{'role': 'user', 'content': 'What is (11434+12341)*412?'}]
|
| 1090 |
+
while True:
|
| 1091 |
+
response: ChatResponse = chat(
|
| 1092 |
+
model='qwen3',
|
| 1093 |
+
messages=messages,
|
| 1094 |
+
tools=[add, multiply],
|
| 1095 |
+
think=True,
|
| 1096 |
+
)
|
| 1097 |
+
messages.append(response.message)
|
| 1098 |
+
print("Thinking: ", response.message.thinking)
|
| 1099 |
+
print("Content: ", response.message.content)
|
| 1100 |
+
if response.message.tool_calls:
|
| 1101 |
+
for tc in response.message.tool_calls:
|
| 1102 |
+
if tc.function.name in available_functions:
|
| 1103 |
+
print(f"Calling {tc.function.name} with arguments {tc.function.arguments}")
|
| 1104 |
+
result = available_functions[tc.function.name](**tc.function.arguments)
|
| 1105 |
+
print(f"Result: {result}")
|
| 1106 |
+
# add the tool result to the messages
|
| 1107 |
+
messages.append({'role': 'tool', 'tool_name': tc.function.name, 'content': str(result)})
|
| 1108 |
+
else:
|
| 1109 |
+
# end the loop when there are no more tool calls
|
| 1110 |
+
break
|
| 1111 |
+
# continue the loop with the updated messages
|
| 1112 |
+
```
|
| 1113 |
+
</Tab>
|
| 1114 |
+
|
| 1115 |
+
<Tab title="JavaScript">
|
| 1116 |
+
```typescript theme={"system"}
|
| 1117 |
+
import ollama from 'ollama'
|
| 1118 |
+
|
| 1119 |
+
type ToolName = 'add' | 'multiply'
|
| 1120 |
+
|
| 1121 |
+
function add(a: number, b: number): number {
|
| 1122 |
+
return a + b
|
| 1123 |
+
}
|
| 1124 |
+
|
| 1125 |
+
function multiply(a: number, b: number): number {
|
| 1126 |
+
return a * b
|
| 1127 |
+
}
|
| 1128 |
+
|
| 1129 |
+
const availableFunctions: Record<ToolName, (a: number, b: number) => number> = {
|
| 1130 |
+
add,
|
| 1131 |
+
multiply,
|
| 1132 |
+
}
|
| 1133 |
+
|
| 1134 |
+
const tools = [
|
| 1135 |
+
{
|
| 1136 |
+
type: 'function',
|
| 1137 |
+
function: {
|
| 1138 |
+
name: 'add',
|
| 1139 |
+
description: 'Add two numbers',
|
| 1140 |
+
parameters: {
|
| 1141 |
+
type: 'object',
|
| 1142 |
+
required: ['a', 'b'],
|
| 1143 |
+
properties: {
|
| 1144 |
+
a: { type: 'integer', description: 'The first number' },
|
| 1145 |
+
b: { type: 'integer', description: 'The second number' },
|
| 1146 |
+
},
|
| 1147 |
+
},
|
| 1148 |
+
},
|
| 1149 |
+
},
|
| 1150 |
+
{
|
| 1151 |
+
type: 'function',
|
| 1152 |
+
function: {
|
| 1153 |
+
name: 'multiply',
|
| 1154 |
+
description: 'Multiply two numbers',
|
| 1155 |
+
parameters: {
|
| 1156 |
+
type: 'object',
|
| 1157 |
+
required: ['a', 'b'],
|
| 1158 |
+
properties: {
|
| 1159 |
+
a: { type: 'integer', description: 'The first number' },
|
| 1160 |
+
b: { type: 'integer', description: 'The second number' },
|
| 1161 |
+
},
|
| 1162 |
+
},
|
| 1163 |
+
},
|
| 1164 |
+
},
|
| 1165 |
+
]
|
| 1166 |
+
|
| 1167 |
+
async function agentLoop() {
|
| 1168 |
+
const messages = [{ role: 'user', content: 'What is (11434+12341)*412?' }]
|
| 1169 |
+
|
| 1170 |
+
while (true) {
|
| 1171 |
+
const response = await ollama.chat({
|
| 1172 |
+
model: 'qwen3',
|
| 1173 |
+
messages,
|
| 1174 |
+
tools,
|
| 1175 |
+
think: true,
|
| 1176 |
+
})
|
| 1177 |
+
|
| 1178 |
+
messages.push(response.message)
|
| 1179 |
+
console.log('Thinking:', response.message.thinking)
|
| 1180 |
+
console.log('Content:', response.message.content)
|
| 1181 |
+
|
| 1182 |
+
const toolCalls = response.message.tool_calls ?? []
|
| 1183 |
+
if (toolCalls.length) {
|
| 1184 |
+
for (const call of toolCalls) {
|
| 1185 |
+
const fn = availableFunctions[call.function.name as ToolName]
|
| 1186 |
+
if (!fn) {
|
| 1187 |
+
continue
|
| 1188 |
+
}
|
| 1189 |
+
|
| 1190 |
+
const args = call.function.arguments as { a: number; b: number }
|
| 1191 |
+
console.log(`Calling ${call.function.name} with arguments`, args)
|
| 1192 |
+
const result = fn(args.a, args.b)
|
| 1193 |
+
console.log(`Result: ${result}`)
|
| 1194 |
+
messages.push({ role: 'tool', tool_name: call.function.name, content: String(result) })
|
| 1195 |
+
}
|
| 1196 |
+
} else {
|
| 1197 |
+
break
|
| 1198 |
+
}
|
| 1199 |
+
}
|
| 1200 |
+
}
|
| 1201 |
+
|
| 1202 |
+
agentLoop().catch(console.error)
|
| 1203 |
+
```
|
| 1204 |
+
</Tab>
|
| 1205 |
+
</Tabs>
|
| 1206 |
+
|
| 1207 |
+
## Tool calling with streaming
|
| 1208 |
+
|
| 1209 |
+
When streaming, gather every chunk of `thinking`, `content`, and `tool_calls`, then return those fields together with any tool results in the follow-up request.
|
| 1210 |
+
|
| 1211 |
+
<Tabs>
|
| 1212 |
+
<Tab title="Python">
|
| 1213 |
+
|
| 1214 |
+
```python theme={"system"}
|
| 1215 |
+
from ollama import chat
|
| 1216 |
+
def get_temperature(city: str) -> str:
|
| 1217 |
+
"""Get the current temperature for a city
|
| 1218 |
+
|
| 1219 |
+
Args:
|
| 1220 |
+
city: The name of the city
|
| 1221 |
+
|
| 1222 |
+
Returns:
|
| 1223 |
+
The current temperature for the city
|
| 1224 |
+
"""
|
| 1225 |
+
temperatures = {
|
| 1226 |
+
'New York': '22°C',
|
| 1227 |
+
'London': '15°C',
|
| 1228 |
+
}
|
| 1229 |
+
return temperatures.get(city, 'Unknown')
|
| 1230 |
+
|
| 1231 |
+
|
| 1232 |
+
messages = [{'role': 'user', 'content': "What is the temperature in New York?"}]
|
| 1233 |
+
|
| 1234 |
+
while True:
|
| 1235 |
+
stream = chat(
|
| 1236 |
+
model='qwen3',
|
| 1237 |
+
messages=messages,
|
| 1238 |
+
tools=[get_temperature],
|
| 1239 |
+
stream=True,
|
| 1240 |
+
think=True,
|
| 1241 |
+
)
|
| 1242 |
+
|
| 1243 |
+
thinking = ''
|
| 1244 |
+
content = ''
|
| 1245 |
+
tool_calls = []
|
| 1246 |
+
|
| 1247 |
+
done_thinking = False
|
| 1248 |
+
# accumulate the partial fields
|
| 1249 |
+
for chunk in stream:
|
| 1250 |
+
if chunk.message.thinking:
|
| 1251 |
+
thinking += chunk.message.thinking
|
| 1252 |
+
print(chunk.message.thinking, end='', flush=True)
|
| 1253 |
+
if chunk.message.content:
|
| 1254 |
+
if not done_thinking:
|
| 1255 |
+
done_thinking = True
|
| 1256 |
+
print('\n')
|
| 1257 |
+
content += chunk.message.content
|
| 1258 |
+
print(chunk.message.content, end='', flush=True)
|
| 1259 |
+
if chunk.message.tool_calls:
|
| 1260 |
+
tool_calls.extend(chunk.message.tool_calls)
|
| 1261 |
+
print(chunk.message.tool_calls)
|
| 1262 |
+
|
| 1263 |
+
# append accumulated fields to the messages
|
| 1264 |
+
if thinking or content or tool_calls:
|
| 1265 |
+
messages.append({'role': 'assistant', 'thinking': thinking, 'content': content, 'tool_calls': tool_calls})
|
| 1266 |
+
|
| 1267 |
+
if not tool_calls:
|
| 1268 |
+
break
|
| 1269 |
+
|
| 1270 |
+
for call in tool_calls:
|
| 1271 |
+
if call.function.name == 'get_temperature':
|
| 1272 |
+
result = get_temperature(**call.function.arguments)
|
| 1273 |
+
else:
|
| 1274 |
+
result = 'Unknown tool'
|
| 1275 |
+
messages.append({'role': 'tool', 'tool_name': call.function.name, 'content': result})
|
| 1276 |
+
```
|
| 1277 |
+
</Tab>
|
| 1278 |
+
|
| 1279 |
+
<Tab title="JavaScript">
|
| 1280 |
+
|
| 1281 |
+
```typescript theme={"system"}
|
| 1282 |
+
import ollama from 'ollama'
|
| 1283 |
+
|
| 1284 |
+
function getTemperature(city: string): string {
|
| 1285 |
+
const temperatures: Record<string, string> = {
|
| 1286 |
+
'New York': '22°C',
|
| 1287 |
+
'London': '15°C',
|
| 1288 |
+
}
|
| 1289 |
+
return temperatures[city] ?? 'Unknown'
|
| 1290 |
+
}
|
| 1291 |
+
|
| 1292 |
+
const getTemperatureTool = {
|
| 1293 |
+
type: 'function',
|
| 1294 |
+
function: {
|
| 1295 |
+
name: 'get_temperature',
|
| 1296 |
+
description: 'Get the current temperature for a city',
|
| 1297 |
+
parameters: {
|
| 1298 |
+
type: 'object',
|
| 1299 |
+
required: ['city'],
|
| 1300 |
+
properties: {
|
| 1301 |
+
city: { type: 'string', description: 'The name of the city' },
|
| 1302 |
+
},
|
| 1303 |
+
},
|
| 1304 |
+
},
|
| 1305 |
+
}
|
| 1306 |
+
|
| 1307 |
+
async function agentLoop() {
|
| 1308 |
+
const messages = [{ role: 'user', content: "What is the temperature in New York?" }]
|
| 1309 |
+
|
| 1310 |
+
while (true) {
|
| 1311 |
+
const stream = await ollama.chat({
|
| 1312 |
+
model: 'qwen3',
|
| 1313 |
+
messages,
|
| 1314 |
+
tools: [getTemperatureTool],
|
| 1315 |
+
stream: true,
|
| 1316 |
+
think: true,
|
| 1317 |
+
})
|
| 1318 |
+
|
| 1319 |
+
let thinking = ''
|
| 1320 |
+
let content = ''
|
| 1321 |
+
const toolCalls: any[] = []
|
| 1322 |
+
let doneThinking = false
|
| 1323 |
+
|
| 1324 |
+
for await (const chunk of stream) {
|
| 1325 |
+
if (chunk.message.thinking) {
|
| 1326 |
+
thinking += chunk.message.thinking
|
| 1327 |
+
process.stdout.write(chunk.message.thinking)
|
| 1328 |
+
}
|
| 1329 |
+
if (chunk.message.content) {
|
| 1330 |
+
if (!doneThinking) {
|
| 1331 |
+
doneThinking = true
|
| 1332 |
+
process.stdout.write('\n')
|
| 1333 |
+
}
|
| 1334 |
+
content += chunk.message.content
|
| 1335 |
+
process.stdout.write(chunk.message.content)
|
| 1336 |
+
}
|
| 1337 |
+
if (chunk.message.tool_calls?.length) {
|
| 1338 |
+
toolCalls.push(...chunk.message.tool_calls)
|
| 1339 |
+
console.log(chunk.message.tool_calls)
|
| 1340 |
+
}
|
| 1341 |
+
}
|
| 1342 |
+
|
| 1343 |
+
if (thinking || content || toolCalls.length) {
|
| 1344 |
+
messages.push({ role: 'assistant', thinking, content, tool_calls: toolCalls } as any)
|
| 1345 |
+
}
|
| 1346 |
+
|
| 1347 |
+
if (!toolCalls.length) {
|
| 1348 |
+
break
|
| 1349 |
+
}
|
| 1350 |
+
|
| 1351 |
+
for (const call of toolCalls) {
|
| 1352 |
+
if (call.function.name === 'get_temperature') {
|
| 1353 |
+
const args = call.function.arguments as { city: string }
|
| 1354 |
+
const result = getTemperature(args.city)
|
| 1355 |
+
messages.push({ role: 'tool', tool_name: call.function.name, content: result } )
|
| 1356 |
+
} else {
|
| 1357 |
+
messages.push({ role: 'tool', tool_name: call.function.name, content: 'Unknown tool' } )
|
| 1358 |
+
}
|
| 1359 |
+
}
|
| 1360 |
+
}
|
| 1361 |
+
}
|
| 1362 |
+
|
| 1363 |
+
agentLoop().catch(console.error)
|
| 1364 |
+
```
|
| 1365 |
+
</Tab>
|
| 1366 |
+
</Tabs>
|
| 1367 |
+
|
| 1368 |
+
This loop streams the assistant response, accumulates partial fields, passes them back together, and appends the tool results so the model can complete its answer.
|
| 1369 |
+
|
| 1370 |
+
## Using functions as tools with Ollama Python SDK
|
| 1371 |
+
|
| 1372 |
+
The Python SDK automatically parses functions as a tool schema so we can pass them directly.
|
| 1373 |
+
Schemas can still be passed if needed.
|
| 1374 |
+
|
| 1375 |
+
````pyx theme={"system"}
|
| 1376 |
+
from ollama import chat
|
| 1377 |
+
|
| 1378 |
+
def get_temperature(city: str) -> str:
|
| 1379 |
+
"""Get the current temperature for a city
|
| 1380 |
+
|
| 1381 |
+
Args:
|
| 1382 |
+
city: The name of the city
|
| 1383 |
+
|
| 1384 |
+
Returns:
|
| 1385 |
+
The current temperature for the city
|
| 1386 |
+
"""
|
| 1387 |
+
temperatures = {
|
| 1388 |
+
'New York': '22°C',
|
| 1389 |
+
'London': '15°C',
|
| 1390 |
+
}
|
| 1391 |
+
return temperatures.get(city, 'Unknown')
|
| 1392 |
+
|
| 1393 |
+
available_functions = {
|
| 1394 |
+
'get_temperature': get_temperature,
|
| 1395 |
+
}
|
| 1396 |
+
# directly pass the function as part of the tools list
|
| 1397 |
+
response = chat(model='qwen3', messages=messages, tools=available_functions.values(), think=True)
|
| 1398 |
+
```
|
| 1399 |
+
---
|
| 1400 |
+
|
| 1401 |
+
````
|
| 1402 |
+
Start Ollama:
|
| 1403 |
+
|
| 1404 |
+
```shell theme={"system"}
|
| 1405 |
+
ollama serve
|
| 1406 |
+
```
|
| 1407 |
+
|
| 1408 |
+
In another terminal, verify that Ollama is running:
|
| 1409 |
+
|
| 1410 |
+
```shell theme={"system"}
|
| 1411 |
+
ollama -v
|
| 1412 |
+
```
|
| 1413 |
+
|
| 1414 |
+
### AMD GPU install
|
| 1415 |
+
|
| 1416 |
+
If you have an AMD GPU, also download and extract the additional ROCm package:
|
| 1417 |
+
|
| 1418 |
+
```shell theme={"system"}
|
| 1419 |
+
curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tar.zst \
|
| 1420 |
+
| sudo tar x -C /usr
|
| 1421 |
+
```
|
| 1422 |
+
|
| 1423 |
+
### ARM64 install
|
| 1424 |
+
|
| 1425 |
+
Download and extract the ARM64-specific package:
|
| 1426 |
+
|
| 1427 |
+
```shell theme={"system"}
|
| 1428 |
+
curl -fsSL https://ollama.com/download/ollama-linux-arm64.tar.zst \
|
| 1429 |
+
| sudo tar x -C /usr
|
| 1430 |
+
```
|
| 1431 |
+
|
| 1432 |
+
### Adding Ollama as a startup service (recommended)
|
| 1433 |
+
|
| 1434 |
+
Create a user and group for Ollama:
|
| 1435 |
+
|
| 1436 |
+
```shell theme={"system"}
|
| 1437 |
+
sudo useradd -r -s /bin/false -U -m -d /usr/share/ollama ollama
|
| 1438 |
+
sudo usermod -a -G ollama $(whoami)
|
| 1439 |
+
```
|
| 1440 |
+
|
| 1441 |
+
Create a service file in `/etc/systemd/system/ollama.service`:
|
| 1442 |
+
|
| 1443 |
+
```ini theme={"system"}
|
| 1444 |
+
[Unit]
|
| 1445 |
+
Description=Ollama Service
|
| 1446 |
+
After=network-online.target
|
| 1447 |
+
|
| 1448 |
+
[Service]
|
| 1449 |
+
ExecStart=/usr/bin/ollama serve
|
| 1450 |
+
User=ollama
|
| 1451 |
+
Group=ollama
|
| 1452 |
+
Restart=always
|
| 1453 |
+
RestartSec=3
|
| 1454 |
+
Environment="PATH=$PATH"
|
| 1455 |
+
|
| 1456 |
+
[Install]
|
| 1457 |
+
WantedBy=multi-user.target
|
| 1458 |
+
```
|
| 1459 |
+
> ## Documentation Index
|
| 1460 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 1461 |
+
> Use this file to discover all available pages before exploring further.
|
| 1462 |
+
|
| 1463 |
+
# Embeddings
|
| 1464 |
+
|
| 1465 |
+
> Generate text embeddings for semantic search, retrieval, and RAG.
|
| 1466 |
+
|
| 1467 |
+
Embeddings turn text into numeric vectors you can store in a vector database, search with cosine similarity, or use in RAG pipelines. The vector length depends on the model (typically 384–1024 dimensions).
|
| 1468 |
+
|
| 1469 |
+
## Recommended models
|
| 1470 |
+
|
| 1471 |
+
* [embeddinggemma](https://ollama.com/library/embeddinggemma)
|
| 1472 |
+
* [qwen3-embedding](https://ollama.com/library/qwen3-embedding)
|
| 1473 |
+
* [all-minilm](https://ollama.com/library/all-minilm)
|
| 1474 |
+
|
| 1475 |
+
## Generate embeddings
|
| 1476 |
+
|
| 1477 |
+
<Tabs>
|
| 1478 |
+
<Tab title="CLI">
|
| 1479 |
+
Generate embeddings directly from the command line:
|
| 1480 |
+
|
| 1481 |
+
```shell theme={"system"}
|
| 1482 |
+
ollama run embeddinggemma "Hello world"
|
| 1483 |
+
```
|
| 1484 |
+
|
| 1485 |
+
You can also pipe text to generate embeddings:
|
| 1486 |
+
|
| 1487 |
+
```shell theme={"system"}
|
| 1488 |
+
echo "Hello world" | ollama run embeddinggemma
|
| 1489 |
+
```
|
| 1490 |
+
|
| 1491 |
+
Output is a JSON array.
|
| 1492 |
+
</Tab>
|
| 1493 |
+
|
| 1494 |
+
<Tab title="cURL">
|
| 1495 |
+
```shell theme={"system"}
|
| 1496 |
+
curl -X POST http://localhost:11434/api/embed \
|
| 1497 |
+
-H "Content-Type: application/json" \
|
| 1498 |
+
-d '{
|
| 1499 |
+
"model": "embeddinggemma",
|
| 1500 |
+
"input": "The quick brown fox jumps over the lazy dog."
|
| 1501 |
+
}'
|
| 1502 |
+
```
|
| 1503 |
+
</Tab>
|
| 1504 |
+
|
| 1505 |
+
<Tab title="Python">
|
| 1506 |
+
```python theme={"system"}
|
| 1507 |
+
import ollama
|
| 1508 |
+
|
| 1509 |
+
single = ollama.embed(
|
| 1510 |
+
model='embeddinggemma',
|
| 1511 |
+
input='The quick brown fox jumps over the lazy dog.'
|
| 1512 |
+
)
|
| 1513 |
+
print(len(single['embeddings'][0])) # vector length
|
| 1514 |
+
```
|
| 1515 |
+
</Tab>
|
| 1516 |
+
|
| 1517 |
+
<Tab title="JavaScript">
|
| 1518 |
+
|
| 1519 |
+
```javascript theme={"system"}
|
| 1520 |
+
import ollama from 'ollama'
|
| 1521 |
+
|
| 1522 |
+
const single = await ollama.embed({
|
| 1523 |
+
model: 'embeddinggemma',
|
| 1524 |
+
input: 'The quick brown fox jumps over the lazy dog.',
|
| 1525 |
+
})
|
| 1526 |
+
console.log(single.embeddings[0].length) // vector length
|
| 1527 |
+
```
|
| 1528 |
+
</Tab>
|
| 1529 |
+
</Tabs>
|
| 1530 |
+
|
| 1531 |
+
<Note>
|
| 1532 |
+
The `/api/embed` endpoint returns L2‑normalized (unit‑length) vectors.
|
| 1533 |
+
</Note>
|
| 1534 |
+
|
| 1535 |
+
## Generate a batch of embeddings
|
| 1536 |
+
|
| 1537 |
+
Pass an array of strings to `input`.
|
| 1538 |
+
|
| 1539 |
+
<Tabs>
|
| 1540 |
+
<Tab title="cURL">
|
| 1541 |
+
```shell theme={"system"}
|
| 1542 |
+
curl -X POST http://localhost:11434/api/embed \
|
| 1543 |
+
-H "Content-Type: application/json" \
|
| 1544 |
+
-d '{
|
| 1545 |
+
"model": "embeddinggemma",
|
| 1546 |
+
"input": [
|
| 1547 |
+
"First sentence",
|
| 1548 |
+
"Second sentence",
|
| 1549 |
+
"Third sentence"
|
| 1550 |
+
]
|
| 1551 |
+
}'
|
| 1552 |
+
```
|
| 1553 |
+
</Tab>
|
| 1554 |
+
|
| 1555 |
+
<Tab title="Python">
|
| 1556 |
+
```python theme={"system"}
|
| 1557 |
+
import ollama
|
| 1558 |
+
|
| 1559 |
+
batch = ollama.embed(
|
| 1560 |
+
model='embeddinggemma',
|
| 1561 |
+
input=[
|
| 1562 |
+
'The quick brown fox jumps over the lazy dog.',
|
| 1563 |
+
'The five boxing wizards jump quickly.',
|
| 1564 |
+
'Jackdaws love my big sphinx of quartz.',
|
| 1565 |
+
]
|
| 1566 |
+
)
|
| 1567 |
+
print(len(batch['embeddings'])) # number of vectors
|
| 1568 |
+
```
|
| 1569 |
+
</Tab>
|
| 1570 |
+
<Tab title="JavaScript">
|
| 1571 |
+
```javascript theme={"system"}
|
| 1572 |
+
import ollama from 'ollama'
|
| 1573 |
+
const batch = await ollama.embed({
|
| 1574 |
+
model: 'embeddinggemma',
|
| 1575 |
+
input: [
|
| 1576 |
+
'The quick brown fox jumps over the lazy dog.',
|
| 1577 |
+
'The five boxing wizards jump quickly.',
|
| 1578 |
+
'Jackdaws love my big sphinx of quartz.',
|
| 1579 |
+
],
|
| 1580 |
+
})
|
| 1581 |
+
console.log(batch.embeddings.length) // number of vectors
|
| 1582 |
+
```
|
| 1583 |
+
</Tab>
|
| 1584 |
+
</Tabs>
|
| 1585 |
+
|
| 1586 |
+
## Tips
|
| 1587 |
+
|
| 1588 |
+
* Use cosine similarity for most semantic search use cases.
|
| 1589 |
+
* Use the same embedding model for both indexing and querying.
|
| 1590 |
+
|
| 1591 |
+
Then start the service:
|
| 1592 |
+
|
| 1593 |
+
```shell theme={"system"}
|
| 1594 |
+
sudo systemctl daemon-reload
|
| 1595 |
+
sudo systemctl enable ollama
|
| 1596 |
+
```
|
| 1597 |
+
|
| 1598 |
+
### Install CUDA drivers (optional)
|
| 1599 |
+
|
| 1600 |
+
[Download and install](https://developer.nvidia.com/cuda-downloads) CUDA.
|
| 1601 |
+
|
| 1602 |
+
Verify that the drivers are installed by running the following command, which should print details about your GPU:
|
| 1603 |
+
|
| 1604 |
+
```shell theme={"system"}
|
| 1605 |
+
nvidia-smi
|
| 1606 |
+
```
|
| 1607 |
+
|
| 1608 |
+
### Install AMD ROCm drivers (optional)
|
| 1609 |
+
|
| 1610 |
+
[Download and Install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html) ROCm v7.
|
| 1611 |
+
|
| 1612 |
+
### Start Ollama
|
| 1613 |
+
|
| 1614 |
+
Start Ollama and verify it is running:
|
| 1615 |
+
|
| 1616 |
+
```shell theme={"system"}
|
| 1617 |
+
sudo systemctl start ollama
|
| 1618 |
+
sudo systemctl status ollama
|
| 1619 |
+
```
|
| 1620 |
+
|
| 1621 |
+
<Note>
|
| 1622 |
+
While AMD has contributed the `amdgpu` driver upstream to the official linux
|
| 1623 |
+
kernel source, the version is older and may not support all ROCm features. We
|
| 1624 |
+
recommend you install the latest driver from
|
| 1625 |
+
[https://www.amd.com/en/support/linux-drivers](https://www.amd.com/en/support/linux-drivers) for best support of your Radeon
|
| 1626 |
+
GPU.
|
| 1627 |
+
</Note>
|
| 1628 |
+
|
| 1629 |
+
## Customizing
|
| 1630 |
+
|
| 1631 |
+
To customize the installation of Ollama, you can edit the systemd service file or the environment variables by running:
|
| 1632 |
+
|
| 1633 |
+
```shell theme={"system"}
|
| 1634 |
+
sudo systemctl edit ollama
|
| 1635 |
+
```
|
| 1636 |
+
|
| 1637 |
+
Alternatively, create an override file manually in `/etc/systemd/system/ollama.service.d/override.conf`:
|
| 1638 |
+
|
| 1639 |
+
```ini theme={"system"}
|
| 1640 |
+
[Service]
|
| 1641 |
+
Environment="OLLAMA_DEBUG=1"
|
| 1642 |
+
```
|
| 1643 |
+
|
| 1644 |
+
## Updating
|
| 1645 |
+
|
| 1646 |
+
Update Ollama by running the install script again:
|
| 1647 |
+
|
| 1648 |
+
```shell theme={"system"}
|
| 1649 |
+
curl -fsSL https://ollama.com/install.sh | sh
|
| 1650 |
+
```
|
| 1651 |
+
|
| 1652 |
+
Or by re-downloading Ollama:
|
| 1653 |
+
|
| 1654 |
+
```shell theme={"system"}
|
| 1655 |
+
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tar.zst \
|
| 1656 |
+
| sudo tar x -C /usr
|
| 1657 |
+
```
|
| 1658 |
+
|
| 1659 |
+
## Installing specific versions
|
| 1660 |
+
|
| 1661 |
+
Use `OLLAMA_VERSION` environment variable with the install script to install a specific version of Ollama, including pre-releases. You can find the version numbers in the [releases page](https://github.com/ollama/ollama/releases).
|
| 1662 |
+
|
| 1663 |
+
For example:
|
| 1664 |
+
|
| 1665 |
+
```shell theme={"system"}
|
| 1666 |
+
curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.5.7 sh
|
| 1667 |
+
```
|
| 1668 |
+
|
| 1669 |
+
## Viewing logs
|
| 1670 |
+
|
| 1671 |
+
To view logs of Ollama running as a startup service, run:
|
| 1672 |
+
|
| 1673 |
+
```shell theme={"system"}
|
| 1674 |
+
journalctl -e -u ollama
|
| 1675 |
+
```
|
| 1676 |
+
|
| 1677 |
+
> ## Documentation Index
|
| 1678 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 1679 |
+
> Use this file to discover all available pages before exploring further.
|
| 1680 |
+
|
| 1681 |
+
# Modelfile Reference
|
| 1682 |
+
|
| 1683 |
+
A Modelfile is the blueprint to create and share customized models using Ollama.
|
| 1684 |
+
|
| 1685 |
+
## Table of Contents
|
| 1686 |
+
|
| 1687 |
+
* [Format](#format)
|
| 1688 |
+
* [Examples](#examples)
|
| 1689 |
+
* [Instructions](#instructions)
|
| 1690 |
+
* [FROM (Required)](#from-required)
|
| 1691 |
+
* [Build from existing model](#build-from-existing-model)
|
| 1692 |
+
* [Build from a Safetensors model](#build-from-a-safetensors-model)
|
| 1693 |
+
* [Build from a GGUF file](#build-from-a-gguf-file)
|
| 1694 |
+
* [PARAMETER](#parameter)
|
| 1695 |
+
* [Valid Parameters and Values](#valid-parameters-and-values)
|
| 1696 |
+
* [TEMPLATE](#template)
|
| 1697 |
+
* [Template Variables](#template-variables)
|
| 1698 |
+
* [SYSTEM](#system)
|
| 1699 |
+
* [ADAPTER](#adapter)
|
| 1700 |
+
* [LICENSE](#license)
|
| 1701 |
+
* [MESSAGE](#message)
|
| 1702 |
+
* [Notes](#notes)
|
| 1703 |
+
|
| 1704 |
+
## Format
|
| 1705 |
+
|
| 1706 |
+
The format of the `Modelfile`:
|
| 1707 |
+
|
| 1708 |
+
```
|
| 1709 |
+
# comment
|
| 1710 |
+
INSTRUCTION arguments
|
| 1711 |
+
```
|
| 1712 |
+
|
| 1713 |
+
| Instruction | Description |
|
| 1714 |
+
| ----------------------------------- | -------------------------------------------------------------- |
|
| 1715 |
+
| [`FROM`](#from-required) (required) | Defines the base model to use. |
|
| 1716 |
+
| [`PARAMETER`](#parameter) | Sets the parameters for how Ollama will run the model. |
|
| 1717 |
+
| [`TEMPLATE`](#template) | The full prompt template to be sent to the model. |
|
| 1718 |
+
| [`SYSTEM`](#system) | Specifies the system message that will be set in the template. |
|
| 1719 |
+
| [`ADAPTER`](#adapter) | Defines the (Q)LoRA adapters to apply to the model. |
|
| 1720 |
+
| [`LICENSE`](#license) | Specifies the legal license. |
|
| 1721 |
+
| [`MESSAGE`](#message) | Specify message history. |
|
| 1722 |
+
| [`REQUIRES`](#requires) | Specify the minimum version of Ollama required by the model. |
|
| 1723 |
+
|
| 1724 |
+
## Examples
|
| 1725 |
+
|
| 1726 |
+
### Basic `Modelfile`
|
| 1727 |
+
|
| 1728 |
+
An example of a `Modelfile` creating a mario blueprint:
|
| 1729 |
+
|
| 1730 |
+
```Modelfile
|
| 1731 |
+
FROM llama3.2
|
| 1732 |
+
# sets the temperature to 1 [higher is more creative, lower is more coherent]
|
| 1733 |
+
PARAMETER temperature 1
|
| 1734 |
+
# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
|
| 1735 |
+
PARAMETER num_ctx 4096
|
| 1736 |
+
|
| 1737 |
+
# sets a custom system message to specify the behavior of the chat assistant
|
| 1738 |
+
SYSTEM You are Mario from super mario bros, acting as an assistant.
|
| 1739 |
+
```
|
| 1740 |
+
|
| 1741 |
+
To use this:
|
| 1742 |
+
|
| 1743 |
+
1. Save it as a file (e.g. `Modelfile`)
|
| 1744 |
+
2. `ollama create choose-a-model-name -f <location of the file e.g. ./Modelfile>`
|
| 1745 |
+
3. `ollama run choose-a-model-name`
|
| 1746 |
+
4. Start using the model!
|
| 1747 |
+
|
| 1748 |
+
To view the Modelfile of a given model, use the `ollama show --modelfile` command.
|
| 1749 |
+
|
| 1750 |
+
```shell theme={"system"}
|
| 1751 |
+
ollama show --modelfile llama3.2
|
| 1752 |
+
```
|
| 1753 |
+
|
| 1754 |
+
```cmake
|
| 1755 |
+
# Modelfile generated by "ollama show"
|
| 1756 |
+
# To build a new Modelfile based on this one, replace the FROM line with:
|
| 1757 |
+
# FROM llama3.2:latest
|
| 1758 |
+
FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29
|
| 1759 |
+
TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
|
| 1760 |
+
|
| 1761 |
+
{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
|
| 1762 |
+
|
| 1763 |
+
{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
|
| 1764 |
+
|
| 1765 |
+
{{ .Response }}<|eot_id|>"""
|
| 1766 |
+
PARAMETER stop "<|start_header_id|>"
|
| 1767 |
+
PARAMETER stop "<|end_header_id|>"
|
| 1768 |
+
PARAMETER stop "<|eot_id|>"
|
| 1769 |
+
PARAMETER stop "<|reserved_special_token"
|
| 1770 |
+
```
|
| 1771 |
+
|
| 1772 |
+
## Instructions
|
| 1773 |
+
|
| 1774 |
+
### FROM (Required)
|
| 1775 |
+
|
| 1776 |
+
The `FROM` instruction defines the base model to use when creating a model.
|
| 1777 |
+
|
| 1778 |
+
```
|
| 1779 |
+
FROM <model name>:<tag>
|
| 1780 |
+
```
|
| 1781 |
+
|
| 1782 |
+
#### Build from existing model
|
| 1783 |
+
|
| 1784 |
+
```
|
| 1785 |
+
FROM llama3.2
|
| 1786 |
+
```
|
| 1787 |
+
|
| 1788 |
+
<Card title="Base Models" href="https://github.com/ollama/ollama#model-library">
|
| 1789 |
+
A list of available base models
|
| 1790 |
+
</Card>
|
| 1791 |
+
|
| 1792 |
+
<Card title="Base Models" href="https://ollama.com/library">
|
| 1793 |
+
Additional models can be found at
|
| 1794 |
+
</Card>
|
| 1795 |
+
|
| 1796 |
+
#### Build from a Safetensors model
|
| 1797 |
+
|
| 1798 |
+
```
|
| 1799 |
+
FROM <model directory>
|
| 1800 |
+
```
|
| 1801 |
+
|
| 1802 |
+
The model directory should contain the Safetensors weights for a supported architecture.
|
| 1803 |
+
|
| 1804 |
+
Currently supported model architectures:
|
| 1805 |
+
|
| 1806 |
+
* Llama (including Llama 2, Llama 3, Llama 3.1, and Llama 3.2)
|
| 1807 |
+
* Mistral (including Mistral 1, Mistral 2, and Mixtral)
|
| 1808 |
+
* Gemma (including Gemma 1 and Gemma 2)
|
| 1809 |
+
* Phi3
|
| 1810 |
+
|
| 1811 |
+
#### Build from a GGUF file
|
| 1812 |
+
|
| 1813 |
+
```
|
| 1814 |
+
FROM ./ollama-model.gguf
|
| 1815 |
+
```
|
| 1816 |
+
|
| 1817 |
+
The GGUF file location should be specified as an absolute path or relative to the `Modelfile` location.
|
| 1818 |
+
|
| 1819 |
+
### PARAMETER
|
| 1820 |
+
|
| 1821 |
+
The `PARAMETER` instruction defines a parameter that can be set when the model is run.
|
| 1822 |
+
|
| 1823 |
+
```
|
| 1824 |
+
PARAMETER <parameter> <parametervalue>
|
| 1825 |
+
```
|
| 1826 |
+
|
| 1827 |
+
#### Valid Parameters and Values
|
| 1828 |
+
|
| 1829 |
+
| Parameter | Description | Value Type | Example Usage |
|
| 1830 |
+
| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------- | -------------------- |
|
| 1831 |
+
| num\_ctx | Sets the size of the context window used to generate the next token. (Default: 2048) | int | num\_ctx 4096 |
|
| 1832 |
+
| repeat\_last\_n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num\_ctx) | int | repeat\_last\_n 64 |
|
| 1833 |
+
| repeat\_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) | float | repeat\_penalty 1.1 |
|
| 1834 |
+
| temperature | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8) | float | temperature 0.7 |
|
| 1835 |
+
| seed | Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0) | int | seed 42 |
|
| 1836 |
+
| stop | Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate `stop` parameters in a modelfile. | string | stop "AI assistant:" |
|
| 1837 |
+
| num\_predict | Maximum number of tokens to predict when generating text. (Default: -1, infinite generation) | int | num\_predict 42 |
|
| 1838 |
+
| top\_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top\_k 40 |
|
| 1839 |
+
| top\_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top\_p 0.9 |
|
| 1840 |
+
| min\_p | Alternative to the top\_p, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0) | float | min\_p 0.05 |
|
| 1841 |
+
|
| 1842 |
+
### TEMPLATE
|
| 1843 |
+
|
| 1844 |
+
`TEMPLATE` of the full prompt template to be passed into the model. It may include (optionally) a system message, a user's message and the response from the model. Note: syntax may be model specific. Templates use Go [template syntax](https://pkg.go.dev/text/template).
|
| 1845 |
+
|
| 1846 |
+
#### Template Variables
|
| 1847 |
+
|
| 1848 |
+
| Variable | Description |
|
| 1849 |
+
| ----------------- | --------------------------------------------------------------------------------------------- |
|
| 1850 |
+
| `{{ .System }}` | The system message used to specify custom behavior. |
|
| 1851 |
+
| `{{ .Prompt }}` | The user prompt message. |
|
| 1852 |
+
| `{{ .Response }}` | The response from the model. When generating a response, text after this variable is omitted. |
|
| 1853 |
+
|
| 1854 |
+
```
|
| 1855 |
+
TEMPLATE """{{ if .System }}<|im_start|>system
|
| 1856 |
+
{{ .System }}<|im_end|>
|
| 1857 |
+
{{ end }}{{ if .Prompt }}<|im_start|>user
|
| 1858 |
+
{{ .Prompt }}<|im_end|>
|
| 1859 |
+
{{ end }}<|im_start|>assistant
|
| 1860 |
+
"""
|
| 1861 |
+
```
|
| 1862 |
+
|
| 1863 |
+
### SYSTEM
|
| 1864 |
+
|
| 1865 |
+
The `SYSTEM` instruction specifies the system message to be used in the template, if applicable.
|
| 1866 |
+
|
| 1867 |
+
```
|
| 1868 |
+
SYSTEM """<system message>"""
|
| 1869 |
+
```
|
| 1870 |
+
|
| 1871 |
+
### ADAPTER
|
| 1872 |
+
|
| 1873 |
+
The `ADAPTER` instruction specifies a fine tuned LoRA adapter that should apply to the base model. The value of the adapter should be an absolute path or a path relative to the Modelfile. The base model should be specified with a `FROM` instruction. If the base model is not the same as the base model that the adapter was tuned from the behaviour will be erratic.
|
| 1874 |
+
|
| 1875 |
+
#### Safetensor adapter
|
| 1876 |
+
|
| 1877 |
+
```
|
| 1878 |
+
ADAPTER <path to safetensor adapter>
|
| 1879 |
+
```
|
| 1880 |
+
|
| 1881 |
+
Currently supported Safetensor adapters:
|
| 1882 |
+
|
| 1883 |
+
* Llama (including Llama 2, Llama 3, and Llama 3.1)
|
| 1884 |
+
* Mistral (including Mistral 1, Mistral 2, and Mixtral)
|
| 1885 |
+
* Gemma (including Gemma 1 and Gemma 2)
|
| 1886 |
+
|
| 1887 |
+
#### GGUF adapter
|
| 1888 |
+
|
| 1889 |
+
```
|
| 1890 |
+
ADAPTER ./ollama-lora.gguf
|
| 1891 |
+
```
|
| 1892 |
+
|
| 1893 |
+
### LICENSE
|
| 1894 |
+
|
| 1895 |
+
The `LICENSE` instruction allows you to specify the legal license under which the model used with this Modelfile is shared or distributed.
|
| 1896 |
+
|
| 1897 |
+
```
|
| 1898 |
+
LICENSE """
|
| 1899 |
+
<license text>
|
| 1900 |
+
"""
|
| 1901 |
+
```
|
| 1902 |
+
|
| 1903 |
+
### MESSAGE
|
| 1904 |
+
|
| 1905 |
+
The `MESSAGE` instruction allows you to specify a message history for the model to use when responding. Use multiple iterations of the MESSAGE command to build up a conversation which will guide the model to answer in a similar way.
|
| 1906 |
+
|
| 1907 |
+
```
|
| 1908 |
+
MESSAGE <role> <message>
|
| 1909 |
+
```
|
| 1910 |
+
|
| 1911 |
+
#### Valid roles
|
| 1912 |
+
|
| 1913 |
+
| Role | Description |
|
| 1914 |
+
| --------- | ------------------------------------------------------------ |
|
| 1915 |
+
| system | Alternate way of providing the SYSTEM message for the model. |
|
| 1916 |
+
| user | An example message of what the user could have asked. |
|
| 1917 |
+
| assistant | An example message of how the model should respond. |
|
| 1918 |
+
|
| 1919 |
+
#### Example conversation
|
| 1920 |
+
|
| 1921 |
+
```
|
| 1922 |
+
MESSAGE user Is Toronto in Canada?
|
| 1923 |
+
MESSAGE assistant yes
|
| 1924 |
+
MESSAGE user Is Sacramento in Canada?
|
| 1925 |
+
MESSAGE assistant no
|
| 1926 |
+
MESSAGE user Is Ontario in Canada?
|
| 1927 |
+
MESSAGE assistant yes
|
| 1928 |
+
```
|
| 1929 |
+
|
| 1930 |
+
### REQUIRES
|
| 1931 |
+
|
| 1932 |
+
The `REQUIRES` instruction allows you to specify the minimum version of Ollama required by the model.
|
| 1933 |
+
|
| 1934 |
+
```
|
| 1935 |
+
REQUIRES <version>
|
| 1936 |
+
```
|
| 1937 |
+
|
| 1938 |
+
The version should be a valid Ollama version (e.g. 0.14.0).
|
| 1939 |
+
|
| 1940 |
+
## Notes
|
| 1941 |
+
|
| 1942 |
+
* the **`Modelfile` is not case sensitive**. In the examples, uppercase instructions are used to make it easier to distinguish it from arguments.
|
| 1943 |
+
* Instructions can be in any order. In the examples, the `FROM` instruction is first to keep it easily readable.
|
| 1944 |
+
|
| 1945 |
+
[1]: https://ollama.com/library
|
| 1946 |
+
|
| 1947 |
+
|
| 1948 |
+
Remove the ollama service:
|
| 1949 |
+
|
| 1950 |
+
```shell theme={"system"}
|
| 1951 |
+
sudo systemctl stop ollama
|
| 1952 |
+
sudo systemctl disable ollama
|
| 1953 |
+
sudo rm /etc/systemd/system/ollama.service
|
| 1954 |
+
```
|
| 1955 |
+
|
| 1956 |
+
Remove ollama libraries from your lib directory (either `/usr/local/lib`, `/usr/lib`, or `/lib`):
|
| 1957 |
+
|
| 1958 |
+
```shell theme={"system"}
|
| 1959 |
+
sudo rm -r $(which ollama | tr 'bin' 'lib')
|
| 1960 |
+
```
|
| 1961 |
+
|
| 1962 |
+
Remove the ollama binary from your bin directory (either `/usr/local/bin`, `/usr/bin`, or `/bin`):
|
| 1963 |
+
|
| 1964 |
+
```shell theme={"system"}
|
| 1965 |
+
sudo rm $(which ollama)
|
| 1966 |
+
```
|
| 1967 |
+
|
| 1968 |
+
Remove the downloaded models and Ollama service user and group:
|
| 1969 |
+
|
| 1970 |
+
```shell theme={"system"}
|
| 1971 |
+
sudo userdel ollama
|
| 1972 |
+
sudo groupdel ollama
|
| 1973 |
+
sudo rm -r /usr/share/ollama
|
| 1974 |
+
```
|
| 1975 |
+
|
| 1976 |
+
|
| 1977 |
+
|
| 1978 |
+
> ## Documentation Index
|
| 1979 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 1980 |
+
> Use this file to discover all available pages before exploring further.
|
| 1981 |
+
|
| 1982 |
+
# Structured Outputs
|
| 1983 |
+
|
| 1984 |
+
Structured outputs let you enforce a JSON schema on model responses so you can reliably extract structured data, describe images, or keep every reply consistent.
|
| 1985 |
+
|
| 1986 |
+
## Generating structured JSON
|
| 1987 |
+
|
| 1988 |
+
<Tabs>
|
| 1989 |
+
<Tab title="cURL">
|
| 1990 |
+
```shell theme={"system"}
|
| 1991 |
+
curl -X POST http://localhost:11434/api/chat -H "Content-Type: application/json" -d '{
|
| 1992 |
+
"model": "gpt-oss",
|
| 1993 |
+
"messages": [{"role": "user", "content": "Tell me about Canada in one line"}],
|
| 1994 |
+
"stream": false,
|
| 1995 |
+
"format": "json"
|
| 1996 |
+
}'
|
| 1997 |
+
```
|
| 1998 |
+
</Tab>
|
| 1999 |
+
|
| 2000 |
+
<Tab title="Python">
|
| 2001 |
+
```python theme={"system"}
|
| 2002 |
+
from ollama import chat
|
| 2003 |
+
|
| 2004 |
+
response = chat(
|
| 2005 |
+
model='gpt-oss',
|
| 2006 |
+
messages=[{'role': 'user', 'content': 'Tell me about Canada.'}],
|
| 2007 |
+
format='json'
|
| 2008 |
+
)
|
| 2009 |
+
print(response.message.content)
|
| 2010 |
+
```
|
| 2011 |
+
</Tab>
|
| 2012 |
+
|
| 2013 |
+
<Tab title="JavaScript">
|
| 2014 |
+
```javascript theme={"system"}
|
| 2015 |
+
import ollama from 'ollama'
|
| 2016 |
+
|
| 2017 |
+
const response = await ollama.chat({
|
| 2018 |
+
model: 'gpt-oss',
|
| 2019 |
+
messages: [{ role: 'user', content: 'Tell me about Canada.' }],
|
| 2020 |
+
format: 'json'
|
| 2021 |
+
})
|
| 2022 |
+
console.log(response.message.content)
|
| 2023 |
+
```
|
| 2024 |
+
</Tab>
|
| 2025 |
+
</Tabs>
|
| 2026 |
+
> ## Documentation Index
|
| 2027 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 2028 |
+
> Use this file to discover all available pages before exploring further.
|
| 2029 |
+
|
| 2030 |
+
# Docker
|
| 2031 |
+
|
| 2032 |
+
## CPU only
|
| 2033 |
+
|
| 2034 |
+
```shell theme={"system"}
|
| 2035 |
+
docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
| 2036 |
+
```
|
| 2037 |
+
|
| 2038 |
+
## Nvidia GPU
|
| 2039 |
+
|
| 2040 |
+
Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
|
| 2041 |
+
|
| 2042 |
+
### Install with Apt
|
| 2043 |
+
|
| 2044 |
+
1. Configure the repository
|
| 2045 |
+
|
| 2046 |
+
```shell theme={"system"}
|
| 2047 |
+
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
|
| 2048 |
+
| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
|
| 2049 |
+
curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
|
| 2050 |
+
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
|
| 2051 |
+
| sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
| 2052 |
+
sudo apt-get update
|
| 2053 |
+
```
|
| 2054 |
+
|
| 2055 |
+
2. Install the NVIDIA Container Toolkit packages
|
| 2056 |
+
|
| 2057 |
+
```shell theme={"system"}
|
| 2058 |
+
sudo apt-get install -y nvidia-container-toolkit
|
| 2059 |
+
```
|
| 2060 |
+
|
| 2061 |
+
### Install with Yum or Dnf
|
| 2062 |
+
|
| 2063 |
+
1. Configure the repository
|
| 2064 |
+
|
| 2065 |
+
```shell theme={"system"}
|
| 2066 |
+
curl -fsSL https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
|
| 2067 |
+
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
| 2068 |
+
```
|
| 2069 |
+
|
| 2070 |
+
2. Install the NVIDIA Container Toolkit packages
|
| 2071 |
+
|
| 2072 |
+
```shell theme={"system"}
|
| 2073 |
+
sudo yum install -y nvidia-container-toolkit
|
| 2074 |
+
```
|
| 2075 |
+
|
| 2076 |
+
### Configure Docker to use Nvidia driver
|
| 2077 |
+
|
| 2078 |
+
```shell theme={"system"}
|
| 2079 |
+
sudo nvidia-ctk runtime configure --runtime=docker
|
| 2080 |
+
sudo systemctl restart docker
|
| 2081 |
+
```
|
| 2082 |
+
|
| 2083 |
+
### Start the container
|
| 2084 |
+
|
| 2085 |
+
```shell theme={"system"}
|
| 2086 |
+
docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
| 2087 |
+
```
|
| 2088 |
+
|
| 2089 |
+
<Note>
|
| 2090 |
+
If you're running on an NVIDIA JetPack system, Ollama can't automatically discover the correct JetPack version.
|
| 2091 |
+
Pass the environment variable `JETSON_JETPACK=5` or `JETSON_JETPACK=6` to the container to select version 5 or 6.
|
| 2092 |
+
</Note>
|
| 2093 |
+
|
| 2094 |
+
## AMD GPU
|
| 2095 |
+
|
| 2096 |
+
To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
|
| 2097 |
+
|
| 2098 |
+
```shell theme={"system"}
|
| 2099 |
+
docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
|
| 2100 |
+
```
|
| 2101 |
+
|
| 2102 |
+
## Vulkan Support
|
| 2103 |
+
|
| 2104 |
+
Vulkan is bundled into the `ollama/ollama` image.
|
| 2105 |
+
|
| 2106 |
+
```shell theme={"system"}
|
| 2107 |
+
docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 -e OLLAMA_VULKAN=1 --name ollama ollama/ollama
|
| 2108 |
+
```
|
| 2109 |
+
|
| 2110 |
+
## Run model locally
|
| 2111 |
+
|
| 2112 |
+
Now you can run a model:
|
| 2113 |
+
|
| 2114 |
+
```shell theme={"system"}
|
| 2115 |
+
docker exec -it ollama ollama run llama3.2
|
| 2116 |
+
```
|
| 2117 |
+
|
| 2118 |
+
## Try different models
|
| 2119 |
+
|
| 2120 |
+
More models can be found on the [Ollama library](https://ollama.com/library).
|
| 2121 |
+
|
| 2122 |
+
## Generating structured JSON with a schema
|
| 2123 |
+
|
| 2124 |
+
Provide a JSON schema to the `format` field.
|
| 2125 |
+
|
| 2126 |
+
<Note>
|
| 2127 |
+
It is ideal to also pass the JSON schema as a string in the prompt to ground the model's response.
|
| 2128 |
+
</Note>
|
| 2129 |
+
|
| 2130 |
+
<Tabs>
|
| 2131 |
+
<Tab title="cURL">
|
| 2132 |
+
```shell theme={"system"}
|
| 2133 |
+
curl -X POST http://localhost:11434/api/chat -H "Content-Type: application/json" -d '{
|
| 2134 |
+
"model": "gpt-oss",
|
| 2135 |
+
"messages": [{"role": "user", "content": "Tell me about Canada."}],
|
| 2136 |
+
"stream": false,
|
| 2137 |
+
"format": {
|
| 2138 |
+
"type": "object",
|
| 2139 |
+
"properties": {
|
| 2140 |
+
"name": {"type": "string"},
|
| 2141 |
+
"capital": {"type": "string"},
|
| 2142 |
+
"languages": {
|
| 2143 |
+
"type": "array",
|
| 2144 |
+
"items": {"type": "string"}
|
| 2145 |
+
}
|
| 2146 |
+
},
|
| 2147 |
+
"required": ["name", "capital", "languages"]
|
| 2148 |
+
}
|
| 2149 |
+
}'
|
| 2150 |
+
```
|
| 2151 |
+
</Tab>
|
| 2152 |
+
|
| 2153 |
+
<Tab title="Python">
|
| 2154 |
+
Use Pydantic models and pass `model_json_schema()` to `format`, then validate the response:
|
| 2155 |
+
|
| 2156 |
+
```python theme={"system"}
|
| 2157 |
+
from ollama import chat
|
| 2158 |
+
from pydantic import BaseModel
|
| 2159 |
+
|
| 2160 |
+
class Country(BaseModel):
|
| 2161 |
+
name: str
|
| 2162 |
+
capital: str
|
| 2163 |
+
languages: list[str]
|
| 2164 |
+
|
| 2165 |
+
response = chat(
|
| 2166 |
+
model='gpt-oss',
|
| 2167 |
+
messages=[{'role': 'user', 'content': 'Tell me about Canada.'}],
|
| 2168 |
+
format=Country.model_json_schema(),
|
| 2169 |
+
)
|
| 2170 |
+
|
| 2171 |
+
country = Country.model_validate_json(response.message.content)
|
| 2172 |
+
print(country)
|
| 2173 |
+
```
|
| 2174 |
+
</Tab>
|
| 2175 |
+
|
| 2176 |
+
<Tab title="JavaScript">
|
| 2177 |
+
Serialize a Zod schema with `zodToJsonSchema()` and parse the structured response:
|
| 2178 |
+
|
| 2179 |
+
```javascript theme={"system"}
|
| 2180 |
+
import ollama from 'ollama'
|
| 2181 |
+
import { z } from 'zod'
|
| 2182 |
+
import { zodToJsonSchema } from 'zod-to-json-schema'
|
| 2183 |
+
|
| 2184 |
+
const Country = z.object({
|
| 2185 |
+
name: z.string(),
|
| 2186 |
+
capital: z.string(),
|
| 2187 |
+
languages: z.array(z.string()),
|
| 2188 |
+
})
|
| 2189 |
+
|
| 2190 |
+
const response = await ollama.chat({
|
| 2191 |
+
model: 'gpt-oss',
|
| 2192 |
+
messages: [{ role: 'user', content: 'Tell me about Canada.' }],
|
| 2193 |
+
format: zodToJsonSchema(Country),
|
| 2194 |
+
})
|
| 2195 |
+
|
| 2196 |
+
const country = Country.parse(JSON.parse(response.message.content))
|
| 2197 |
+
console.log(country)
|
| 2198 |
+
```
|
| 2199 |
+
</Tab>
|
| 2200 |
+
</Tabs>
|
| 2201 |
+
|
| 2202 |
+
## Example: Extract structured data
|
| 2203 |
+
|
| 2204 |
+
Define the objects you want returned and let the model populate the fields:
|
| 2205 |
+
|
| 2206 |
+
```python theme={"system"}
|
| 2207 |
+
from ollama import chat
|
| 2208 |
+
from pydantic import BaseModel
|
| 2209 |
+
|
| 2210 |
+
class Pet(BaseModel):
|
| 2211 |
+
name: str
|
| 2212 |
+
animal: str
|
| 2213 |
+
age: int
|
| 2214 |
+
color: str | None
|
| 2215 |
+
favorite_toy: str | None
|
| 2216 |
+
|
| 2217 |
+
class PetList(BaseModel):
|
| 2218 |
+
pets: list[Pet]
|
| 2219 |
+
|
| 2220 |
+
response = chat(
|
| 2221 |
+
model='gpt-oss',
|
| 2222 |
+
messages=[{'role': 'user', 'content': 'I have two cats named Luna and Loki...'}],
|
| 2223 |
+
format=PetList.model_json_schema(),
|
| 2224 |
+
)
|
| 2225 |
+
|
| 2226 |
+
pets = PetList.model_validate_json(response.message.content)
|
| 2227 |
+
print(pets)
|
| 2228 |
+
```
|
| 2229 |
+
|
| 2230 |
+
## Example: Vision with structured outputs
|
| 2231 |
+
|
| 2232 |
+
Vision models accept the same `format` parameter, enabling deterministic descriptions of images:
|
| 2233 |
+
|
| 2234 |
+
```python theme={"system"}
|
| 2235 |
+
from ollama import chat
|
| 2236 |
+
from pydantic import BaseModel
|
| 2237 |
+
from typing import Literal, Optional
|
| 2238 |
+
|
| 2239 |
+
class Object(BaseModel):
|
| 2240 |
+
name: str
|
| 2241 |
+
confidence: float
|
| 2242 |
+
attributes: str
|
| 2243 |
+
|
| 2244 |
+
class ImageDescription(BaseModel):
|
| 2245 |
+
summary: str
|
| 2246 |
+
objects: list[Object]
|
| 2247 |
+
scene: str
|
| 2248 |
+
colors: list[str]
|
| 2249 |
+
time_of_day: Literal['Morning', 'Afternoon', 'Evening', 'Night']
|
| 2250 |
+
setting: Literal['Indoor', 'Outdoor', 'Unknown']
|
| 2251 |
+
text_content: Optional[str] = None
|
| 2252 |
+
|
| 2253 |
+
response = chat(
|
| 2254 |
+
model='gemma3',
|
| 2255 |
+
messages=[{
|
| 2256 |
+
'role': 'user',
|
| 2257 |
+
'content': 'Describe this photo and list the objects you detect.',
|
| 2258 |
+
'images': ['path/to/image.jpg'],
|
| 2259 |
+
}],
|
| 2260 |
+
format=ImageDescription.model_json_schema(),
|
| 2261 |
+
options={'temperature': 0},
|
| 2262 |
+
)
|
| 2263 |
+
|
| 2264 |
+
image_description = ImageDescription.model_validate_json(response.message.content)
|
| 2265 |
+
print(image_description)
|
| 2266 |
+
```
|
| 2267 |
+
|
| 2268 |
+
## Tips for reliable structured outputs
|
| 2269 |
+
|
| 2270 |
+
* Define schemas with Pydantic (Python) or Zod (JavaScript) so they can be reused for validation.
|
| 2271 |
+
* Lower the temperature (e.g., set it to `0`) for more deterministic completions.
|
| 2272 |
+
* Structured outputs work through the OpenAI-compatible API via `response_format`
|
| 2273 |
+
> ## Documentation Index
|
| 2274 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 2275 |
+
> Use this file to discover all available pages before exploring further.
|
| 2276 |
+
|
| 2277 |
+
# Quickstart
|
| 2278 |
+
|
| 2279 |
+
Ollama is available on macOS, Windows, and Linux.
|
| 2280 |
+
|
| 2281 |
+
<a href="https://ollama.com/download" target="_blank" className="inline-block px-6 py-2 bg-black rounded-full dark:bg-neutral-700 text-white font-normal border-none">
|
| 2282 |
+
Download Ollama
|
| 2283 |
+
</a>
|
| 2284 |
+
|
| 2285 |
+
## Get Started
|
| 2286 |
+
|
| 2287 |
+
Run `ollama` in your terminal to open the interactive menu:
|
| 2288 |
+
|
| 2289 |
+
```sh theme={"system"}
|
| 2290 |
+
ollama
|
| 2291 |
+
```
|
| 2292 |
+
|
| 2293 |
+
Navigate with `↑/↓`, press `enter` to launch, `→` to change model, and `esc` to quit.
|
| 2294 |
+
|
| 2295 |
+
The menu provides quick access to:
|
| 2296 |
+
|
| 2297 |
+
* **Run a model** - Start an interactive chat
|
| 2298 |
+
* **Launch tools** - Claude Code, Codex, OpenClaw, and more
|
| 2299 |
+
* **Additional integrations** - Available under "More..."
|
| 2300 |
+
|
| 2301 |
+
## Assistants
|
| 2302 |
+
|
| 2303 |
+
Launch [OpenClaw](/integrations/openclaw), a personal AI with 100+ skills:
|
| 2304 |
+
|
| 2305 |
+
```sh theme={"system"}
|
| 2306 |
+
ollama launch openclaw
|
| 2307 |
+
```
|
| 2308 |
+
|
| 2309 |
+
## Coding
|
| 2310 |
+
|
| 2311 |
+
Launch [Claude Code](/integrations/claude-code) and other coding tools with Ollama models:
|
| 2312 |
+
|
| 2313 |
+
```sh theme={"system"}
|
| 2314 |
+
ollama launch claude
|
| 2315 |
+
```
|
| 2316 |
+
|
| 2317 |
+
```sh theme={"system"}
|
| 2318 |
+
ollama launch codex
|
| 2319 |
+
```
|
| 2320 |
+
|
| 2321 |
+
```sh theme={"system"}
|
| 2322 |
+
ollama launch opencode
|
| 2323 |
+
```
|
| 2324 |
+
|
| 2325 |
+
See [integrations](/integrations) for all supported tools.
|
| 2326 |
+
|
| 2327 |
+
## API
|
| 2328 |
+
|
| 2329 |
+
Use the [API](/api) to integrate Ollama into your applications:
|
| 2330 |
+
|
| 2331 |
+
```sh theme={"system"}
|
| 2332 |
+
curl http://localhost:11434/api/chat -d '{
|
| 2333 |
+
"model": "gemma3",
|
| 2334 |
+
"messages": [{ "role": "user", "content": "Hello!" }]
|
| 2335 |
+
}'
|
| 2336 |
+
```
|
| 2337 |
+
|
| 2338 |
+
See the [API documentation](/api) for Python, JavaScript, and other integrations.
|
| 2339 |
+
|
| 2340 |
+
> ## Documentation Index
|
| 2341 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 2342 |
+
> Use this file to discover all available pages before exploring further.
|
| 2343 |
+
|
| 2344 |
+
# Ollama's documentation
|
| 2345 |
+
|
| 2346 |
+
<img src="https://mintcdn.com/ollama-9269c548/w-L7kuDqk3_8zi5c/images/welcome.png?fit=max&auto=format&n=w-L7kuDqk3_8zi5c&q=85&s=914368bbe8709d04481a8a478b66cf8c" noZoom className="rounded-3xl" width="2048" height="1024" data-path="images/welcome.png" />
|
| 2347 |
+
|
| 2348 |
+
[Ollama](https://ollama.com) is the easiest way to get up and running with large language models such as gpt-oss, Gemma 3, DeepSeek-R1, Qwen3 and more.
|
| 2349 |
+
|
| 2350 |
+
<CardGroup cols={2}>
|
| 2351 |
+
<Card title="Quickstart" icon="rocket" href="/quickstart">
|
| 2352 |
+
Get up and running with your first model or integrate Ollama with your favorite tools
|
| 2353 |
+
</Card>
|
| 2354 |
+
|
| 2355 |
+
<Card title="Download Ollama" icon="download" href="https://ollama.com/download">
|
| 2356 |
+
Download Ollama on macOS, Windows or Linux
|
| 2357 |
+
</Card>
|
| 2358 |
+
|
| 2359 |
+
<Card title="Cloud" icon="cloud" href="/cloud">
|
| 2360 |
+
Ollama's cloud models offer larger models with better performance.
|
| 2361 |
+
</Card>
|
| 2362 |
+
|
| 2363 |
+
<Card title="API reference" icon="terminal" href="/api">
|
| 2364 |
+
View Ollama's API reference
|
| 2365 |
+
</Card>
|
| 2366 |
+
</CardGroup>
|
| 2367 |
+
|
| 2368 |
+
## Libraries
|
| 2369 |
+
|
| 2370 |
+
<CardGroup cols={2}>
|
| 2371 |
+
<Card title="Ollama's Python Library" icon="python" href="https://github.com/ollama/ollama-python">
|
| 2372 |
+
The official library for using Ollama with Python
|
| 2373 |
+
</Card>
|
| 2374 |
+
|
| 2375 |
+
<Card title="Ollama's JavaScript library" icon="js" href="https://github.com/ollama/ollama-js">
|
| 2376 |
+
The official library for using Ollama with JavaScript or TypeScript.
|
| 2377 |
+
</Card>
|
| 2378 |
+
|
| 2379 |
+
<Card title="Community libraries" icon="github" href="https://github.com/ollama/ollama?tab=readme-ov-file#libraries-1">
|
| 2380 |
+
View a list of 20+ community-supported libraries for Ollama
|
| 2381 |
+
</Card>
|
| 2382 |
+
</CardGroup>
|
| 2383 |
+
|
| 2384 |
+
## Community
|
| 2385 |
+
|
| 2386 |
+
<CardGroup cols={2}>
|
| 2387 |
+
<Card title="Discord" icon="discord" href="https://discord.gg/ollama">
|
| 2388 |
+
Join our Discord community
|
| 2389 |
+
</Card>
|
| 2390 |
+
|
| 2391 |
+
<Card title="Reddit" icon="reddit" href="https://reddit.com/r/ollama">
|
| 2392 |
+
Join our Reddit community
|
| 2393 |
+
</Card>
|
| 2394 |
+
</CardGroup>
|
| 2395 |
+
> ## Documentation Index
|
| 2396 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 2397 |
+
> Use this file to discover all available pages before exploring further.
|
| 2398 |
+
|
| 2399 |
+
# Importing a Model
|
| 2400 |
+
|
| 2401 |
+
## Table of Contents
|
| 2402 |
+
|
| 2403 |
+
* [Importing a Safetensors adapter](#Importing-a-fine-tuned-adapter-from-Safetensors-weights)
|
| 2404 |
+
* [Importing a Safetensors model](#Importing-a-model-from-Safetensors-weights)
|
| 2405 |
+
* [Importing a GGUF file](#Importing-a-GGUF-based-model-or-adapter)
|
| 2406 |
+
* [Sharing models on ollama.com](#Sharing-your-model-on-ollamacom)
|
| 2407 |
+
|
| 2408 |
+
## Importing a fine tuned adapter from Safetensors weights
|
| 2409 |
+
|
| 2410 |
+
First, create a `Modelfile` with a `FROM` command pointing at the base model you used for fine tuning, and an `ADAPTER` command which points to the directory with your Safetensors adapter:
|
| 2411 |
+
|
| 2412 |
+
```dockerfile theme={"system"}
|
| 2413 |
+
FROM <base model name>
|
| 2414 |
+
ADAPTER /path/to/safetensors/adapter/directory
|
| 2415 |
+
```
|
| 2416 |
+
|
| 2417 |
+
Make sure that you use the same base model in the `FROM` command as you used to create the adapter otherwise you will get erratic results. Most frameworks use different quantization methods, so it's best to use non-quantized (i.e. non-QLoRA) adapters. If your adapter is in the same directory as your `Modelfile`, use `ADAPTER .` to specify the adapter path.
|
| 2418 |
+
|
| 2419 |
+
Now run `ollama create` from the directory where the `Modelfile` was created:
|
| 2420 |
+
|
| 2421 |
+
```shell theme={"system"}
|
| 2422 |
+
ollama create my-model
|
| 2423 |
+
```
|
| 2424 |
+
|
| 2425 |
+
Lastly, test the model:
|
| 2426 |
+
|
| 2427 |
+
```shell theme={"system"}
|
| 2428 |
+
ollama run my-model
|
| 2429 |
+
```
|
| 2430 |
+
|
| 2431 |
+
Ollama supports importing adapters based on several different model architectures including:
|
| 2432 |
+
|
| 2433 |
+
* Llama (including Llama 2, Llama 3, Llama 3.1, and Llama 3.2);
|
| 2434 |
+
* Mistral (including Mistral 1, Mistral 2, and Mixtral); and
|
| 2435 |
+
* Gemma (including Gemma 1 and Gemma 2)
|
| 2436 |
+
|
| 2437 |
+
You can create the adapter using a fine tuning framework or tool which can output adapters in the Safetensors format, such as:
|
| 2438 |
+
|
| 2439 |
+
* Hugging Face [fine tuning framework](https://huggingface.co/docs/transformers/en/training)
|
| 2440 |
+
* [Unsloth](https://github.com/unslothai/unsloth)
|
| 2441 |
+
* [MLX](https://github.com/ml-explore/mlx)
|
| 2442 |
+
|
| 2443 |
+
## Importing a model from Safetensors weights
|
| 2444 |
+
|
| 2445 |
+
First, create a `Modelfile` with a `FROM` command which points to the directory containing your Safetensors weights:
|
| 2446 |
+
|
| 2447 |
+
```dockerfile theme={"system"}
|
| 2448 |
+
FROM /path/to/safetensors/directory
|
| 2449 |
+
```
|
| 2450 |
+
|
| 2451 |
+
If you create the Modelfile in the same directory as the weights, you can use the command `FROM .`.
|
| 2452 |
+
|
| 2453 |
+
Now run the `ollama create` command from the directory where you created the `Modelfile`:
|
| 2454 |
+
|
| 2455 |
+
```shell theme={"system"}
|
| 2456 |
+
ollama create my-model
|
| 2457 |
+
```
|
| 2458 |
+
|
| 2459 |
+
Lastly, test the model:
|
| 2460 |
+
|
| 2461 |
+
```shell theme={"system"}
|
| 2462 |
+
ollama run my-model
|
| 2463 |
+
```
|
| 2464 |
+
|
| 2465 |
+
Ollama supports importing models for several different architectures including:
|
| 2466 |
+
|
| 2467 |
+
* Llama (including Llama 2, Llama 3, Llama 3.1, and Llama 3.2);
|
| 2468 |
+
* Mistral (including Mistral 1, Mistral 2, and Mixtral);
|
| 2469 |
+
* Gemma (including Gemma 1 and Gemma 2); and
|
| 2470 |
+
* Phi3
|
| 2471 |
+
|
| 2472 |
+
This includes importing foundation models as well as any fine tuned models which have been *fused* with a foundation model.
|
| 2473 |
+
|
| 2474 |
+
## Importing a GGUF based model or adapter
|
| 2475 |
+
|
| 2476 |
+
If you have a GGUF based model or adapter it is possible to import it into Ollama. You can obtain a GGUF model or adapter by:
|
| 2477 |
+
|
| 2478 |
+
* converting a Safetensors model with the `convert_hf_to_gguf.py` from Llama.cpp;
|
| 2479 |
+
* converting a Safetensors adapter with the `convert_lora_to_gguf.py` from Llama.cpp; or
|
| 2480 |
+
* downloading a model or adapter from a place such as HuggingFace
|
| 2481 |
+
|
| 2482 |
+
To import a GGUF model, create a `Modelfile` containing:
|
| 2483 |
+
|
| 2484 |
+
```dockerfile theme={"system"}
|
| 2485 |
+
FROM /path/to/file.gguf
|
| 2486 |
+
```
|
| 2487 |
+
|
| 2488 |
+
For a GGUF adapter, create the `Modelfile` with:
|
| 2489 |
+
|
| 2490 |
+
```dockerfile theme={"system"}
|
| 2491 |
+
FROM <model name>
|
| 2492 |
+
ADAPTER /path/to/file.gguf
|
| 2493 |
+
```
|
| 2494 |
+
|
| 2495 |
+
When importing a GGUF adapter, it's important to use the same base model as the base model that the adapter was created with. You can use:
|
| 2496 |
+
|
| 2497 |
+
* a model from Ollama
|
| 2498 |
+
* a GGUF file
|
| 2499 |
+
* a Safetensors based model
|
| 2500 |
+
|
| 2501 |
+
Once you have created your `Modelfile`, use the `ollama create` command to build the model.
|
| 2502 |
+
|
| 2503 |
+
```shell theme={"system"}
|
| 2504 |
+
ollama create my-model
|
| 2505 |
+
```
|
| 2506 |
+
|
| 2507 |
+
## Quantizing a Model
|
| 2508 |
+
|
| 2509 |
+
Quantizing a model allows you to run models faster and with less memory consumption but at reduced accuracy. This allows you to run a model on more modest hardware.
|
| 2510 |
+
|
| 2511 |
+
Ollama can quantize FP16 and FP32 based models into different quantization levels using the `-q/--quantize` flag with the `ollama create` command.
|
| 2512 |
+
|
| 2513 |
+
First, create a Modelfile with the FP16 or FP32 based model you wish to quantize.
|
| 2514 |
+
|
| 2515 |
+
```dockerfile theme={"system"}
|
| 2516 |
+
FROM /path/to/my/gemma/f16/model
|
| 2517 |
+
```
|
| 2518 |
+
|
| 2519 |
+
Use `ollama create` to then create the quantized model.
|
| 2520 |
+
|
| 2521 |
+
```shell theme={"system"}
|
| 2522 |
+
$ ollama create --quantize q4_K_M mymodel
|
| 2523 |
+
transferring model data
|
| 2524 |
+
quantizing F16 model to Q4_K_M
|
| 2525 |
+
creating new layer sha256:735e246cc1abfd06e9cdcf95504d6789a6cd1ad7577108a70d9902fef503c1bd
|
| 2526 |
+
creating new layer sha256:0853f0ad24e5865173bbf9ffcc7b0f5d56b66fd690ab1009867e45e7d2c4db0f
|
| 2527 |
+
writing manifest
|
| 2528 |
+
success
|
| 2529 |
+
```
|
| 2530 |
+
|
| 2531 |
+
### Supported Quantizations
|
| 2532 |
+
|
| 2533 |
+
* `q8_0`
|
| 2534 |
+
|
| 2535 |
+
#### K-means Quantizations
|
| 2536 |
+
|
| 2537 |
+
* `q4_K_S`
|
| 2538 |
+
* `q4_K_M`
|
| 2539 |
+
|
| 2540 |
+
```cli
|
| 2541 |
+
ollama pull llama3.2
|
| 2542 |
+
echo "FROM llama3.2" >> Modelfile
|
| 2543 |
+
echo "SYSTEM You are a friendly assistant." >> Modelfile
|
| 2544 |
+
ollama create -f Modelfile lmlm/Lmkm
|
| 2545 |
+
ollama push lmlm/Lmkm
|
| 2546 |
+
```
|
| 2547 |
+
## Sharing your model on ollama.com
|
| 2548 |
+
|
| 2549 |
+
You can share any model you have created by pushing it to [ollama.com](https://ollama.com) so that other users can try it out.
|
| 2550 |
+
|
| 2551 |
+
First, use your browser to go to the [Ollama Sign-Up](https://ollama.com/signup) page. If you already have an account, you can skip this step.
|
| 2552 |
+
|
| 2553 |
+
<img src="https://mintcdn.com/ollama-9269c548/uieua2DvLKVQ74Ga/images/signup.png?fit=max&auto=format&n=uieua2DvLKVQ74Ga&q=85&s=d99f1340e6cfd85d36d49a444491cc63" alt="Sign-Up" width="40%" data-path="images/signup.png" />
|
| 2554 |
+
|
| 2555 |
+
The `Username` field will be used as part of your model's name (e.g. `jmorganca/mymodel`), so make sure you are comfortable with the username that you have selected.
|
| 2556 |
+
|
| 2557 |
+
Now that you have created an account and are signed-in, go to the [Ollama Keys Settings](https://ollama.com/settings/keys) page.
|
| 2558 |
+
|
| 2559 |
+
Follow the directions on the page to determine where your Ollama Public Key is located.
|
| 2560 |
+
|
| 2561 |
+
<img src="https://mintcdn.com/ollama-9269c548/uieua2DvLKVQ74Ga/images/ollama-keys.png?fit=max&auto=format&n=uieua2DvLKVQ74Ga&q=85&s=7ced4d97ecf6b115219f929a4914205e" alt="Ollama Keys" width="80%" data-path="images/ollama-keys.png" />
|
| 2562 |
+
|
| 2563 |
+
Click on the `Add Ollama Public Key` button, and copy and paste the contents of your Ollama Public Key into the text field.
|
| 2564 |
+
|
| 2565 |
+
To push a model to [ollama.com](https://ollama.com), first make sure that it is named correctly with your username. You may have to use the `ollama cp` command to copy
|
| 2566 |
+
your model to give it the correct name. Once you're happy with your model's name, use the `ollama push` command to push it to [ollama.com](https://ollama.com).
|
| 2567 |
+
|
| 2568 |
+
```shell theme={"system"}
|
| 2569 |
+
ollama cp mymodel myuser/mymodel
|
| 2570 |
+
ollama push myuser/mymodel
|
| 2571 |
+
```
|
| 2572 |
+
|
| 2573 |
+
Once your model has been pushed, other users can pull and run it by using the command:
|
| 2574 |
+
|
| 2575 |
+
```shell theme={"system"}
|
| 2576 |
+
ollama run myuser/mymodel
|
| 2577 |
+
```
|
| 2578 |
+
> ## Documentation Index
|
| 2579 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 2580 |
+
> Use this file to discover all available pages before exploring further.
|
| 2581 |
+
|
| 2582 |
+
# Overview
|
| 2583 |
+
|
| 2584 |
+
Ollama integrates with a wide range of tools.
|
| 2585 |
+
|
| 2586 |
+
## Coding Agents
|
| 2587 |
+
|
| 2588 |
+
Coding assistants that can read, modify, and execute code in your projects.
|
| 2589 |
+
|
| 2590 |
+
* [Claude Code](/integrations/claude-code)
|
| 2591 |
+
* [Codex](/integrations/codex)
|
| 2592 |
+
* [OpenCode](/integrations/opencode)
|
| 2593 |
+
* [Droid](/integrations/droid)
|
| 2594 |
+
* [Goose](/integrations/goose)
|
| 2595 |
+
* [Pi](/integrations/pi)
|
| 2596 |
+
|
| 2597 |
+
## Assistants
|
| 2598 |
+
|
| 2599 |
+
AI assistants that help with everyday tasks.
|
| 2600 |
+
|
| 2601 |
+
* [OpenClaw](/integrations/openclaw)
|
| 2602 |
+
|
| 2603 |
+
## IDEs & Editors
|
| 2604 |
+
|
| 2605 |
+
Native integrations for popular development environments.
|
| 2606 |
+
|
| 2607 |
+
* [VS Code](/integrations/vscode)
|
| 2608 |
+
* [Cline](/integrations/cline)
|
| 2609 |
+
* [Roo Code](/integrations/roo-code)
|
| 2610 |
+
* [JetBrains](/integrations/jetbrains)
|
| 2611 |
+
* [Xcode](/integrations/xcode)
|
| 2612 |
+
* [Zed](/integrations/zed)
|
| 2613 |
+
|
| 2614 |
+
## Chat & RAG
|
| 2615 |
+
|
| 2616 |
+
Chat interfaces and retrieval-augmented generation platforms.
|
| 2617 |
+
|
| 2618 |
+
* [Onyx](/integrations/onyx)
|
| 2619 |
+
|
| 2620 |
+
```curl
|
| 2621 |
+
curl http://localhost:11434/api/chat \
|
| 2622 |
+
-d '{
|
| 2623 |
+
"model": "lmlm/Lmkm",
|
| 2624 |
+
"messages": [{"role": "user", "content": "Hello!"}]
|
| 2625 |
+
}'
|
| 2626 |
+
```
|
| 2627 |
+
## Automation
|
| 2628 |
+
|
| 2629 |
+
Workflow automation platforms with AI integration.
|
| 2630 |
+
|
| 2631 |
+
* [n8n](/integrations/n8n)
|
| 2632 |
+
|
| 2633 |
+
## Notebooks
|
| 2634 |
+
|
| 2635 |
+
Interactive computing environments with AI capabilities.
|
| 2636 |
+
|
| 2637 |
+
* [marimo](/integrations/marimo)
|
| 2638 |
+
|
| 2639 |
+
* > ## Documentation Index
|
| 2640 |
+
> Fetch the complete documentation index at: https://docs.ollama.com/llms.txt
|
| 2641 |
+
> Use this file to discover all available pages before exploring further.
|
| 2642 |
+
|
| 2643 |
+
# Droid
|
| 2644 |
+
|
| 2645 |
+
## Install
|
| 2646 |
+
|
| 2647 |
+
Install the [Droid CLI](https://factory.ai/):
|
| 2648 |
+
|
| 2649 |
+
```bash theme={"system"}
|
| 2650 |
+
curl -fsSL https://app.factory.ai/cli | sh
|
| 2651 |
+
```
|
| 2652 |
+
|
| 2653 |
+
<Note>Droid requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>
|
| 2654 |
+
|
| 2655 |
+
## Usage with Ollama
|
| 2656 |
+
|
| 2657 |
+
### Quick setup
|
| 2658 |
+
|
| 2659 |
+
```bash theme={"system"}
|
| 2660 |
+
ollama launch droid
|
| 2661 |
+
ollama cp llama3.2 lmlm/Lmkm
|
| 2662 |
+
ollama push lmlm/Lmkm
|
| 2663 |
+
```
|
| 2664 |
+
|
| 2665 |
+
To configure without launching:
|
| 2666 |
+
|
| 2667 |
+
```shell theme={"system"}
|
| 2668 |
+
ollama launch droid --config
|
| 2669 |
+
```
|
| 2670 |
+
|
| 2671 |
+
### Manual setup
|
| 2672 |
+
|
| 2673 |
+
Add a local configuration block to `~/.factory/config.json`:
|
| 2674 |
+
|
| 2675 |
+
```json theme={"system"}
|
| 2676 |
+
{
|
| 2677 |
+
"custom_models": [
|
| 2678 |
+
{
|
| 2679 |
+
"model_display_name": "qwen3-coder [Ollama]",
|
| 2680 |
+
"model": "qwen3-coder",
|
| 2681 |
+
"base_url": "http://localhost:11434/v1/",
|
| 2682 |
+
"api_key": "not-needed",
|
| 2683 |
+
"provider": "generic-chat-completion-api",
|
| 2684 |
+
"max_tokens": 32000
|
| 2685 |
+
}
|
| 2686 |
+
]
|
| 2687 |
+
}
|
| 2688 |
+
```
|
| 2689 |
+
|
| 2690 |
+
## Cloud Models
|
| 2691 |
+
|
| 2692 |
+
`qwen3-coder:480b-cloud` is the recommended model for use with Droid.
|
| 2693 |
+
|
| 2694 |
+
Add the cloud configuration block to `~/.factory/config.json`:
|
| 2695 |
+
|
| 2696 |
+
```json theme={"system"}
|
| 2697 |
+
{
|
| 2698 |
+
"custom_models": [
|
| 2699 |
+
{
|
| 2700 |
+
"model_display_name": "qwen3-coder [Ollama Cloud]",
|
| 2701 |
+
"model": "qwen3-coder:480b-cloud",
|
| 2702 |
+
"base_url": "http://localhost:11434/v1/",
|
| 2703 |
+
"api_key": "not-needed",
|
| 2704 |
+
"provider": "generic-chat-completion-api",
|
| 2705 |
+
"max_tokens": 128000
|
| 2706 |
+
}
|
| 2707 |
+
]
|
| 2708 |
+
}
|
| 2709 |
+
```
|
| 2710 |
+
|
| 2711 |
+
## Connecting to ollama.com
|
| 2712 |
+
|
| 2713 |
+
1. Create an [API key](https://ollama.com/settings/keys) from ollama.com and export it as `OLLAMA_API_KEY`.
|
| 2714 |
+
2. Add the cloud configuration block to `~/.factory/config.json`:
|
| 2715 |
+
|
| 2716 |
+
```json theme={"system"}
|
| 2717 |
+
{
|
| 2718 |
+
"custom_models": [
|
| 2719 |
+
{
|
| 2720 |
+
"model_display_name": "qwen3-coder [Ollama Cloud]",
|
| 2721 |
+
"model": "qwen3-coder:480b",
|
| 2722 |
+
"base_url": "https://ollama.com/v1/",
|
| 2723 |
+
"api_key": "OLLAMA_API_KEY",
|
| 2724 |
+
"provider": "generic-chat-completion-api",
|
| 2725 |
+
"max_tokens": 128000
|
| 2726 |
+
}
|
| 2727 |
+
]
|
| 2728 |
+
}
|
| 2729 |
+
```
|
| 2730 |
+
|
| 2731 |
+
Run `droid` in a new terminal to load the new settings.
|
| 2732 |
+
|
Lmlm.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from ..base import BasePatch
|
| 3 |
+
from .base import BaseHQQHFModel
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
|
| 6 |
+
# Patch LLama functions
|
| 7 |
+
class LLamaPatch(BasePatch):
    """Model patcher for Llama-family architectures.

    Exposes the list of patchable linear-projection tags and the hooks that
    apply a patch function to every linear / non-linear submodule of the
    model (used e.g. to swap layers for quantized replacements).
    """

    # These tags are used to specify the parameters of each layer type. For
    # example, you can give different quantization parameters to different
    # linear projections by keying ``patch_params`` on these tags.
    @classmethod
    def get_linear_tags(cls):
        """Return the dotted attribute paths (relative to a decoder layer)
        of every linear projection that can be patched."""
        return [
            "self_attn.q_proj",
            "self_attn.k_proj",
            "self_attn.v_proj",
            "self_attn.o_proj",
            "mlp.gate_proj",
            "mlp.up_proj",
            "mlp.down_proj",
        ]

    @classmethod
    def patch_nonlinearlayers(cls, model, patch_fct, verbose=True):
        """Apply ``patch_fct`` to every non-linear submodule.

        Covers the LM head, token embeddings, final norm, and per-layer
        rotary embedding, activation, and layer norms.

        :param model: wrapper whose ``model`` attribute is the base transformer
        :param patch_fct: callable ``module -> module`` returning the replacement
        :param verbose: show a tqdm progress bar over the decoder layers
        """
        base_model = model.model
        model.lm_head = patch_fct(model.lm_head)
        base_model.embed_tokens = patch_fct(base_model.embed_tokens)
        base_model.norm = patch_fct(base_model.norm)

        layers = base_model.layers
        for i in tqdm(range(len(layers)), disable=not verbose):
            layers[i].self_attn.rotary_emb = patch_fct(layers[i].self_attn.rotary_emb)
            layers[i].mlp.act_fn = patch_fct(layers[i].mlp.act_fn)
            layers[i].input_layernorm = patch_fct(layers[i].input_layernorm)
            layers[i].post_attention_layernorm = patch_fct(
                layers[i].post_attention_layernorm
            )

    @classmethod
    def patch_linearlayers(cls, model, patch_fct, patch_params, verbose=True):
        """Apply ``patch_fct`` to every linear projection listed by
        :meth:`get_linear_tags`, passing the per-tag parameters.

        Iterating the tag list (instead of repeating one statement per
        projection) keeps this method automatically in sync with
        ``get_linear_tags()``.

        :param model: wrapper whose ``model`` attribute is the base transformer
        :param patch_fct: callable ``(module, params) -> module``
        :param patch_params: dict mapping each linear tag to its parameters
        :param verbose: show a tqdm progress bar over the decoder layers
        """
        layers = model.model.layers
        tags = cls.get_linear_tags()
        for i in tqdm(range(len(layers)), disable=not verbose):
            for tag in tags:
                # Tags are dotted paths like "self_attn.q_proj": resolve the
                # parent module, then replace the child attribute in place.
                parent_name, attr_name = tag.split(".")
                parent = getattr(layers[i], parent_name)
                setattr(
                    parent,
                    attr_name,
                    patch_fct(getattr(parent, attr_name), patch_params[tag]),
                )
|
VoiceAuthenticationModel.pyx
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import CoreML
|
| 2 |
+
|
| 3 |
+
class VoiceAuthentication {
    private var model: MLModel?

    init() {
        loadModel()
    }

    /// Loads the compiled Core ML model from the app bundle.
    /// Leaves `model` nil (fail closed) if the resource is missing or cannot
    /// be loaded, instead of crashing on a force-unwrap.
    private func loadModel() {
        // The original force-unwrapped `modelURL!`, which crashes the app at
        // runtime if the .mlmodelc resource is absent from the bundle.
        guard let modelURL = Bundle.main.url(forResource: "VoiceAuthenticationModel",
                                             withExtension: "mlmodelc") else {
            print("VoiceAuthenticationModel.mlmodelc not found in bundle.")
            return
        }
        do {
            model = try MLModel(contentsOf: modelURL)
        } catch {
            print("Failed to load VoiceAuthenticationModel: \(error.localizedDescription)")
        }
    }

    /// Runs the model on the extracted audio features and returns whether the
    /// speaker is recognized as the enrolled user.
    /// Fails closed: returns false when the model is unavailable, prediction
    /// throws, or the "isUser" output is missing.
    func verifyVoice(audioFeatures: [Float]) -> Bool {
        guard let model = model else {
            print("Model not loaded.")
            return false
        }

        do {
            let input = try MLDictionaryFeatureProvider(dictionary: ["audio_input": MLMultiArray(audioFeatures)])
            let output = try model.prediction(from: input)
            if let isUser = output.featureValue(for: "isUser")?.boolValue {
                return isUser
            }
        } catch {
            print("Voice verification failed: \(error.localizedDescription)")
        }
        return false
    }
}
|
fig_LMLM_TOFU_NPO.pdf
ADDED
|
Binary file (29.3 kB). View file
|
|
|
fig_LMLM_delta_loss_distribution.png
ADDED
|
Git LFS Details
|
fig_LMLM_motivation_new.pdf
ADDED
|
Binary file (46.6 kB). View file
|
|
|
fig_LMLM_overview.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:588d6735fa7b90f3656d16be681b86466c75c2b26fdc3ed2fd332e254e2bea60
|
| 3 |
+
size 198012
|
fig_LMLM_perplexity.adoc
ADDED
|
Binary file (20.2 kB). View file
|
|
|
fig_LMLM_prefix_tree.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c10eebd094c4c4296d15f25cfefd0aa943b37253a6296a4d69376347fd9cda9f
|
| 3 |
+
size 408258
|
fig_LMLM_results.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1b0eca2ee5cfd8cb3e42a5f7b11c1efa7b2794aa1249d087efe5d276171edc1
|
| 3 |
+
size 820162
|
lmlm4.cpp
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Linux Media Labs MPEG-4 demuxer
|
| 3 |
+
* Copyright (c) 2008 Ivo van Poorten
|
| 4 |
+
*
|
| 5 |
+
* Due to a lack of sample files, only files with one channel are supported.
|
| 6 |
+
* u-law and ADPCM audio are unsupported for the same reason.
|
| 7 |
+
*
|
| 8 |
+
* This file is part of FFmpeg.
|
| 9 |
+
*
|
| 10 |
+
* FFmpeg is free software; you can redistribute it and/or
|
| 11 |
+
* modify it under the terms of the GNU Lesser General Public
|
| 12 |
+
* License as published by the Free Software Foundation; either
|
| 13 |
+
* version 2.1 of the License, or (at your option) any later version.
|
| 14 |
+
*
|
| 15 |
+
* FFmpeg is distributed in the hope that it will be useful,
|
| 16 |
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 17 |
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 18 |
+
* Lesser General Public License for more details.
|
| 19 |
+
*
|
| 20 |
+
* You should have received a copy of the GNU Lesser General Public
|
| 21 |
+
* License along with FFmpeg; if not, write to the Free Software
|
| 22 |
+
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 23 |
+
*/
|
| 24 |
+
|
| 25 |
+
#include "libavutil/intreadwrite.h"
|
| 26 |
+
|
| 27 |
+
#include "avformat.h"
|
| 28 |
+
#include "demux.h"
|
| 29 |
+
#include "internal.h"
|
| 30 |
+
|
| 31 |
+
/* Frame-type codes carried in the 16-bit field of each packet header. */
#define LMLM4_I_FRAME  0x00
#define LMLM4_P_FRAME  0x01
#define LMLM4_B_FRAME  0x02
#define LMLM4_INVALID  0x03
#define LMLM4_MPEG1L2  0x04

/* Parenthesized so the macro expands safely in any expression context;
 * the original unparenthesized `1024 * 1024` mis-expands around operators
 * of higher/equal precedence (e.g. `x % LMLM4_MAX_PACKET_SIZE`). */
#define LMLM4_MAX_PACKET_SIZE (1024 * 1024)
|
| 38 |
+
|
| 39 |
+
static int lmlm4_probe(const AVProbeData *pd)
|
| 40 |
+
{
|
| 41 |
+
const unsigned char *buf = pd->buf;
|
| 42 |
+
unsigned int frame_type, packet_size;
|
| 43 |
+
|
| 44 |
+
frame_type = AV_RB16(buf + 2);
|
| 45 |
+
packet_size = AV_RB32(buf + 4);
|
| 46 |
+
|
| 47 |
+
if (!AV_RB16(buf) && frame_type <= LMLM4_MPEG1L2 && packet_size &&
|
| 48 |
+
frame_type != LMLM4_INVALID && packet_size <= LMLM4_MAX_PACKET_SIZE) {
|
| 49 |
+
if (frame_type == LMLM4_MPEG1L2) {
|
| 50 |
+
if ((AV_RB16(buf + 8) & 0xfffe) != 0xfffc)
|
| 51 |
+
return 0;
|
| 52 |
+
/* I could calculate the audio framesize and compare with
|
| 53 |
+
* packet_size-8, but that seems overkill */
|
| 54 |
+
return AVPROBE_SCORE_MAX / 3;
|
| 55 |
+
} else if (AV_RB24(buf + 8) == 0x000001) { /* PES Signal */
|
| 56 |
+
return AVPROBE_SCORE_MAX / 5;
|
| 57 |
+
}
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
return 0;
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
static int lmlm4_read_header(AVFormatContext *s)
{
    AVStream *st;

    /* Stream 0: MPEG-4 video, NTSC frame rate (30000/1001). */
    st = avformat_new_stream(s, NULL);
    if (!st)
        return AVERROR(ENOMEM);
    st->codecpar->codec_type   = AVMEDIA_TYPE_VIDEO;
    st->codecpar->codec_id     = AV_CODEC_ID_MPEG4;
    ffstream(st)->need_parsing = AVSTREAM_PARSE_HEADERS;
    avpriv_set_pts_info(st, 64, 1001, 30000);

    /* Stream 1: MPEG-1 Layer II audio. */
    st = avformat_new_stream(s, NULL);
    if (!st)
        return AVERROR(ENOMEM);
    st->codecpar->codec_type   = AVMEDIA_TYPE_AUDIO;
    st->codecpar->codec_id     = AV_CODEC_ID_MP2;
    ffstream(st)->need_parsing = AVSTREAM_PARSE_HEADERS;

    /* the parameters will be extracted from the compressed bitstream */
    return 0;
}
|
| 83 |
+
|
| 84 |
+
/* Read one lmlm4 packet: 8-byte header (channel, frame type, size),
 * payload, then zero padding up to the next 512-byte boundary. */
static int lmlm4_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    AVIOContext *pb = s->pb;
    int ret;
    unsigned int frame_type, packet_size, padding, frame_size;

    avio_rb16(pb); /* channel number */
    frame_type  = avio_rb16(pb);
    packet_size = avio_rb32(pb);
    /* Bytes needed to round packet_size up to a multiple of 512:
     * -x & 511 == (512 - x % 512) % 512 for unsigned x. */
    padding     = -packet_size & 511;

    if (frame_type > LMLM4_MPEG1L2 || frame_type == LMLM4_INVALID) {
        av_log(s, AV_LOG_ERROR, "invalid or unsupported frame_type\n");
        return AVERROR_INVALIDDATA;
    }
    /* packet_size must leave room for a non-empty payload after the header. */
    if (packet_size > LMLM4_MAX_PACKET_SIZE || packet_size <= 8) {
        av_log(s, AV_LOG_ERROR, "packet size %d is invalid\n", packet_size);
        return AVERROR_INVALIDDATA;
    }

    frame_size = packet_size - 8; /* payload only, header already consumed */
    if ((ret = av_get_packet(pb, pkt, frame_size)) <= 0)
        return ret < 0 ? ret : AVERROR(EIO);

    avio_skip(pb, padding);

    switch (frame_type) {
    case LMLM4_I_FRAME:
        pkt->flags = AV_PKT_FLAG_KEY;
        /* fallthrough: I/P/B frames all go to the video stream */
    case LMLM4_P_FRAME:
    case LMLM4_B_FRAME:
        pkt->stream_index = 0;
        break;
    case LMLM4_MPEG1L2:
        pkt->stream_index = 1;
        break;
    default:
        /* Unreachable: frame_type was validated above. */
        break;
    }

    return ret;
}
|
| 124 |
+
|
| 125 |
+
/* Demuxer registration: probe / header / packet callbacks for the
 * Linux Media Labs MPEG-4 ("lmlm4") raw container format. */
const FFInputFormat ff_lmlm4_demuxer = {
    .p.name         = "lmlm4",
    .p.long_name    = NULL_IF_CONFIG_SMALL("raw lmlm4"),
    .read_probe     = lmlm4_probe,
    .read_header    = lmlm4_read_header,
    .read_packet    = lmlm4_read_packet,
};
|