RayMelius Claude Sonnet 4.6 committed on
Commit
4c88c91
·
1 Parent(s): 97d584e

Set soci-agent-7b as default LLM for Ollama and HF providers

Browse files

- MODEL_OLLAMA_SOCI: soci-agent -> soci-agent-7b
- MODEL_HF_SOCI: RayMelius/soci-agent-q4 -> RayMelius/soci-agent-7b
- Update server.py menu labels to match

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. src/soci/api/server.py +2 -2
  2. src/soci/engine/llm.py +2 -2
src/soci/api/server.py CHANGED
@@ -277,12 +277,12 @@ def _choose_provider() -> str:
277
  # Priority: Soci fine-tuned model first, then free cloud providers, then Ollama.
278
  options = []
279
  if has_hf:
280
- options.append(("hf", "Soci Agent / HF Inference (RayMelius/soci-agent-q4)"))
281
  if has_groq:
282
  options.append(("groq", "Groq (free tier, 30 req/min)"))
283
  if has_gemini:
284
  options.append(("gemini", "Gemini (free tier, 15 req/min via AI Studio)"))
285
- options.append(("ollama", "Ollama / Soci Agent local (soci-agent)"))
286
 
287
  # If only one option, use it
288
  if len(options) == 1:
 
277
  # Priority: Soci fine-tuned model first, then free cloud providers, then Ollama.
278
  options = []
279
  if has_hf:
280
+ options.append(("hf", "Soci Agent / HF Inference (RayMelius/soci-agent-7b)"))
281
  if has_groq:
282
  options.append(("groq", "Groq (free tier, 30 req/min)"))
283
  if has_gemini:
284
  options.append(("gemini", "Gemini (free tier, 15 req/min via AI Studio)"))
285
+ options.append(("ollama", "Ollama / Soci Agent local (soci-agent-7b)"))
286
 
287
  # If only one option, use it
288
  if len(options) == 1:
src/soci/engine/llm.py CHANGED
@@ -64,10 +64,10 @@ MODEL_HF_QWEN = "Qwen/Qwen2.5-7B-Instruct" # default — auto-routed, g
64
  MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
65
  MODEL_HF_MISTRAL = "mistralai/Mistral-7B-Instruct-v0.3"
66
  MODEL_HF_SMOL = "HuggingFaceTB/SmolLM3-3B:hf-inference" # CPU inference, no credits needed
67
- MODEL_HF_SOCI = "RayMelius/soci-agent-q4" # Soci fine-tuned Qwen2.5-0.5B (LoRA)
68
 
69
  # Ollama model IDs for Soci fine-tuned models
70
- MODEL_OLLAMA_SOCI = "soci-agent" # load via: ollama create soci-agent -f Modelfile
71
 
72
  # Approximate cost per 1M tokens (USD) — Ollama is free, Groq is very cheap
73
  COST_PER_1M = {
 
64
  MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
65
  MODEL_HF_MISTRAL = "mistralai/Mistral-7B-Instruct-v0.3"
66
  MODEL_HF_SMOL = "HuggingFaceTB/SmolLM3-3B:hf-inference" # CPU inference, no credits needed
67
+ MODEL_HF_SOCI = "RayMelius/soci-agent-7b" # Soci fine-tuned Qwen2.5-7B (LoRA, GGUF)
68
 
69
  # Ollama model IDs for Soci fine-tuned models
70
+ MODEL_OLLAMA_SOCI = "soci-agent-7b" # load via: ollama create soci-agent-7b -f Modelfile
71
 
72
  # Approximate cost per 1M tokens (USD) — Ollama is free, Groq is very cheap
73
  COST_PER_1M = {