RayMelius Claude Sonnet 4.6 committed on
Commit
4c88c91
·
1 Parent(s): 97d584e

Set soci-agent-7b as default LLM for Ollama and HF providers

Browse files

- MODEL_OLLAMA_SOCI: soci-agent -> soci-agent-7b
- MODEL_HF_SOCI: RayMelius/soci-agent-q4 -> RayMelius/soci-agent-7b
- Update server.py menu labels to match

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. src/soci/api/server.py +2 -2
  2. src/soci/engine/llm.py +2 -2
src/soci/api/server.py CHANGED
@@ -277,12 +277,12 @@ def _choose_provider() -> str:
277
  # Priority: Soci fine-tuned model first, then free cloud providers, then Ollama.
278
  options = []
279
  if has_hf:
280
- options.append(("hf", "Soci Agent / HF Inference (RayMelius/soci-agent-q4)"))
281
  if has_groq:
282
  options.append(("groq", "Groq (free tier, 30 req/min)"))
283
  if has_gemini:
284
  options.append(("gemini", "Gemini (free tier, 15 req/min via AI Studio)"))
285
- options.append(("ollama", "Ollama / Soci Agent local (soci-agent)"))
286
 
287
  # If only one option, use it
288
  if len(options) == 1:
 
277
  # Priority: Soci fine-tuned model first, then free cloud providers, then Ollama.
278
  options = []
279
  if has_hf:
280
+ options.append(("hf", "Soci Agent / HF Inference (RayMelius/soci-agent-7b)"))
281
  if has_groq:
282
  options.append(("groq", "Groq (free tier, 30 req/min)"))
283
  if has_gemini:
284
  options.append(("gemini", "Gemini (free tier, 15 req/min via AI Studio)"))
285
+ options.append(("ollama", "Ollama / Soci Agent local (soci-agent-7b)"))
286
 
287
  # If only one option, use it
288
  if len(options) == 1:
src/soci/engine/llm.py CHANGED
@@ -64,10 +64,10 @@ MODEL_HF_QWEN = "Qwen/Qwen2.5-7B-Instruct" # default — auto-routed, g
64
  MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
65
  MODEL_HF_MISTRAL = "mistralai/Mistral-7B-Instruct-v0.3"
66
  MODEL_HF_SMOL = "HuggingFaceTB/SmolLM3-3B:hf-inference" # CPU inference, no credits needed
67
- MODEL_HF_SOCI = "RayMelius/soci-agent-q4" # Soci fine-tuned Qwen2.5-0.5B (LoRA)
68
 
69
  # Ollama model IDs for Soci fine-tuned models
70
- MODEL_OLLAMA_SOCI = "soci-agent" # load via: ollama create soci-agent -f Modelfile
71
 
72
  # Approximate cost per 1M tokens (USD) — Ollama is free, Groq is very cheap
73
  COST_PER_1M = {
 
64
  MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
65
  MODEL_HF_MISTRAL = "mistralai/Mistral-7B-Instruct-v0.3"
66
  MODEL_HF_SMOL = "HuggingFaceTB/SmolLM3-3B:hf-inference" # CPU inference, no credits needed
67
+ MODEL_HF_SOCI = "RayMelius/soci-agent-7b" # Soci fine-tuned Qwen2.5-7B (LoRA, GGUF)
68
 
69
  # Ollama model IDs for Soci fine-tuned models
70
+ MODEL_OLLAMA_SOCI = "soci-agent-7b" # load via: ollama create soci-agent-7b -f Modelfile
71
 
72
  # Approximate cost per 1M tokens (USD) — Ollama is free, Groq is very cheap
73
  COST_PER_1M = {