Dmitry Beresnev committed on
Commit
944c08a
·
1 Parent(s): e80973f
Files changed (1) hide show
  1. app.py +5 -6
app.py CHANGED
@@ -9,21 +9,20 @@ from typing import Optional
9
 
10
  app = FastAPI()
11
 
12
- # Predefined list of available models
13
  AVAILABLE_MODELS = {
14
  # === Financial & Summarization Models (Recommended) ===
15
- "qwen-2.5-7b": "bartowski/Qwen2.5-7B-Instruct-GGUF:Qwen2.5-7B-Instruct-Q4_K_M.gguf", # Best for financial + multilingual
16
- "kimi-k2-9b": "bartowski/k2-chat-GGUF:k2-chat-Q4_K_M.gguf", # Kimi K2 - long context, good reasoning
17
- "yi-1.5-9b": "bartowski/Yi-1.5-9B-Chat-GGUF:Yi-1.5-9B-Chat-Q4_K_M.gguf", # Excellent for finance
18
- "llama-3.1-8b": "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", # Great reasoning
19
  "mistral-7b": "TheBloke/Mistral-7B-Instruct-v0.3-GGUF:mistral-7b-instruct-v0.3.Q4_K_M.gguf", # Reliable summarization
 
20
 
21
  # === Coding Models ===
22
  "deepseek-coder": "TheBloke/deepseek-coder-6.7B-instruct-GGUF:deepseek-coder-6.7b-instruct.Q4_K_M.gguf",
23
 
24
  # === General Purpose ===
25
  "deepseek-chat": "TheBloke/deepseek-llm-7B-chat-GGUF:deepseek-llm-7b-chat.Q4_K_M.gguf",
26
- "llama-3.2-3b": "bartowski/Llama-3.2-3B-Instruct-GGUF:Llama-3.2-3B-Instruct-Q4_K_M.gguf", # Fast & lightweight
27
  }
28
 
29
  # Global state
 
9
 
10
  app = FastAPI()
11
 
12
+ # Predefined list of available models (verified working paths)
13
  AVAILABLE_MODELS = {
14
  # === Financial & Summarization Models (Recommended) ===
15
+ "qwen-2.5-7b": "Qwen/Qwen2.5-7B-Instruct-GGUF:qwen2.5-7b-instruct-q4_k_m.gguf", # Best for financial + multilingual
16
+ "yi-1.5-9b": "TheBloke/Yi-1.5-9B-Chat-GGUF:yi-1.5-9b-chat.Q4_K_M.gguf", # Excellent for finance
17
+ "llama-3.1-8b": "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF:Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", # Great reasoning
 
18
  "mistral-7b": "TheBloke/Mistral-7B-Instruct-v0.3-GGUF:mistral-7b-instruct-v0.3.Q4_K_M.gguf", # Reliable summarization
19
+ "llama-3.2-3b": "lmstudio-community/Llama-3.2-3B-Instruct-GGUF:Llama-3.2-3B-Instruct-Q4_K_M.gguf", # Fast & lightweight
20
 
21
  # === Coding Models ===
22
  "deepseek-coder": "TheBloke/deepseek-coder-6.7B-instruct-GGUF:deepseek-coder-6.7b-instruct.Q4_K_M.gguf",
23
 
24
  # === General Purpose ===
25
  "deepseek-chat": "TheBloke/deepseek-llm-7B-chat-GGUF:deepseek-llm-7b-chat.Q4_K_M.gguf",
 
26
  }
27
 
28
  # Global state