3morixd committed on
Commit
46e9ad9
·
verified ·
1 Parent(s): 640ecdb

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. src/dispatchai/core.py +26 -5
src/dispatchai/core.py CHANGED
@@ -71,6 +71,17 @@ class DispatchModel:
71
  self._tokenizer = None
72
  self._loaded = False
73
 
 
 
 
 
 
 
 
 
 
 
 
74
  def _load(self):
75
  """Lazily load the model on first use."""
76
  if self._loaded:
@@ -102,7 +113,13 @@ class DispatchModel:
102
 
103
  from huggingface_hub import hf_hub_download
104
  gguf_path = hf_hub_download(self.repo_id, "model.gguf")
105
- self._model = Llama(model_path=gguf_path, n_ctx=512, n_threads=4, verbose=False)
 
 
 
 
 
 
106
 
107
  self._loaded = True
108
 
@@ -150,13 +167,17 @@ class DispatchModel:
150
  return response.strip()
151
 
152
  elif self.backend == "gguf":
153
- response = self._model(
154
- message,
 
 
 
 
 
155
  max_tokens=max_tokens,
156
  temperature=temperature,
157
- echo=False,
158
  )
159
- return response["choices"][0]["text"].strip()
160
 
161
  return ""
162
 
 
71
  self._tokenizer = None
72
  self._loaded = False
73
 
74
+ def _detect_chat_format(self, model_name: str) -> Optional[str]:
75
+ """Auto-detect the correct chat format from model name."""
76
+ lower = model_name.lower()
77
+ if "smollm" in lower or "llama-3" in lower:
78
+ return "llama-3"
79
+ elif "gemma" in lower:
80
+ return "gemma"
81
+ elif "qwen" in lower or "phi" in lower or "tinyllama" in lower or "minicpm" in lower or "moondream" in lower:
82
+ return "chatml"
83
+ return "chatml" # Safe default
84
+
85
  def _load(self):
86
  """Lazily load the model on first use."""
87
  if self._loaded:
 
113
 
114
  from huggingface_hub import hf_hub_download
115
  gguf_path = hf_hub_download(self.repo_id, "model.gguf")
116
+
117
+ # Determine chat format from model name
118
+ chat_format = self._detect_chat_format(self.model_name)
119
+ kwargs = dict(model_path=gguf_path, n_ctx=512, n_threads=4, verbose=False)
120
+ if chat_format:
121
+ kwargs["chat_format"] = chat_format
122
+ self._model = Llama(**kwargs)
123
 
124
  self._loaded = True
125
 
 
167
  return response.strip()
168
 
169
  elif self.backend == "gguf":
170
+ messages = []
171
+ if system:
172
+ messages.append({"role": "system", "content": system})
173
+ messages.append({"role": "user", "content": message})
174
+
175
+ response = self._model.create_chat_completion(
176
+ messages=messages,
177
  max_tokens=max_tokens,
178
  temperature=temperature,
 
179
  )
180
+ return response["choices"][0]["message"]["content"].strip()
181
 
182
  return ""
183