dispatchAI
/

dispatchAI-SDK

Model card Files Files and versions

xet

Community

3morixd committed 1 day ago

Commit

46e9ad9

verified ·

1 Parent(s): 640ecdb

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

src/dispatchai/core.py +26 -5

src/dispatchai/core.py CHANGED Viewed

@@ -71,6 +71,17 @@ class DispatchModel:
         self._tokenizer = None
         self._loaded = False
     def _load(self):
         """Lazily load the model on first use."""
         if self._loaded:
@@ -102,7 +113,13 @@ class DispatchModel:
             from huggingface_hub import hf_hub_download
             gguf_path = hf_hub_download(self.repo_id, "model.gguf")
-            self._model = Llama(model_path=gguf_path, n_ctx=512, n_threads=4, verbose=False)
         self._loaded = True
@@ -150,13 +167,17 @@ class DispatchModel:
             return response.strip()
         elif self.backend == "gguf":
-            response = self._model(
-                message,
                 max_tokens=max_tokens,
                 temperature=temperature,
-                echo=False,
             )
-            return response["choices"][0]["text"].strip()
         return ""

         self._tokenizer = None
         self._loaded = False
+    def _detect_chat_format(self, model_name: str) -> Optional[str]:
+        """Auto-detect the correct chat format from model name.
+
+        Matching is a case-insensitive substring test on ``model_name``;
+        the rules are checked in order and the first match wins.
+
+        Args:
+            model_name: Model identifier to inspect.
+
+        Returns:
+            ``"llama-3"`` when the name contains "smollm" or "llama-3",
+            ``"gemma"`` when it contains "gemma", and ``"chatml"`` in
+            every other case. Although the annotation is ``Optional[str]``,
+            no path in this method returns ``None``.
+        """
+        lower = model_name.lower()
+        if "smollm" in lower or "llama-3" in lower:
+            return "llama-3"
+        elif "gemma" in lower:
+            return "gemma"
+        # NOTE(review): this branch returns the same value as the fallback
+        # below, so listing these names does not change the result.
+        elif "qwen" in lower or "phi" in lower or "tinyllama" in lower or "minicpm" in lower or "moondream" in lower:
+            return "chatml"
+        return "chatml"  # Safe default
     def _load(self):
         """Lazily load the model on first use."""
         if self._loaded:
             from huggingface_hub import hf_hub_download
             gguf_path = hf_hub_download(self.repo_id, "model.gguf")
+            # Determine chat format from model name
+            chat_format = self._detect_chat_format(self.model_name)
+            kwargs = dict(model_path=gguf_path, n_ctx=512, n_threads=4, verbose=False)
+            if chat_format:
+                kwargs["chat_format"] = chat_format
+            self._model = Llama(**kwargs)
         self._loaded = True
             return response.strip()
         elif self.backend == "gguf":
+            messages = []
+            if system:
+                messages.append({"role": "system", "content": system})
+            messages.append({"role": "user", "content": message})
+            response = self._model.create_chat_completion(
+                messages=messages,
                 max_tokens=max_tokens,
                 temperature=temperature,
             )
+            return response["choices"][0]["message"]["content"].strip()
         return ""