Update app.py
app.py
CHANGED
@@ -2,23 +2,27 @@
 import warnings
 warnings.filterwarnings("ignore")
 
+import os
 import json
 import subprocess
 import sys
+from typing import List, Tuple
 from llama_cpp import Llama
 from llama_cpp_agent import LlamaCppAgent
 from llama_cpp_agent import MessagesFormatterType
 from llama_cpp_agent.providers import LlamaCppPythonProvider
 from llama_cpp_agent.chat_history import BasicChatHistory
 from llama_cpp_agent.chat_history.messages import Roles
-import gradio as gr
 from huggingface_hub import hf_hub_download
-
+import gradio as gr
 from logger import logging
 from exception import CustomExceptionHandling
 
 
 # Download gguf model files
+if not os.path.exists("./models"):
+    os.makedirs("./models")
+
 hf_hub_download(
     repo_id="bartowski/Dolphin3.0-Llama3.2-1B-GGUF",
     filename="Dolphin3.0-Llama3.2-1B-Q6_K.gguf",

@@ -42,13 +46,13 @@ llm_model = None
 def respond(
     message: str,
     history: List[Tuple[str, str]],
-    model: str,
-    system_message: str,
-    max_tokens: int,
-    temperature: float,
-    top_p: float,
-    top_k: int,
-    repeat_penalty: float,
+    model: str = "Dolphin3.0-Qwen2.5-0.5B-Q6_K.gguf",  # Set default model
+    system_message: str = "You are a helpful assistant.",
+    max_tokens: int = 1024,
+    temperature: float = 0.7,
+    top_p: float = 0.95,
+    top_k: int = 40,
+    repeat_penalty: float = 1.1,
 ):
     """
     Respond to a message using the Dolphin-3 model via Llama.cpp.

@@ -72,8 +76,18 @@ def respond(
     global llm
     global llm_model
 
+    # Ensure model is not None
+    if model is None:
+        model = "Dolphin3.0-Qwen2.5-0.5B-Q6_K.gguf"
+
     # Load the model
     if llm is None or llm_model != model:
+        # Check if model file exists
+        model_path = f"models/{model}"
+        if not os.path.exists(model_path):
+            yield f"Error: Model file not found at {model_path}. Please check your model path."
+            return
+
         llm = Llama(
             model_path=f"models/{model}",
             flash_attn=False,

@@ -205,11 +219,18 @@ demo = gr.ChatInterface(
     stop_btn="Stop",
     title=title,
     description=description,
-    chatbot=gr.Chatbot(scale=1, show_copy_button=True),
+    chatbot=gr.Chatbot(scale=1, show_copy_button=True, resizable=True),
     flagging_mode="never",
+    editable=True,
+    cache_examples=False,
 )
 
 
 # Launch the chat interface
 if __name__ == "__main__":
-    demo.launch(
+    demo.launch(
+        share=False,
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_api=False,
+    )
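Taken together, the changes guard the lazy model load: the models directory is created up front, respond falls back to a default GGUF file when model is None, and a missing file yields an error message instead of crashing the Llama constructor. Below is a minimal standalone sketch of that pattern, not the app's actual code: it assumes llama-cpp-python and huggingface_hub are installed, and the local_dir argument, the helper names, and the small n_ctx value are illustrative choices rather than values taken from this commit.

```python
import os
from typing import Optional

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

MODELS_DIR = "./models"
DEFAULT_MODEL = "Dolphin3.0-Llama3.2-1B-Q6_K.gguf"  # file name from the download above


def ensure_model(filename: str = DEFAULT_MODEL) -> str:
    """Download the GGUF file into MODELS_DIR if it is not already there."""
    os.makedirs(MODELS_DIR, exist_ok=True)
    path = os.path.join(MODELS_DIR, filename)
    if not os.path.exists(path):
        # local_dir is an assumption; the commit's hf_hub_download call is truncated here
        hf_hub_download(
            repo_id="bartowski/Dolphin3.0-Llama3.2-1B-GGUF",
            filename=filename,
            local_dir=MODELS_DIR,
        )
    return path


def load_model(filename: Optional[str] = None) -> Llama:
    """Fall back to the default model and fail with a clear error if the file is missing."""
    filename = filename or DEFAULT_MODEL
    path = os.path.join(MODELS_DIR, filename)
    if not os.path.exists(path):
        raise FileNotFoundError(f"Model file not found at {path}. Please check your model path.")
    return Llama(model_path=path, n_ctx=2048)
```

The new demo.launch(...) block binds the server to all interfaces on port 7860 (Gradio's default port) with the public share link and the API page disabled, which is the exposure Hugging Face Spaces expect from a hosted app.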