Update app.py
app.py
CHANGED
@@ -1,3 +1,6 @@
+import warnings
+warnings.filterwarnings("ignore")
+
 import os
 import json
 import subprocess
@@ -11,14 +14,26 @@ from llama_cpp_agent.chat_history.messages import Roles
 from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers
 from huggingface_hub import hf_hub_download
 import gradio as gr
+from logger import logging
+from exception import CustomExceptionHandling


 # Load the Environment Variables from .env file
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")

+# Download gguf model files
+if not os.path.exists("./models"):
+    os.makedirs("./models")
+
 hf_hub_download(
-    repo_id="
-    filename="
+    repo_id="bartowski/google_gemma-3-1b-it-GGUF",
+    filename="google_gemma-3-1b-it-Q4_K_M.gguf",
+    local_dir="./models",
+)
+hf_hub_download(
+    repo_id="bartowski/google_gemma-3-1b-it-GGUF",
+    filename="google_gemma-3-1b-it-Q5_K_M.gguf",
+    local_dir="./models",
 )


@@ -43,10 +58,12 @@ gemma_3_formatter = MessagesFormatter(


 # Set the title and description
-title = "
-description = """
+title = "Gemma Llama.cpp"
+description = """Google released **[Gemma 3](https://blog.google/technology/developers/gemma-3/)**, a family of multimodal models that offers advanced capabilities like large context and multilingual support.
+This interactive chat interface allows you to experiment with the [`gemma-3-1b-it`](https://huggingface.co/google/gemma-3-1b-it) text model using various prompts and generation parameters.
 Users can select different model variants (GGUF format), system prompts, and observe generated responses in real-time.
-Key generation parameters, such as `temperature`, `max_tokens`, `top_k` and others are exposed below for tuning model behavior.
+Key generation parameters, such as `temperature`, `max_tokens`, `top_k` and others are exposed below for tuning model behavior.
+For a detailed technical walkthrough, please refer to the accompanying **[blog post](https://sitammeur.medium.com/build-your-own-gemma-3-chatbot-with-gradio-and-llama-cpp-46457b22a28e)**."""


 llm = None
@@ -55,7 +72,7 @@ llm_model = None
 def respond(
     message: str,
     history: List[Tuple[str, str]],
-    model: str = "
+    model: str = "google_gemma-3-1b-it-Q4_K_M.gguf",  # Set default model
     system_message: str = "You are a helpful assistant.",
     max_tokens: int = 1024,
     temperature: float = 0.7,
@@ -78,86 +95,102 @@ def respond(
     Returns:
         str: The response to the message.
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    try:
+        # Load the global variables
+        global llm
+        global llm_model
+
+        # Ensure model is not None
+        if model is None:
+            model = "google_gemma-3-1b-it-Q4_K_M.gguf"
+
+        # Load the model
+        if llm is None or llm_model != model:
+            # Check if model file exists
+            model_path = f"models/{model}"
+            if not os.path.exists(model_path):
+                yield f"Error: Model file not found at {model_path}. Please check your model path."
+                return
+
+            llm = Llama(
+                model_path=f"models/{model}",
+                flash_attn=False,
+                n_gpu_layers=0,
+                n_batch=8,
+                n_ctx=2048,
+                n_threads=8,
+                n_threads_batch=8,
+            )
+            llm_model = model
+        provider = LlamaCppPythonProvider(llm)
+
+        # Create the agent
+        agent = LlamaCppAgent(
+            provider,
+            system_prompt=f"{system_message}",
+            custom_messages_formatter=gemma_3_formatter,
+            debug_output=True,
         )
-    llm_model = model
-    provider = LlamaCppPythonProvider(llm)
-
-    # Create the agent
-    agent = LlamaCppAgent(
-        provider,
-        system_prompt=f"{system_message}",
-        custom_messages_formatter=gemma_3_formatter,
-        debug_output=True,
-    )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Set the settings like temperature, top-k, top-p, max tokens, etc.
+        settings = provider.get_provider_default_settings()
+        settings.temperature = temperature
+        settings.top_k = top_k
+        settings.top_p = top_p
+        settings.max_tokens = max_tokens
+        settings.repeat_penalty = repeat_penalty
+        settings.stream = True
+
+        messages = BasicChatHistory()
+
+        # Add the chat history
+        for msn in history:
+            user = {"role": Roles.user, "content": msn[0]}
+            assistant = {"role": Roles.assistant, "content": msn[1]}
+            messages.add_message(user)
+            messages.add_message(assistant)
+
+        # Get the response stream
+        stream = agent.get_chat_response(
+            message,
+            llm_sampling_settings=settings,
+            chat_history=messages,
+            returns_streaming_generator=True,
+            print_output=False,
+        )
+
+        # Log the success
+        logging.info("Response stream generated successfully")

+        # Generate the response
+        outputs = ""
+        for output in stream:
+            outputs += output
+            yield outputs

-    #
-
-
-
-        yield outputs
+    # Handle exceptions that may occur during the process
+    except Exception as e:
+        # Custom exception handling
+        raise CustomExceptionHandling(e, sys) from e


 # Create a chat interface
 demo = gr.ChatInterface(
     respond,
-    examples=[],
+    examples=[["What is the capital of France?"], ["Tell me something about artificial intelligence."], ["What is gravity?"]],
     additional_inputs_accordion=gr.Accordion(
         label="⚙️ Parameters", open=False, render=False
     ),
     additional_inputs=[
+        gr.Dropdown(
+            choices=[
+                "google_gemma-3-1b-it-Q4_K_M.gguf",
+                "google_gemma-3-1b-it-Q5_K_M.gguf",
+            ],
+            value="google_gemma-3-1b-it-Q4_K_M.gguf",
+            label="Model",
+            info="Select the AI model to use for chat",
+        ),
         gr.Textbox(
             value="You are a helpful assistant.",
             label="System Prompt",
@@ -210,7 +243,9 @@ demo = gr.ChatInterface(
     stop_btn="Stop",
     title=title,
     description=description,
-    chatbot=gr.Chatbot(scale=1, show_copy_button=True),
+    chatbot=gr.Chatbot(scale=1, show_copy_button=True, resizable=True),
+    flagging_mode="never",
+    editable=True,
     cache_examples=False,
 )

@@ -222,4 +257,4 @@ if __name__ == "__main__":
     server_name="0.0.0.0",
     server_port=7860,
     show_api=False,
-)
+)
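
The new setup block is self-contained enough to try outside the Space. Below is a minimal standalone sketch of the same download-and-load path, assuming only `huggingface_hub` and `llama-cpp-python` are installed; the `repo_id`, filenames, and `Llama` settings come from the diff above, while the prompt and the plain `create_completion` streaming call (in place of the app's llama-cpp-agent wrapper) are illustrative.

```python
# Standalone sketch: reproduce the commit's download-and-load path.
# The prompt and the create_completion call are illustrative, not the app's code.
import os

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

os.makedirs("./models", exist_ok=True)

# Same repo and quantization as the diff; returns the local file path
model_file = hf_hub_download(
    repo_id="bartowski/google_gemma-3-1b-it-GGUF",
    filename="google_gemma-3-1b-it-Q4_K_M.gguf",
    local_dir="./models",
)

# Same CPU-only settings the Space uses (no flash attention, 2048-token context)
llm = Llama(
    model_path=model_file,
    flash_attn=False,
    n_gpu_layers=0,
    n_ctx=2048,
    n_threads=8,
)

# Stream tokens incrementally, as the Space's respond() generator does
for chunk in llm.create_completion("What is gravity?", max_tokens=64, stream=True):
    print(chunk["choices"][0]["text"], end="", flush=True)
```

Downloading both quantizations up front, as the commit does, lets the new `gr.Dropdown` switch models without a network round-trip; the `llm is None or llm_model != model` guard in `respond()` then only reloads the weights when the selection actually changes.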
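
Two of the new imports, `from logger import logging` and `from exception import CustomExceptionHandling`, refer to repo-local modules that this diff does not show. The sketch below is an assumption about their shape, inferred only from the call sites (`logging.info(...)` and `raise CustomExceptionHandling(e, sys) from e`); the Space's actual files may differ.

```python
# logger.py (assumed): module-level logging setup, re-exported as `logging`
import logging

logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] %(levelname)s - %(message)s",
)


# exception.py (assumed): wraps an error with the file and line it came from,
# matching the call site CustomExceptionHandling(e, sys)
import sys
from types import ModuleType


class CustomExceptionHandling(Exception):
    def __init__(self, error: Exception, error_detail: ModuleType = sys):
        super().__init__(str(error))
        # Inside an except block, exc_info() still holds the active exception
        _, _, tb = error_detail.exc_info()
        self.file_name = tb.tb_frame.f_code.co_filename if tb else "<unknown>"
        self.line_no = tb.tb_lineno if tb else -1

    def __str__(self) -> str:
        return f"Error in [{self.file_name}] at line [{self.line_no}]: {self.args[0]}"
```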