Spaces:

SRP-base-model-training
/

our_model

Runtime error

App Files Community

Beibars003 committed on Jul 2, 2025

Commit

b1aeab7

verified ·

1 Parent(s): cc6bdb3

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -74

app.py CHANGED Viewed

@@ -14,8 +14,8 @@ from llama_cpp_agent.chat_history.messages import Roles
 from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers
 from huggingface_hub import hf_hub_download
 import gradio as gr
-from logger import logging
-from exception import CustomExceptionHandling
 # Load the Environment Variables from .env file
@@ -87,83 +87,83 @@ def respond(
     Returns:
         str: The response to the message.
     """
-    try:
-        # Load the global variables
-        global llm
-        global llm_model
-        # Ensure model is not None
-        if model is None:
-            model = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf"
-        # Load the model
-        if llm is None or llm_model != model:
-            # Check if model file exists
-            model_path = f"models/{model}"
-            if not os.path.exists(model_path):
-                yield f"Error: Model file not found at {model_path}. Please check your model path."
-                return
-            llm = Llama(
-                model_path=f"models/{model}",
-                flash_attn=False,
-                n_gpu_layers=0,
-                n_batch=8,
-                n_ctx=2048,
-                n_threads=8,
-                n_threads_batch=8,
-            )
-            llm_model = model
-        provider = LlamaCppPythonProvider(llm)
-        # Create the agent
-        agent = LlamaCppAgent(
-            provider,
-            system_prompt=f"{system_message}",
-            custom_messages_formatter=gemma_3_formatter,
-            debug_output=True,
         )
-        # Set the settings like temperature, top-k, top-p, max tokens, etc.
-        settings = provider.get_provider_default_settings()
-        settings.temperature = temperature
-        settings.top_k = top_k
-        settings.top_p = top_p
-        settings.max_tokens = max_tokens
-        settings.repeat_penalty = repeat_penalty
-        settings.stream = True
-        messages = BasicChatHistory()
-        # Add the chat history
-        for msn in history:
-            user = {"role": Roles.user, "content": msn[0]}
-            assistant = {"role": Roles.assistant, "content": msn[1]}
-            messages.add_message(user)
-            messages.add_message(assistant)
-        # Get the response stream
-        stream = agent.get_chat_response(
-            message,
-            llm_sampling_settings=settings,
-            chat_history=messages,
-            returns_streaming_generator=True,
-            print_output=False,
-        )
-        # Log the success
-        logging.info("Response stream generated successfully")
-        # Generate the response
-        outputs = ""
-        for output in stream:
-            outputs += output
-            yield outputs
-    # Handle exceptions that may occur during the process
-    except Exception as e:
-        # Custom exception handling
-        raise CustomExceptionHandling(e, sys) from e
 # Create a chat interface

 from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers
 from huggingface_hub import hf_hub_download
 import gradio as gr
+# from logger import logging
+# from exception import CustomExceptionHandling
 # Load the Environment Variables from .env file
     Returns:
         str: The response to the message.
     """
+    # try:
+    # Load the global variables
+    global llm
+    global llm_model
+    # Ensure model is not None
+    if model is None:
+        model = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf"
+    # Load the model
+    if llm is None or llm_model != model:
+        # Check if model file exists
+        model_path = f"models/{model}"
+        if not os.path.exists(model_path):
+            yield f"Error: Model file not found at {model_path}. Please check your model path."
+            return
+        llm = Llama(
+            model_path=f"models/{model}",
+            flash_attn=False,
+            n_gpu_layers=0,
+            n_batch=8,
+            n_ctx=2048,
+            n_threads=8,
+            n_threads_batch=8,
         )
+        llm_model = model
+    provider = LlamaCppPythonProvider(llm)
+    # Create the agent
+    agent = LlamaCppAgent(
+        provider,
+        system_prompt=f"{system_message}",
+        custom_messages_formatter=gemma_3_formatter,
+        debug_output=True,
+    )
+    # Set the settings like temperature, top-k, top-p, max tokens, etc.
+    settings = provider.get_provider_default_settings()
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
+    settings.max_tokens = max_tokens
+    settings.repeat_penalty = repeat_penalty
+    settings.stream = True
+    messages = BasicChatHistory()
+    # Add the chat history
+    for msn in history:
+        user = {"role": Roles.user, "content": msn[0]}
+        assistant = {"role": Roles.assistant, "content": msn[1]}
+        messages.add_message(user)
+        messages.add_message(assistant)
+    # Get the response stream
+    stream = agent.get_chat_response(
+        message,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False,
+    )
+    # Log the success
+    # logging.info("Response stream generated successfully")
+    # Generate the response
+    outputs = ""
+    for output in stream:
+        outputs += output
+        yield outputs
+    # # Handle exceptions that may occur during the process
+    # except Exception as e:
+    #     # Custom exception handling
+    #     raise CustomExceptionHandling(e, sys) from e
 # Create a chat interface