Beibars003 committed
Commit b1aeab7 · verified · 1 Parent(s): cc6bdb3

Update app.py

Files changed (1): app.py +74 -74
app.py CHANGED
@@ -14,8 +14,8 @@ from llama_cpp_agent.chat_history.messages import Roles
 from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers
 from huggingface_hub import hf_hub_download
 import gradio as gr
-from logger import logging
-from exception import CustomExceptionHandling
+# from logger import logging
+# from exception import CustomExceptionHandling


 # Load the Environment Variables from .env file
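Review note on the first hunk: logger and exception are project-local modules, not PyPI packages, and the commit comments the imports out rather than deleting them, which is why the logging.info(...) call and the except block in the second hunk have to be disabled as well. If the two helper files are gone from the Space for good, a stdlib stand-in keeps a log line available. A minimal sketch, assuming nothing about what logger.py originally configured:

import logging
import sys

# Stdlib replacement for the commented-out project logger (format is assumed).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    stream=sys.stdout,
)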
@@ -87,83 +87,83 @@ def respond(
     Returns:
         str: The response to the message.
     """
-    try:
-        # Load the global variables
-        global llm
-        global llm_model
-
-        # Ensure model is not None
-        if model is None:
-            model = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf"
-
-        # Load the model
-        if llm is None or llm_model != model:
-            # Check if model file exists
-            model_path = f"models/{model}"
-            if not os.path.exists(model_path):
-                yield f"Error: Model file not found at {model_path}. Please check your model path."
-                return
-
-            llm = Llama(
-                model_path=f"models/{model}",
-                flash_attn=False,
-                n_gpu_layers=0,
-                n_batch=8,
-                n_ctx=2048,
-                n_threads=8,
-                n_threads_batch=8,
-            )
-            llm_model = model
-        provider = LlamaCppPythonProvider(llm)
-
-        # Create the agent
-        agent = LlamaCppAgent(
-            provider,
-            system_prompt=f"{system_message}",
-            custom_messages_formatter=gemma_3_formatter,
-            debug_output=True,
-        )
+    # try:
+    # Load the global variables
+    global llm
+    global llm_model
+
+    # Ensure model is not None
+    if model is None:
+        model = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf"
+
+    # Load the model
+    if llm is None or llm_model != model:
+        # Check if model file exists
+        model_path = f"models/{model}"
+        if not os.path.exists(model_path):
+            yield f"Error: Model file not found at {model_path}. Please check your model path."
+            return
+
+        llm = Llama(
+            model_path=f"models/{model}",
+            flash_attn=False,
+            n_gpu_layers=0,
+            n_batch=8,
+            n_ctx=2048,
+            n_threads=8,
+            n_threads_batch=8,
+        )
+        llm_model = model
+    provider = LlamaCppPythonProvider(llm)
+
+    # Create the agent
+    agent = LlamaCppAgent(
+        provider,
+        system_prompt=f"{system_message}",
+        custom_messages_formatter=gemma_3_formatter,
+        debug_output=True,
+    )
 
-        # Set the settings like temperature, top-k, top-p, max tokens, etc.
-        settings = provider.get_provider_default_settings()
-        settings.temperature = temperature
-        settings.top_k = top_k
-        settings.top_p = top_p
-        settings.max_tokens = max_tokens
-        settings.repeat_penalty = repeat_penalty
-        settings.stream = True
-
-        messages = BasicChatHistory()
-
-        # Add the chat history
-        for msn in history:
-            user = {"role": Roles.user, "content": msn[0]}
-            assistant = {"role": Roles.assistant, "content": msn[1]}
-            messages.add_message(user)
-            messages.add_message(assistant)
-
-        # Get the response stream
-        stream = agent.get_chat_response(
-            message,
-            llm_sampling_settings=settings,
-            chat_history=messages,
-            returns_streaming_generator=True,
-            print_output=False,
-        )
+    # Set the settings like temperature, top-k, top-p, max tokens, etc.
+    settings = provider.get_provider_default_settings()
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
+    settings.max_tokens = max_tokens
+    settings.repeat_penalty = repeat_penalty
+    settings.stream = True
+
+    messages = BasicChatHistory()
+
+    # Add the chat history
+    for msn in history:
+        user = {"role": Roles.user, "content": msn[0]}
+        assistant = {"role": Roles.assistant, "content": msn[1]}
+        messages.add_message(user)
+        messages.add_message(assistant)
+
+    # Get the response stream
+    stream = agent.get_chat_response(
+        message,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False,
+    )
 
-        # Log the success
-        logging.info("Response stream generated successfully")
+    # Log the success
+    # logging.info("Response stream generated successfully")
 
-        # Generate the response
-        outputs = ""
-        for output in stream:
-            outputs += output
-            yield outputs
+    # Generate the response
+    outputs = ""
+    for output in stream:
+        outputs += output
+        yield outputs
 
-    # Handle exceptions that may occur during the process
-    except Exception as e:
-        # Custom exception handling
-        raise CustomExceptionHandling(e, sys) from e
+    # # Handle exceptions that may occur during the process
+    # except Exception as e:
+    #     # Custom exception handling
+    #     raise CustomExceptionHandling(e, sys) from e
 
 
 # Create a chat interface
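Review note: the Llama(...) call pins inference to the CPU (n_gpu_layers=0, flash_attn=False) with a small batch (n_batch=8) and a 2048-token context, which suits a free CPU Space. The code only checks that models/{model} already exists; hf_hub_download is imported at the top of app.py but unused in this hunk, so the GGUF presumably has to be fetched at startup. A sketch of that fetch, with a hypothetical repo id that is not taken from the commit:

from huggingface_hub import hf_hub_download

hf_hub_download(
    repo_id="Beibars003/gemma-3-800m-kazparc-gguf",  # hypothetical repo id
    filename="gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
    local_dir="models",  # respond() expects the file under models/
)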
 
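Review note: the agent is built with custom_messages_formatter=gemma_3_formatter, defined elsewhere in app.py from the MessagesFormatter and PromptMarkers imports shown in the first hunk. For orientation, a Gemma-style formatter along the lines of llama-cpp-agent's examples could look like the sketch below; the turn markers and constructor arguments are assumptions, so check the real gemma_3_formatter definition before relying on them:

from llama_cpp_agent.chat_history.messages import Roles
from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers

# Gemma chat-template turn markers (assumed, not from this commit).
gemma_markers = {
    Roles.system: PromptMarkers("", "\n"),
    Roles.user: PromptMarkers("<start_of_turn>user\n", "<end_of_turn>\n"),
    Roles.assistant: PromptMarkers("<start_of_turn>model\n", "<end_of_turn>\n"),
    Roles.tool: PromptMarkers("", ""),
}

gemma_3_formatter = MessagesFormatter(
    pre_prompt="",
    prompt_markers=gemma_markers,
    include_sys_prompt_in_first_user_message=True,
    default_stop_sequences=["<end_of_turn>", "<start_of_turn>"],
)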
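Review note: the history loop indexes msn[0] and msn[1], so it assumes Gradio's tuple-format history of (user_text, bot_text) pairs. Newer Gradio versions push gr.ChatInterface toward type="messages", where each entry is a {"role": ..., "content": ...} dict and msn[0] would raise a KeyError. A tolerant version of the loop, sketched under that assumption:

for msn in history:
    if isinstance(msn, dict):
        # messages-format history (Gradio type="messages")
        role = Roles.user if msn["role"] == "user" else Roles.assistant
        messages.add_message({"role": role, "content": msn["content"]})
    else:
        # tuple-format history: (user_text, bot_text)
        messages.add_message({"role": Roles.user, "content": msn[0]})
        messages.add_message({"role": Roles.assistant, "content": msn[1]})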
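Review note: the second hunk comments out try:, the logging.info(...) call, and the whole except block, and dedents the body one level, so the function stays syntactically valid but any exception now reaches Gradio unwrapped. If CustomExceptionHandling is not coming back, equivalent protection is available from the stdlib alone. A sketch using a hypothetical respond_safe wrapper around the existing generator:

import logging

def respond_safe(*args, **kwargs):
    # Wrap the existing generator so failures become chat messages (sketch).
    try:
        yield from respond(*args, **kwargs)
    except Exception as e:
        logging.exception("respond() failed")  # full traceback to the logs
        yield f"Error: {e}"  # readable message in the chat UI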
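Review note: the closing context line, # Create a chat interface, leads into the gr.ChatInterface setup further down in app.py. Because the loop yields the accumulated outputs string instead of per-token deltas, Gradio redraws the whole assistant message on each yield, which is what a streaming ChatInterface callback is expected to do. The wiring below is a sketch only: control types, defaults, and ordering are assumptions, and additional_inputs must line up with respond()'s extra parameters:

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            choices=["gemma_3_800M_sft_v2_translation-kazparc_latest.gguf"],
            value="gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
            label="Model",
        ),
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=1, maximum=100, value=40, step=1, label="Top-k"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.01, label="Top-p"),
        gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repeat penalty"),
    ],
)

if __name__ == "__main__":
    demo.launch()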