Beibars003 committed on
Commit
81b72f9
·
verified ·
1 Parent(s): 80757d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -72
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import os
 
 
2
  import sys
3
  from typing import List, Tuple
4
  from llama_cpp import Llama
@@ -9,6 +11,9 @@ from llama_cpp_agent.chat_history.messages import Roles
9
  from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers
10
  from huggingface_hub import hf_hub_download
11
  import gradio as gr
 
 
 
12
 
13
  # Load the Environment Variables from .env file
14
  huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
@@ -23,43 +28,32 @@ hf_hub_download(
23
  local_dir="./models",
24
  )
25
 
 
26
  # Define the prompt markers for Gemma 3
27
  gemma_3_prompt_markers = {
28
- Roles.system: PromptMarkers("<start_of_turn>system\n", "<end_of_turn>\n"),
29
  Roles.user: PromptMarkers("<start_of_turn>user\n", "<end_of_turn>\n"),
30
  Roles.assistant: PromptMarkers("<start_of_turn>assistant", ""),
31
- Roles.tool: PromptMarkers("", ""),
 
32
  }
33
 
 
34
  gemma_3_formatter = MessagesFormatter(
35
- pre_prompt="",
36
  prompt_markers=gemma_3_prompt_markers,
37
- include_sys_prompt_in_first_user_message=True,
38
  default_stop_sequences=["<end_of_turn>", "<start_of_turn>"],
39
- strip_prompt=False,
40
- bos_token="<bos>",
41
- eos_token="<eos>",
42
  )
43
 
44
- # Translation direction to prompts mapping
45
- direction_to_prompts = {
46
- "English to Kazakh": {
47
- "system": "You are a professional translator. Translate the following sentence into қазақ.",
48
- "prefix": "<src=en><tgt=kk>"
49
- },
50
- "Kazakh to English": {
51
- "system": "Сіз кәсіби аудармашысыз. Төмендегі сөйлемді English тіліне аударыңыз.",
52
- "prefix": "<src=kk><tgt=en>"
53
- },
54
- "Kazakh to Russian": {
55
- "system": "Сіз кәсіби аудармашысыз. Төмендегі сөйлемді орыс тіліне аударыңыз.",
56
- "prefix": "<src=kk><tgt=ru>"
57
- },
58
- "Russian to Kazakh": {
59
- "system": "Вы профессиональный переводчик. Переведите следующее предложение на қазақ язык.",
60
- "prefix": "<src=ru><tgt=kk>"
61
- }
62
- }
63
 
64
  llm = None
65
  llm_model = None
@@ -67,8 +61,8 @@ llm_model = None
67
  def respond(
68
  message: str,
69
  history: List[Tuple[str, str]],
70
- direction: str,
71
- model: str = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
72
  max_tokens: int = 64,
73
  temperature: float = 0.7,
74
  top_p: float = 0.95,
@@ -76,34 +70,37 @@ def respond(
76
  repeat_penalty: float = 1.1,
77
  ):
78
  """
79
- Respond to a message by translating it using the specified direction.
80
-
81
  Args:
82
- message (str): The text to translate.
83
- history (List[Tuple[str, str]]): The chat history.
84
- direction (str): The translation direction (e.g., "English to Kazakh").
85
- model (str): The model file to use.
86
- max_tokens (int): Maximum number of tokens to generate.
87
- temperature (float): Sampling temperature.
88
- top_p (float): Top-p sampling parameter.
89
- top_k (int): Top-k sampling parameter.
90
- repeat_penalty (float): Penalty for repetition.
91
-
92
- Yields:
93
- str: The translated text as it is generated.
94
  """
 
 
 
 
95
 
96
  # Ensure model is not None
97
-
98
  if model is None:
99
  model = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf"
100
-
101
- global llm, llm_model
102
  if llm is None or llm_model != model:
 
103
  model_path = f"models/{model}"
104
  if not os.path.exists(model_path):
105
- yield f"Error: Model file not found at {model}. SUIIIIIIII ankara messi"
106
  return
 
107
  llm = Llama(
108
  model_path=f"models/{model}",
109
  flash_attn=False,
@@ -116,18 +113,15 @@ def respond(
116
  llm_model = model
117
  provider = LlamaCppPythonProvider(llm)
118
 
119
- # Get system prompt and user prefix based on direction
120
- prompts = direction_to_prompts[direction]
121
- system_message = prompts["system"]
122
- user_prefix = prompts["prefix"]
123
-
124
  agent = LlamaCppAgent(
125
  provider,
126
- system_prompt=system_message,
127
  custom_messages_formatter=gemma_3_formatter,
128
  debug_output=True,
129
  )
130
 
 
131
  settings = provider.get_provider_default_settings()
132
  settings.temperature = temperature
133
  settings.top_k = top_k
@@ -137,35 +131,59 @@ def respond(
137
  settings.stream = True
138
 
139
  messages = BasicChatHistory()
140
- for user_msg, assistant_msg in history:
141
- full_user_msg = user_prefix + " " + user_msg
142
- messages.add_message({"role": Roles.user, "content": full_user_msg})
143
- messages.add_message({"role": Roles.assistant, "content": assistant_msg})
144
 
145
- full_message = user_prefix + " " + message
 
 
 
 
 
146
 
 
147
  stream = agent.get_chat_response(
148
- full_message,
149
  llm_sampling_settings=settings,
150
  chat_history=messages,
151
  returns_streaming_generator=True,
152
  print_output=False,
153
  )
154
 
 
 
 
 
155
  outputs = ""
156
  for output in stream:
157
  outputs += output
158
  yield outputs
159
 
 
 
 
 
 
 
 
160
  demo = gr.ChatInterface(
161
  respond,
162
- examples=[["Hello"], ["Сәлем"], ["Привет"]],
163
- additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
 
 
164
  additional_inputs=[
165
  gr.Dropdown(
166
- choices=["English to Kazakh", "Kazakh to English", "Kazakh to Russian", "Russian to Kazakh"],
167
- label="Translation Direction",
168
- info="Select the direction of translation"
 
 
 
 
 
 
 
 
 
169
  ),
170
  gr.Slider(
171
  minimum=512,
@@ -173,7 +191,7 @@ demo = gr.ChatInterface(
173
  value=1024,
174
  step=1,
175
  label="Max Tokens",
176
- info="Maximum length of the translation"
177
  ),
178
  gr.Slider(
179
  minimum=0.1,
@@ -181,7 +199,7 @@ demo = gr.ChatInterface(
181
  value=0.7,
182
  step=0.1,
183
  label="Temperature",
184
- info="Controls randomness (higher = more creative)"
185
  ),
186
  gr.Slider(
187
  minimum=0.1,
@@ -189,7 +207,7 @@ demo = gr.ChatInterface(
189
  value=0.95,
190
  step=0.05,
191
  label="Top-p",
192
- info="Nucleus sampling threshold"
193
  ),
194
  gr.Slider(
195
  minimum=1,
@@ -197,7 +215,7 @@ demo = gr.ChatInterface(
197
  value=40,
198
  step=1,
199
  label="Top-k",
200
- info="Limits vocabulary to top K tokens"
201
  ),
202
  gr.Slider(
203
  minimum=1.0,
@@ -205,22 +223,24 @@ demo = gr.ChatInterface(
205
  value=1.1,
206
  step=0.1,
207
  label="Repetition Penalty",
208
- info="Penalizes repeated words"
209
  ),
210
  ],
211
  theme="Ocean",
212
- submit_btn="Translate",
213
  stop_btn="Stop",
214
- title="Kazakh Translation Model",
215
- description="Translate text between Kazakh, English, and Russian using a specialized language model.",
216
  chatbot=gr.Chatbot(scale=1, show_copy_button=True),
217
  cache_examples=False,
218
  )
219
 
 
 
220
  if __name__ == "__main__":
221
  demo.launch(
222
  share=False,
223
  server_name="0.0.0.0",
224
  server_port=7860,
225
  show_api=False,
226
- )
 
1
  import os
2
+ import json
3
+ import subprocess
4
  import sys
5
  from typing import List, Tuple
6
  from llama_cpp import Llama
 
11
  from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers
12
  from huggingface_hub import hf_hub_download
13
  import gradio as gr
14
+ # from logger import logging
15
+ # from exception import CustomExceptionHandling
16
+
17
 
18
  # Load the Environment Variables from .env file
19
  huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 
28
  local_dir="./models",
29
  )
30
 
31
+
32
  # Define the prompt markers for Gemma 3
33
  gemma_3_prompt_markers = {
34
+ Roles.system: PromptMarkers("<start_of_turn>system\n", "<end_of_turn>\n"), # System prompt should be included within user message
35
  Roles.user: PromptMarkers("<start_of_turn>user\n", "<end_of_turn>\n"),
36
  Roles.assistant: PromptMarkers("<start_of_turn>assistant", ""),
37
+
38
+ Roles.tool: PromptMarkers("", ""), # If you need tool support
39
  }
40
 
41
+ # Create the formatter
42
  gemma_3_formatter = MessagesFormatter(
43
+ pre_prompt="", # No pre-prompt
44
  prompt_markers=gemma_3_prompt_markers,
45
+ include_sys_prompt_in_first_user_message=True, # Include system prompt in first user message
46
  default_stop_sequences=["<end_of_turn>", "<start_of_turn>"],
47
+ strip_prompt=False, # Don't strip whitespace from the prompt
48
+ bos_token="<bos>", # Beginning of sequence token for Gemma 3
49
+ eos_token="<eos>", # End of sequence token for Gemma 3
50
  )
51
 
52
+
53
+ # Set the title and description
54
+ title = "Kazakh Language Model"
55
+ description = """"""
56
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  llm = None
59
  llm_model = None
 
61
  def respond(
62
  message: str,
63
  history: List[Tuple[str, str]],
64
+ model: str = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf", # Set default model
65
+ system_message: str = "",
66
  max_tokens: int = 64,
67
  temperature: float = 0.7,
68
  top_p: float = 0.95,
 
70
  repeat_penalty: float = 1.1,
71
  ):
72
  """
73
+ Respond to a message using the Gemma3 model via Llama.cpp.
 
74
  Args:
75
+ - message (str): The message to respond to.
76
+ - history (List[Tuple[str, str]]): The chat history.
77
+ - model (str): The model to use.
78
+ - system_message (str): The system message to use.
79
+ - max_tokens (int): The maximum number of tokens to generate.
80
+ - temperature (float): The temperature of the model.
81
+ - top_p (float): The top-p of the model.
82
+ - top_k (int): The top-k of the model.
83
+ - repeat_penalty (float): The repetition penalty of the model.
84
+ Returns:
85
+ str: The response to the message.
 
86
  """
87
+ # try:
88
+ # Load the global variables
89
+ global llm
90
+ global llm_model
91
 
92
  # Ensure model is not None
 
93
  if model is None:
94
  model = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf"
95
+
96
+ # Load the model
97
  if llm is None or llm_model != model:
98
+ # Check if model file exists
99
  model_path = f"models/{model}"
100
  if not os.path.exists(model_path):
101
+ yield f"Error: Model file not found at {model_path}. Please check your model path."
102
  return
103
+
104
  llm = Llama(
105
  model_path=f"models/{model}",
106
  flash_attn=False,
 
113
  llm_model = model
114
  provider = LlamaCppPythonProvider(llm)
115
 
116
+ # Create the agent
 
 
 
 
117
  agent = LlamaCppAgent(
118
  provider,
119
+ system_prompt=f"{system_message}",
120
  custom_messages_formatter=gemma_3_formatter,
121
  debug_output=True,
122
  )
123
 
124
+ # Set the settings like temperature, top-k, top-p, max tokens, etc.
125
  settings = provider.get_provider_default_settings()
126
  settings.temperature = temperature
127
  settings.top_k = top_k
 
131
  settings.stream = True
132
 
133
  messages = BasicChatHistory()
 
 
 
 
134
 
135
+ # Add the chat history
136
+ for msn in history:
137
+ user = {"role": Roles.user, "content": msn[0]}
138
+ assistant = {"role": Roles.assistant, "content": msn[1]}
139
+ messages.add_message(user)
140
+ messages.add_message(assistant)
141
 
142
+ # Get the response stream
143
  stream = agent.get_chat_response(
144
+ message,
145
  llm_sampling_settings=settings,
146
  chat_history=messages,
147
  returns_streaming_generator=True,
148
  print_output=False,
149
  )
150
 
151
+ # Log the success
152
+ # logging.info("Response stream generated successfully")
153
+
154
+ # Generate the response
155
  outputs = ""
156
  for output in stream:
157
  outputs += output
158
  yield outputs
159
 
160
+ # # Handle exceptions that may occur during the process
161
+ # except Exception as e:
162
+ # # Custom exception handling
163
+ # raise CustomExceptionHandling(e, sys) from e
164
+
165
+
166
+ # Create a chat interface
167
  demo = gr.ChatInterface(
168
  respond,
169
+ examples=[["Сәлем"], ["Привет"], ["Hello"]],
170
+ additional_inputs_accordion=gr.Accordion(
171
+ label="⚙️ Parameters", open=False, render=False
172
+ ),
173
  additional_inputs=[
174
  gr.Dropdown(
175
+ choices=[
176
+ "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
177
+ ],
178
+ value="gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
179
+ label="Model",
180
+ info="Select the AI model to use for chat",
181
+ ),
182
+ gr.Textbox(
183
+ value="You are a helpful assistant.",
184
+ label="System Prompt",
185
+ info="Define the AI assistant's personality and behavior",
186
+ lines=2,
187
  ),
188
  gr.Slider(
189
  minimum=512,
 
191
  value=1024,
192
  step=1,
193
  label="Max Tokens",
194
+ info="Maximum length of response (higher = longer replies)",
195
  ),
196
  gr.Slider(
197
  minimum=0.1,
 
199
  value=0.7,
200
  step=0.1,
201
  label="Temperature",
202
+ info="Creativity level (higher = more creative, lower = more focused)",
203
  ),
204
  gr.Slider(
205
  minimum=0.1,
 
207
  value=0.95,
208
  step=0.05,
209
  label="Top-p",
210
+ info="Nucleus sampling threshold",
211
  ),
212
  gr.Slider(
213
  minimum=1,
 
215
  value=40,
216
  step=1,
217
  label="Top-k",
218
+ info="Limit vocabulary choices to top K tokens",
219
  ),
220
  gr.Slider(
221
  minimum=1.0,
 
223
  value=1.1,
224
  step=0.1,
225
  label="Repetition Penalty",
226
+ info="Penalize repeated words (higher = less repetition)",
227
  ),
228
  ],
229
  theme="Ocean",
230
+ submit_btn="Send",
231
  stop_btn="Stop",
232
+ title=title,
233
+ description=description,
234
  chatbot=gr.Chatbot(scale=1, show_copy_button=True),
235
  cache_examples=False,
236
  )
237
 
238
+
239
+ # Launch the chat interface
240
  if __name__ == "__main__":
241
  demo.launch(
242
  share=False,
243
  server_name="0.0.0.0",
244
  server_port=7860,
245
  show_api=False,
246
+ )