ItsMeDevRoland committed
Commit 56b07de · verified · 1 Parent(s): 761a03d

Update app.py

Files changed (1): app.py +10 -29
app.py CHANGED
@@ -45,7 +45,7 @@ def download_and_load_model(
         filename (str): Specific GGUF model filename
 
     Returns:
-        str: Path to downloaded model
+        tuple: Loaded model and model path
     """
     try:
         # Try to import llama-cpp directly to ensure it's available
@@ -96,27 +96,9 @@ except Exception as e:
     print(f"Fatal error initializing model: {e}")
     sys.exit(1)
 
-def format_history(history):
-    """
-    Format chat history into a list of messages
-
-    Args:
-        history (list): Chat history of (user, assistant) tuples
-
-    Returns:
-        list: Formatted messages for model input
-    """
-    messages = []
-    for user, assistant in history:
-        if user:
-            messages.append({"role": "user", "content": user})
-        if assistant:
-            messages.append({"role": "assistant", "content": assistant})
-    return messages
-
 def respond(
     message,
-    history: list[tuple[str, str]],
+    history,
     system_message="You are a friendly Chatbot.",
     max_tokens=512,
     temperature=0.7,
@@ -133,8 +115,8 @@ def respond(
         temperature (float): Sampling temperature
         top_p (float): Nucleus sampling probability threshold
 
-    Yields:
-        str: Streaming response
+    Returns:
+        str: Generated response
     """
     # Prepare the full prompt with system message and history
    full_prompt = system_message + "\n\n"
@@ -150,9 +132,9 @@ def respond(
     full_prompt += f"User: {message}\n"
     full_prompt += "Assistant: "
 
-    # Generate response with streaming
-    response = ""
+    # Generate response
     try:
+        response = ""
         for chunk in llm_model.generate(
             full_prompt,
             max_tokens=max_tokens,
@@ -162,10 +144,11 @@ def respond(
             stream=True
         ):
             response += chunk
-            yield response
+
+        return response
     except Exception as e:
         print(f"Error generating response: {e}")
-        yield f"An error occurred: {e}"
+        return f"An error occurred: {e}"
 
 # Create Gradio interface with updated configuration
 demo = gr.ChatInterface(
@@ -181,9 +164,7 @@ demo = gr.ChatInterface(
             step=0.05,
             label="Top-p (nucleus sampling)",
         ),
-    ],
-    # Explicitly set chatbot type to messages
-    chatbot=gr.Chatbot(type="messages")
+    ]
 )
 
 if __name__ == "__main__":