Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,13 +2,11 @@ import gradio as gr
|
|
| 2 |
from huggingface_hub import InferenceClient
|
| 3 |
from transformers import AutoTokenizer # Import the tokenizer
|
| 4 |
|
| 5 |
-
# Import the tokenizer
|
| 6 |
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
|
| 7 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
| 8 |
-
|
| 9 |
# Define a maximum context length (tokens). Check your model's documentation!
|
| 10 |
MAX_CONTEXT_LENGTH = 4096 # Example: Adjust this based on your model!
|
| 11 |
-
|
| 12 |
default_nvc_prompt_template = r"""<|system|>You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help users translate their stories or judgments into feelings and needs, and work together to identify a clear request. Follow these steps:
|
| 13 |
1. **Goal of the Conversation**
|
| 14 |
- Translate the user’s story or judgments into feelings and needs.
|
|
@@ -74,13 +72,9 @@ default_nvc_prompt_template = r"""<|system|>You are Roos, an NVC (Nonviolent Com
|
|
| 74 |
- “I sense some frustration. Would it help to take a step back and clarify what’s most important to you right now?”
|
| 75 |
13. **Ending the Conversation**
|
| 76 |
- If the user indicates they want to end the conversation, thank them for sharing and offer to continue later:
|
| 77 |
-
- “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help.”</s>"""
|
| 78 |
-
|
| 79 |
-
def count_tokens(text: str) -> int:
    """Return how many tokens the model tokenizer produces for *text*.

    Used to budget the conversation history against MAX_CONTEXT_LENGTH.
    """
    token_ids = tokenizer.encode(text)
    return len(token_ids)
|
| 82 |
-
|
| 83 |
-
def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
    """Trim *history* so that it, plus *system_message*, fits in *max_length* tokens.

    Walks the conversation from newest turn to oldest, keeping whole
    (user, assistant) turns until the token budget is exhausted, then
    returns the kept turns in their original chronological order.

    Args:
        history: Conversation as (user_message, assistant_message) pairs,
            oldest first.
        system_message: System prompt whose tokens count against the budget.
        max_length: Maximum total number of tokens allowed.

    Returns:
        The newest turns of *history* that fit within the token budget,
        oldest first.
    """
    budget_used = count_tokens(system_message)
    kept: list[tuple[str, str]] = []
    # Newest turns matter most, so walk backwards and stop at the first
    # whole turn that would overflow the budget.
    for user_msg, assistant_msg in reversed(history):
        turn_cost = 0
        if user_msg:
            turn_cost += count_tokens(user_msg)
        if assistant_msg:
            turn_cost += count_tokens(assistant_msg)
        if budget_used + turn_cost > max_length:
            break
        kept.insert(0, (user_msg, assistant_msg))  # restore chronological order
        budget_used += turn_cost
    return kept
|
| 111 |
-
|
| 112 |
-
def respond(
|
| 113 |
message,
|
| 114 |
history: list[tuple[str, str]],
|
| 115 |
system_message, # System message is now an argument
|
|
@@ -118,7 +107,6 @@ def respond(
|
|
| 118 |
top_p,
|
| 119 |
):
|
| 120 |
"""Responds to a user message, maintaining conversation history, using special tokens and message list."""
|
| 121 |
-
|
| 122 |
if message.lower() == "clear memory": # Check for the clear memory command
|
| 123 |
return "", [] # Return empty message and empty history to reset the chat
|
| 124 |
|
|
@@ -128,11 +116,10 @@ def respond(
|
|
| 128 |
messages = [{"role": "system", "content": formatted_system_message}] # Start with system message as before
|
| 129 |
for user_msg, assistant_msg in truncated_history:
|
| 130 |
if user_msg:
|
| 131 |
-
messages.append({"role": "user", "content":
|
| 132 |
if assistant_msg:
|
| 133 |
-
messages.append({"role": "assistant", "content":
|
| 134 |
-
|
| 135 |
-
messages.append({"role": "user", "content": f"<|user|>\n{message}</s>"}) # Format current user message
|
| 136 |
|
| 137 |
response = ""
|
| 138 |
try:
|
|
@@ -145,11 +132,15 @@ def respond(
|
|
| 145 |
):
|
| 146 |
token = chunk.choices[0].delta.content
|
| 147 |
response += token
|
| 148 |
-
yield
|
|
|
|
|
|
|
|
|
|
| 149 |
except Exception as e:
|
| 150 |
print(f"An error occurred: {e}") # It's a good practice add a try-except block
|
| 151 |
yield "I'm sorry, I encountered an error. Please try again."
|
| 152 |
|
|
|
|
| 153 |
# --- Gradio Interface ---
|
| 154 |
demo = gr.ChatInterface(
|
| 155 |
respond,
|
|
|
|
| 2 |
from huggingface_hub import InferenceClient
|
| 3 |
from transformers import AutoTokenizer # Import the tokenizer
|
| 4 |
|
| 5 |
+
# Import the tokenizer - No need to import twice, remove the second import
|
| 6 |
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
|
| 7 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
|
|
| 8 |
# Define a maximum context length (tokens). Check your model's documentation!
|
| 9 |
MAX_CONTEXT_LENGTH = 4096 # Example: Adjust this based on your model!
|
|
|
|
| 10 |
default_nvc_prompt_template = r"""<|system|>You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help users translate their stories or judgments into feelings and needs, and work together to identify a clear request. Follow these steps:
|
| 11 |
1. **Goal of the Conversation**
|
| 12 |
- Translate the user’s story or judgments into feelings and needs.
|
|
|
|
| 72 |
- “I sense some frustration. Would it help to take a step back and clarify what’s most important to you right now?”
|
| 73 |
13. **Ending the Conversation**
|
| 74 |
- If the user indicates they want to end the conversation, thank them for sharing and offer to continue later:
|
| 75 |
+
- “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help.”</s>"""

def count_tokens(text: str) -> int:
|
|
|
|
|
|
|
| 76 |
"""Counts the number of tokens in a given string."""
|
| 77 |
+
    return len(tokenizer.encode(text))

def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
|
|
|
|
|
|
|
| 78 |
"""Truncates the conversation history to fit within the maximum token limit.
|
| 79 |
|
| 80 |
Args:
|
|
|
|
| 88 |
truncated_history = []
|
| 89 |
system_message_tokens = count_tokens(system_message)
|
| 90 |
current_length = system_message_tokens
|
|
|
|
| 91 |
# Iterate backwards through the history (newest to oldest)
|
| 92 |
for user_msg, assistant_msg in reversed(history):
|
| 93 |
user_tokens = count_tokens(user_msg) if user_msg else 0
|
| 94 |
assistant_tokens = count_tokens(assistant_msg) if assistant_msg else 0
|
| 95 |
turn_tokens = user_tokens + assistant_tokens
|
|
|
|
| 96 |
if current_length + turn_tokens <= max_length:
|
| 97 |
truncated_history.insert(0, (user_msg, assistant_msg)) # Add to the beginning
|
| 98 |
current_length += turn_tokens
|
| 99 |
else:
|
| 100 |
break # Stop adding turns if we exceed the limit
|
| 101 |
+
    return truncated_history

def respond(
|
|
|
|
|
|
|
|
|
|
| 102 |
message,
|
| 103 |
history: list[tuple[str, str]],
|
| 104 |
system_message, # System message is now an argument
|
|
|
|
| 107 |
top_p,
|
| 108 |
):
|
| 109 |
"""Responds to a user message, maintaining conversation history, using special tokens and message list."""
|
|
|
|
| 110 |
if message.lower() == "clear memory": # Check for the clear memory command
|
| 111 |
return "", [] # Return empty message and empty history to reset the chat
|
| 112 |
|
|
|
|
| 116 |
messages = [{"role": "system", "content": formatted_system_message}] # Start with system message as before
|
| 117 |
for user_msg, assistant_msg in truncated_history:
|
| 118 |
if user_msg:
|
| 119 |
+
messages.append({"role": "user", "content": user_msg}) # Format history user message - Removed extra tags
|
| 120 |
if assistant_msg:
|
| 121 |
+
messages.append({"role": "assistant", "content": assistant_msg}) # Format history assistant message - Removed extra tags
|
| 122 |
+
messages.append({"role": "user", "content": message}) # Format current user message - Removed extra tags
|
|
|
|
| 123 |
|
| 124 |
response = ""
|
| 125 |
try:
|
|
|
|
| 132 |
):
|
| 133 |
token = chunk.choices[0].delta.content
|
| 134 |
response += token
|
| 135 |
+
# Post-processing to remove prefixes (example - add to your existing yield) - Solution 3 (Fallback)
|
| 136 |
+
processed_response = response.replace("User:", "").replace("Assistant:", "").replace("Roos:", "").lstrip()
|
| 137 |
+
yield processed_response
|
| 138 |
+
|
| 139 |
except Exception as e:
|
| 140 |
print(f"An error occurred: {e}") # It's a good practice add a try-except block
|
| 141 |
yield "I'm sorry, I encountered an error. Please try again."
|
| 142 |
|
| 143 |
+
|
| 144 |
# --- Gradio Interface ---
|
| 145 |
demo = gr.ChatInterface(
|
| 146 |
respond,
|