Spaces:
Sleeping
Sleeping
Swaroop Ingavale
committed on
Commit
·
02ce8d2
1
Parent(s):
e2b56c1
Update
Browse files
app.py
CHANGED
|
@@ -1,272 +1,236 @@
|
|
| 1 |
-
import
|
| 2 |
-
from
|
| 3 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 4 |
-
import numpy as np
|
| 5 |
from groq import Groq
|
| 6 |
-
import
|
| 7 |
-
import
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
""
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
return []
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
for
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
if
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
try:
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
except AttributeError:
|
| 92 |
-
# Fall back to the format in main.py
|
| 93 |
-
summary_content = summary.choices[0].text
|
| 94 |
-
|
| 95 |
-
memory.append({"role": "system", "content": summary_content})
|
| 96 |
-
|
| 97 |
-
def get_chatbot_response(
|
| 98 |
-
message,
|
| 99 |
-
history,
|
| 100 |
-
system_message,
|
| 101 |
-
max_tokens,
|
| 102 |
-
temperature,
|
| 103 |
-
top_p,
|
| 104 |
-
use_memory=True,
|
| 105 |
-
memory_size=50,
|
| 106 |
-
):
|
| 107 |
-
"""
|
| 108 |
-
Generate a response using the chatbot with memory capabilities.
|
| 109 |
-
"""
|
| 110 |
-
if use_memory:
|
| 111 |
-
# Process history to maintain memory
|
| 112 |
-
for i, (user_msg, bot_msg) in enumerate(history):
|
| 113 |
-
if i < len(history) - 1: # Skip the current message which is already in the history
|
| 114 |
-
add_to_memory("user", user_msg)
|
| 115 |
-
if bot_msg: # Check if bot message exists (might be None for the most recent one)
|
| 116 |
-
add_to_memory("assistant", bot_msg)
|
| 117 |
-
|
| 118 |
-
# Construct prompt with relevant memory
|
| 119 |
-
prompt = construct_prompt(memory, message)
|
| 120 |
-
|
| 121 |
-
# Use the prompt with groq client
|
| 122 |
-
completion = client.chat.completions.create(
|
| 123 |
messages=[
|
| 124 |
-
{"role": "system", "content":
|
| 125 |
-
{"role": "user", "content":
|
| 126 |
],
|
| 127 |
-
|
| 128 |
-
temperature=temperature,
|
| 129 |
-
max_tokens=max_tokens,
|
| 130 |
-
top_p=top_p,
|
| 131 |
-
stream=True,
|
| 132 |
)
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
completion = client.chat.completions.create(
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
temperature=
|
| 164 |
-
max_tokens=
|
| 165 |
-
top_p=
|
| 166 |
-
stream=
|
|
|
|
| 167 |
)
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
padding: 10px 15px !important;
|
| 201 |
-
}
|
| 202 |
-
|
| 203 |
-
.bot-message {
|
| 204 |
-
background-color: #f1f8e9 !important;
|
| 205 |
-
border-radius: 15px !important;
|
| 206 |
-
padding: 10px 15px !important;
|
| 207 |
-
}
|
| 208 |
-
"""
|
| 209 |
-
|
| 210 |
-
# Create the Gradio interface
|
| 211 |
-
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
|
| 212 |
-
# Header
|
| 213 |
-
with gr.Row(elem_classes="header-row"):
|
| 214 |
-
gr.Markdown("""
|
| 215 |
-
<div style="text-align: center; margin-bottom: 10px; padding: 10px; background-color: #f0f4f8; border-radius: 8px;">
|
| 216 |
-
<h1 style="margin: 0; color: #2c3e50;">AI Chatbot With Memory</h1>
|
| 217 |
-
<h3 style="margin: 5px 0 0 0; color: #34495e;">Developed by Dhiraj and Swaroop</h3>
|
| 218 |
-
</div>
|
| 219 |
-
""")
|
| 220 |
-
|
| 221 |
-
with gr.Row():
|
| 222 |
-
with gr.Column(scale=3):
|
| 223 |
-
# Create ChatInterface without css_classes parameter
|
| 224 |
-
chatbot = gr.ChatInterface(
|
| 225 |
-
get_chatbot_response,
|
| 226 |
-
additional_inputs=[
|
| 227 |
-
gr.Textbox(value="You are a helpful assistant with memory capabilities.", label="System message"),
|
| 228 |
-
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
| 229 |
-
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
| 230 |
-
gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
|
| 231 |
-
gr.Checkbox(value=True, label="Use Memory", info="Enable or disable memory capabilities"),
|
| 232 |
-
gr.Slider(minimum=10, maximum=200, value=50, step=10, label="Memory Size", info="Maximum number of entries in memory"),
|
| 233 |
-
],
|
| 234 |
-
examples=[
|
| 235 |
-
["Tell me about machine learning"],
|
| 236 |
-
["What are the best practices for data preprocessing?"],
|
| 237 |
-
["Can you explain neural networks?"],
|
| 238 |
-
],
|
| 239 |
-
title="Chat with AI Assistant",
|
| 240 |
-
# Removed css_classes parameter
|
| 241 |
-
)
|
| 242 |
-
|
| 243 |
-
with gr.Column(scale=1):
|
| 244 |
-
with gr.Group():
|
| 245 |
-
gr.Markdown("## Memory Management")
|
| 246 |
-
memory_display = gr.Textbox(label="Memory Contents", lines=20, max_lines=30, interactive=False)
|
| 247 |
-
view_memory_btn = gr.Button("View Memory Contents")
|
| 248 |
-
clear_memory_btn = gr.Button("Clear Memory")
|
| 249 |
-
summarize_memory_btn = gr.Button("Summarize Memory")
|
| 250 |
-
memory_status = gr.Textbox(label="Memory Status", lines=2, interactive=False)
|
| 251 |
-
|
| 252 |
-
# Set up button actions
|
| 253 |
-
view_memory_btn.click(view_memory, inputs=[], outputs=[memory_display])
|
| 254 |
-
clear_memory_btn.click(clear_memory_action, inputs=[], outputs=[memory_status])
|
| 255 |
-
summarize_memory_btn.click(
|
| 256 |
-
lambda: (summarize_memory(), "Memory summarized successfully."),
|
| 257 |
-
inputs=[],
|
| 258 |
-
outputs=[memory_status]
|
| 259 |
-
)
|
| 260 |
-
|
| 261 |
-
# Footer
|
| 262 |
-
with gr.Row(elem_classes="footer-row"):
|
| 263 |
-
gr.Markdown(f"""
|
| 264 |
-
<div style="text-align: center; margin-top: 20px; padding: 10px; background-color: #f0f4f8; border-radius: 8px;">
|
| 265 |
-
<p style="margin: 0; color: #2c3e50;">
|
| 266 |
-
Developed by Dhiraj and Swaroop | © {datetime.datetime.now().year} | Version 1.0
|
| 267 |
-
</p>
|
| 268 |
-
</div>
|
| 269 |
-
""")
|
| 270 |
-
|
| 271 |
-
if __name__ == "__main__":
|
| 272 |
-
demo.launch()
|
|
|
|
| 1 |
+
import os
from flask import Flask, render_template, request, jsonify, session
from sklearn.metrics.pairwise import cosine_similarity
from groq import Groq
import numpy as np
import logging
from transformers import AutoTokenizer, AutoModel  # embedding tokenizer + encoder
import torch
import torch.nn.functional as F

# Configure logging
logging.basicConfig(level=logging.INFO)

# --- Flask App Setup --- (MUST come before routes or app-dependent code) ---
app = Flask(__name__)
# Session-cookie signing key; override via env var outside local dev.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'a_default_secret_key_please_change')

# --- Initialize Models ---
device = torch.device("cpu")  # Force CPU for free tier
if torch.cuda.is_available():
    device = torch.device("cuda")  # Should not happen on free tier
logging.info(f"Using device: {device}")

# Globals consumed by the helpers/routes below; they stay None when
# initialization fails so request handlers can degrade gracefully
# instead of crashing at import time.
tokenizer = None
model = None
client = None

try:
    # Load tokenizer and model from HuggingFace Hub using transformers
    tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
    # from_tf=True loads TensorFlow weights — presumably the PyTorch
    # checkpoint was unavailable previously; TODO confirm still needed.
    model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2', from_tf=True).to(device)
    logging.info("Tokenizer and AutoModel loaded successfully with from_tf=True.")
except Exception as e:
    logging.error(f"Error loading Transformer models: {e}")
    tokenizer = None
    model = None

# Initialize the Groq client
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    logging.error("GROQ_API_KEY environment variable not set.")
    client = None
else:
    client = Groq(api_key=groq_api_key)
    logging.info("Groq client initialized.")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# --- Helper function for Mean Pooling ---
def mean_pooling(model_output, attention_mask):
    """Average each sequence's token embeddings, weighted by the attention
    mask so padding positions contribute nothing.

    ``model_output[0]`` is assumed to be the hidden-state tensor of shape
    (batch, seq_len, dim) — TODO confirm against the encoder's output;
    returns a (batch, dim) tensor of pooled sentence embeddings.
    """
    hidden_states = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float().to(hidden_states.device)
    weighted_sum = torch.sum(hidden_states * mask, 1)
    token_counts = torch.clamp(mask.sum(1), min=1e-9)  # guard against all-pad rows
    return weighted_sum / token_counts
| 54 |
+
|
| 55 |
+
# --- Function to get embedding ---
def get_embedding(text):
    """Encode *text* into an L2-normalized sentence vector.

    Returns a 1-D numpy array on success, or ``None`` when the transformer
    models were never loaded or the encoding step raises.
    """
    if tokenizer is None or model is None:
        logging.error("Embedding models not loaded. Cannot generate embedding.")
        return None
    try:
        tokens = tokenizer(text, padding=True, truncation=True, return_tensors='pt').to(device)
        # Inference only — no gradients needed.
        with torch.no_grad():
            outputs = model(**tokens)
        pooled = mean_pooling(outputs, tokens['attention_mask'])
        normalized = F.normalize(pooled, p=2, dim=1)
        return normalized.cpu().numpy()[0]
    except Exception as e:
        logging.error(f"Error generating embedding: {e}")
        return None
|
| 70 |
+
|
| 71 |
+
# --- Memory Management Functions (rely on get_embedding) ---

def add_to_memory(mem_list, role, content):
    """Append one message dict (role/content/embedding) to *mem_list*.

    Empty or whitespace-only content is ignored. When embedding fails the
    entry is still stored, with ``embedding: None``, so the transcript is
    preserved. Returns the (mutated) list for chaining.
    """
    if not content or not content.strip():
        logging.warning(f"Attempted to add empty content to memory for role: {role}")
        return mem_list
    embedding = get_embedding(content)
    if embedding is None:
        logging.warning(f"Failed to get embedding for message: {content[:50]}...")
        mem_list.append({"role": role, "content": content, "embedding": None})
    else:
        # tolist() keeps the entry JSON/session-serializable.
        mem_list.append({"role": role, "content": content, "embedding": embedding.tolist()})
    return mem_list
|
| 85 |
+
|
| 86 |
+
def retrieve_relevant_memory(mem_list, user_input, top_k=5):
    """Return up to *top_k* memory entries most cosine-similar to *user_input*.

    Entries whose stored embedding is missing, not a list, wrong-dimensioned,
    or unconvertible are skipped with a warning. Returns ``[]`` whenever
    retrieval is impossible (empty memory, models not loaded, or the user
    input could not be embedded).
    """
    if not mem_list or tokenizer is None or model is None:
        return []
    user_embedding = get_embedding(user_input)
    if user_embedding is None:
        logging.error("Failed to get user input embedding for retrieval.")
        return []

    candidates = []
    candidate_vectors = []
    expected_shape = (model.config.hidden_size,)  # embedding dim from model config
    for entry in mem_list:
        stored = entry.get("embedding")
        if stored is None or not isinstance(stored, list):
            continue
        try:
            vector = np.array(stored)
            if vector.shape == expected_shape:
                candidates.append(entry)
                candidate_vectors.append(vector)
            else:
                logging.warning(f"Embedding dimension mismatch for memory entry: {entry['content'][:50]}...")
        except Exception as conv_e:
            logging.warning(f"Could not convert embedding for memory entry: {entry['content'][:50]}... Error: {conv_e}")

    if not candidates:
        return []
    scores = cosine_similarity([user_embedding], np.array(candidate_vectors))[0]
    # Sort key is the score only, so unorderable dicts are never compared.
    ranked = sorted(zip(scores, candidates), key=lambda pair: pair[0], reverse=True)
    return [entry for _, entry in ranked[:top_k]]
|
| 114 |
+
|
| 115 |
+
def construct_prompt(mem_list, user_input, max_tokens_in_prompt=1000):
    """Assemble the chat-completion message list for the API.

    Layout: a fixed system prompt, then memory entries that were judged
    relevant to *user_input* (in original order, budgeted by a rough
    whitespace word count), then the user's new message. The budget is an
    approximation of tokens, not an exact tokenizer count.
    """
    relevant_items = retrieve_relevant_memory(mem_list, user_input)
    relevant_texts = {item["content"] for item in relevant_items if "content" in item}

    system_msg = {"role": "system", "content": "You are a helpful and friendly AI assistant."}
    messages_for_api = [system_msg]
    budget_used = len(system_msg["content"].split())

    selected = []
    for msg in mem_list:
        if "content" not in msg or msg["content"] not in relevant_texts:
            continue
        if msg["role"] not in ["user", "assistant", "system"]:
            continue
        cost = len(f'{msg["role"]}: {msg["content"]}\n'.split())
        if budget_used + cost > max_tokens_in_prompt:
            break  # budget exhausted; drop remaining context
        selected.append({"role": msg["role"], "content": msg["content"]})
        budget_used += cost

    messages_for_api.extend(selected)
    if budget_used + len(user_input.split()) > max_tokens_in_prompt and len(messages_for_api) > 1:
        logging.warning(f"User input exceeds max_tokens_in_prompt with existing context. Context may be truncated.")
    # The user turn is always appended, even when over budget.
    messages_for_api.append({"role": "user", "content": user_input})
    return messages_for_api
|
| 139 |
+
|
| 140 |
+
def trim_memory(mem_list, max_size=50):
    """Drop the oldest entries in place until *mem_list* fits *max_size*.

    Mutates and returns the same list object.
    """
    overflow = len(mem_list) - max_size
    if overflow > 0:
        del mem_list[:overflow]  # single slice-delete instead of repeated pop(0)
    return mem_list
|
| 144 |
+
|
| 145 |
+
def summarize_memory(mem_list):
    """Collapse the conversation memory into one system-message summary.

    Returns ``[]`` when there is nothing to summarize (or no Groq client),
    the ORIGINAL list unchanged when the API call fails (so memory is not
    lost on a transient error), and a one-element summary list on success.
    """
    if not mem_list or client is None:
        logging.warning("Memory is empty or Groq client not initialized. Cannot summarize.")
        return []
    transcript = " ".join([m["content"] for m in mem_list if "content" in m])
    if not transcript.strip():
        logging.warning("Memory content is empty. Cannot summarize.")
        return []
    try:
        summary_completion = client.chat.completions.create(
            # NOTE(review): confirm this model id is valid on Groq.
            model="llama-3.1-8b-instruct-fpt",
            messages=[
                {"role": "system", "content": "Summarize the following conversation for key points. Keep it concise."},
                {"role": "user", "content": transcript},
            ],
            max_tokens=500,
        )
        summary_text = summary_completion.choices[0].message.content
        logging.info("Memory summarized.")
        return [{"role": "system", "content": f"Previous conversation summary: {summary_text}"}]
    except Exception as e:
        logging.error(f"Error summarizing memory: {e}")
        return mem_list
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
# --- Flask Routes --- (MUST come AFTER app is defined) ---
|
| 171 |
+
|
| 172 |
+
@app.route('/')
def index():
    """Serve the chat UI, seeding an empty per-session memory if absent."""
    if 'chat_memory' not in session:
        session['chat_memory'] = []
    return render_template('index.html')
|
| 177 |
+
|
| 178 |
+
@app.route('/chat', methods=['POST'])
def chat():
    """Handle one chat turn.

    Builds a memory-aware prompt from the session, calls the Groq chat
    API, records both turns in session memory, and returns the assistant
    reply as JSON. Returns 500 if the backend never initialized and 400
    for an empty message.
    """
    # Check if Groq client AND embedding models are initialized
    if client is None or tokenizer is None or model is None:
        status_code = 500
        error_message = "Chatbot backend is not fully initialized (API key or embedding models missing)."
        logging.error(error_message)
        return jsonify({"response": error_message}), status_code

    user_input = request.json.get('message')
    if not user_input or not user_input.strip():
        return jsonify({"response": "Please enter a message."}), 400

    current_memory_serializable = session.get('chat_memory', [])

    messages_for_api = construct_prompt(current_memory_serializable, user_input)

    try:
        completion = client.chat.completions.create(
            # NOTE(review): confirm this model id is valid on Groq.
            model="llama-3.1-8b-instruct-fpt",
            messages=messages_for_api,
            temperature=0.6,
            max_tokens=1024,
            top_p=0.95,
            stream=False,
            stop=None,
        )
        ai_response_content = completion.choices[0].message.content

    except Exception as e:
        logging.error(f"Error calling Groq API: {e}")
        ai_response_content = "Sorry, I encountered an error when trying to respond. Please try again later."

    current_memory_serializable = add_to_memory(current_memory_serializable, "user", user_input)
    # BUG FIX: was `current_memory_serialable` (typo) — raised NameError on
    # every successful chat turn before the reply could be stored.
    current_memory_serializable = add_to_memory(current_memory_serializable, "assistant", ai_response_content)

    current_memory_serializable = trim_memory(current_memory_serializable, max_size=20)

    # Memory (including embeddings) lives in the cookie-backed session;
    # NOTE(review): this can exceed the ~4 KB cookie limit — consider
    # server-side session storage.
    session['chat_memory'] = current_memory_serializable

    return jsonify({"response": ai_response_content})
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
@app.route('/clear_memory', methods=['POST'])
def clear_memory():
    """Wipe the per-session chat memory and acknowledge via JSON."""
    session['chat_memory'] = []
    logging.info("Chat memory cleared.")
    return jsonify({"status": "Memory cleared."})
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
# --- Running the App ---
if __name__ == '__main__':
    import uvicorn

    logging.info("Starting Uvicorn server...")
    port = int(os.environ.get('PORT', 7860))
    # BUG FIX: uvicorn's default interface="auto" only detects ASGI apps;
    # Flask is WSGI, so requests would fail without interface="wsgi"
    # (uvicorn then wraps the app in its WSGI middleware).
    uvicorn.run(app, host="0.0.0.0", port=port, interface="wsgi")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|