# AI Chatbot with Memory
# Author: Swaroop Ingavale
# Revision: 5f2f289
import gradio as gr
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from groq import Groq
import os
import datetime
# Groq API client; the key is read from the GROQ_API_KEY environment variable
# (None if unset — the client will fail on first request in that case).
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)
# Sentence-transformer used to embed messages for similarity-based retrieval.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Global conversation memory: a list of {"role", "content", "embedding"} dicts,
# appended to by add_to_memory() and searched by retrieve_relevant_memory().
memory = []
def add_to_memory(role, content):
    """Store one chat message in the global memory buffer with its embedding.

    Args:
        role: Speaker label, e.g. "user", "assistant" or "system".
        content: Raw message text to embed and remember.
    """
    vector = embedding_model.encode(content, convert_to_numpy=True)
    entry = {"role": role, "content": content, "embedding": vector}
    memory.append(entry)
def retrieve_relevant_memory(user_input, top_k=5):
    """Retrieve the top-k most relevant stored messages for the given input.

    Relevance is cosine similarity between the input's embedding and each
    stored message's embedding.

    Args:
        user_input: Current user message to match against memory.
        top_k: Maximum number of entries to return.

    Returns:
        List of memory entry dicts, most similar first (empty if memory is empty).
    """
    if not memory:
        return []
    user_embedding = embedding_model.encode(user_input, convert_to_numpy=True)
    # One vectorized similarity computation over all entries, instead of the
    # original per-entry sklearn call inside a Python loop.
    stored = np.stack([m["embedding"] for m in memory])
    similarities = cosine_similarity(user_embedding.reshape(1, -1), stored)[0]
    # argsort on indices also avoids the original sorted(zip(sim, memory)),
    # which could crash comparing dicts when two similarities tie exactly.
    top_indices = np.argsort(similarities)[::-1][:top_k]
    return [memory[i] for i in top_indices]
def construct_prompt(memory, user_input, max_tokens=500):
    """Build a prompt from retrieved relevant memory plus the current user input.

    Args:
        memory: Unused; kept for backward compatibility with existing callers.
            Retrieval reads the module-level buffer via retrieve_relevant_memory().
        user_input: Current user message, appended last as "user: ...".
        max_tokens: Approximate budget (whitespace-separated words) for the
            retrieved-context portion of the prompt.

    Returns:
        A string of "role: content" lines followed by the user input line.
    """
    relevant_memory = retrieve_relevant_memory(user_input)
    parts = []
    token_count = 0
    for message in relevant_memory:
        message_text = f'{message["role"]}: {message["content"]}\n'
        message_tokens = len(message_text.split())
        # Stop before exceeding the budget; the over-budget message is skipped
        # (same inclusion set as before, but without counting a skipped message).
        if token_count + message_tokens > max_tokens:
            break
        token_count += message_tokens
        parts.append(message_text)
    # Current user input always goes last; join once instead of repeated +=.
    parts.append(f'user: {user_input}\n')
    return "".join(parts)
def trim_memory(max_size=50):
    """Drop oldest entries until the memory buffer holds at most max_size items.

    Args:
        max_size: Maximum number of entries to keep.
    """
    # Bug fix: the original popped only ONE entry per call, so memory could
    # stay over the limit indefinitely. Delete all excess oldest entries at once.
    if len(memory) > max_size:
        del memory[:len(memory) - max_size]
def summarize_memory():
    """Collapse the whole memory buffer into a single model-generated summary.

    Sends every stored message's text to the summarization model, clears the
    buffer, and stores the summary back as one "system" entry. No-op when
    memory is empty.
    """
    if not memory:
        return
    # Concatenate all stored message texts into one document for the model.
    combined_text = " ".join(entry["content"] for entry in memory)
    response = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "Summarize the following text for key points."},
            {"role": "user", "content": combined_text},
        ],
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        max_tokens=4096,
    )
    memory.clear()
    # Response shape differs between client versions; try the chat-completion
    # attribute first and fall back to the legacy text field.
    try:
        summary_content = response.choices[0].message.content
    except AttributeError:
        summary_content = response.choices[0].text
    memory.append({"role": "system", "content": summary_content})
def get_chatbot_response(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    use_memory=True,
    memory_size=50,
):
    """Stream a chat completion, optionally augmenting the prompt with memory.

    Generator: yields the accumulated response text after each streamed chunk
    (Gradio streaming convention).

    Args:
        message: Current user message.
        history: Prior (user, assistant) pairs supplied by gr.ChatInterface.
        system_message: System prompt passed to the model.
        max_tokens: Generation cap forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling threshold forwarded to the API.
        use_memory: When True, retrieve relevant past messages into the prompt
            and persist this exchange afterwards.
        memory_size: Cap passed to trim_memory() after each exchange.
    """
    if use_memory:
        # NOTE(review): this loop re-adds earlier history pairs to memory on
        # EVERY call, so the same messages accumulate as duplicates across
        # turns — confirm whether that is intended.
        for i, (user_msg, bot_msg) in enumerate(history):
            if i < len(history) - 1:  # Skip the current message which is already in the history
                add_to_memory("user", user_msg)
                if bot_msg:  # Check if bot message exists (might be None for the most recent one)
                    add_to_memory("assistant", bot_msg)
        # Construct prompt with relevant memory; construct_prompt ignores its
        # first argument and reads the global buffer itself.
        prompt = construct_prompt(memory, message)
        # Send the memory-augmented prompt as a single user turn.
        completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": prompt}
            ],
            model="deepseek-r1-distill-llama-70b",
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            stream=True,
        )
        # Stream: yield the running concatenation after each delta chunk.
        response = ""
        for chunk in completion:
            response_part = chunk.choices[0].delta.content or ""
            response += response_part
            yield response
        # Persist this exchange, then enforce the buffer size cap.
        add_to_memory("user", message)
        add_to_memory("assistant", response)
        trim_memory(max_size=memory_size)
    else:
        # Memory disabled: plain chat completion over the raw history.
        messages = [{"role": "system", "content": system_message}]
        for val in history:
            if val[0]:
                messages.append({"role": "user", "content": val[0]})
            if val[1]:
                messages.append({"role": "assistant", "content": val[1]})
        messages.append({"role": "user", "content": message})
        completion = client.chat.completions.create(
            messages=messages,
            model="deepseek-r1-distill-llama-70b",
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            stream=True,
        )
        # Same streaming pattern as the memory branch.
        response = ""
        for chunk in completion:
            response_part = chunk.choices[0].delta.content or ""
            response += response_part
            yield response
def view_memory():
    """Render the current memory buffer as a human-readable string."""
    if not memory:
        return "Memory is empty."
    # Build the report as a list of fragments and join once at the end.
    fragments = ["Current Memory Contents:\n\n"]
    for index, entry in enumerate(memory, start=1):
        fragments.append(f"Memory {index}: {entry['role']}: {entry['content']}\n\n")
    return "".join(fragments)
def clear_memory_action():
    """Empty the memory buffer and return a status message for the UI."""
    del memory[:]
    return "Memory has been cleared."
# Custom CSS for the chat interface - apply using elem_classes
custom_css = """
.user-message {
background-color: #e3f2fd !important;
border-radius: 15px !important;
padding: 10px 15px !important;
}
.bot-message {
background-color: #f1f8e9 !important;
border-radius: 15px !important;
padding: 10px 15px !important;
}
"""
def _summarize_memory_action():
    """Run memory summarization and return a status string for the UI."""
    summarize_memory()
    return "Memory summarized successfully."

# Create the Gradio interface.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
    # Header
    with gr.Row(elem_classes="header-row"):
        gr.Markdown("""
        <div style="text-align: center; margin-bottom: 10px; padding: 10px; background-color: #f0f4f8; border-radius: 8px;">
        <h1 style="margin: 0; color: #2c3e50;">AI Chatbot With Memory</h1>
        <h3 style="margin: 5px 0 0 0; color: #34495e;">Developed by Dhiraj and Swaroop</h3>
        </div>
        """)
    with gr.Row():
        with gr.Column(scale=3):
            # Chat panel; additional_inputs appear as extra parameters of
            # get_chatbot_response after (message, history).
            chatbot = gr.ChatInterface(
                get_chatbot_response,
                additional_inputs=[
                    gr.Textbox(value="You are a helpful assistant with memory capabilities.", label="System message"),
                    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
                    gr.Checkbox(value=True, label="Use Memory", info="Enable or disable memory capabilities"),
                    gr.Slider(minimum=10, maximum=200, value=50, step=10, label="Memory Size", info="Maximum number of entries in memory"),
                ],
                examples=[
                    ["Tell me about machine learning"],
                    ["What are the best practices for data preprocessing?"],
                    ["Can you explain neural networks?"],
                ],
                title="Chat with AI Assistant",
            )
        with gr.Column(scale=1):
            # Sidebar for inspecting and managing the memory buffer.
            with gr.Group():
                gr.Markdown("## Memory Management")
                memory_display = gr.Textbox(label="Memory Contents", lines=20, max_lines=30, interactive=False)
                view_memory_btn = gr.Button("View Memory Contents")
                clear_memory_btn = gr.Button("Clear Memory")
                summarize_memory_btn = gr.Button("Summarize Memory")
                memory_status = gr.Textbox(label="Memory Status", lines=2, interactive=False)
    # Wire up button actions.
    view_memory_btn.click(view_memory, inputs=[], outputs=[memory_display])
    clear_memory_btn.click(clear_memory_action, inputs=[], outputs=[memory_status])
    # Bug fix: the original lambda returned a 2-tuple (None, status) into a
    # single output component, which Gradio rejects as too many output values.
    # The helper returns exactly one string for memory_status.
    summarize_memory_btn.click(
        _summarize_memory_action,
        inputs=[],
        outputs=[memory_status]
    )
    # Footer
    with gr.Row(elem_classes="footer-row"):
        gr.Markdown(f"""
        <div style="text-align: center; margin-top: 20px; padding: 10px; background-color: #f0f4f8; border-radius: 8px;">
        <p style="margin: 0; color: #2c3e50;">
        Developed by Dhiraj and Swaroop | © {datetime.datetime.now().year} | Version 1.0
        </p>
        </div>
        """)

if __name__ == "__main__":
    demo.launch()