# AI Chatbot with Memory
# Author: Swaroop Ingavale
# Revision: 5f2f289
import gradio as gr
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from groq import Groq
import os
import datetime
# Groq API client; the key is read from the GROQ_API_KEY environment variable
# (None if unset — the client will fail on first request in that case).
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)
# Sentence-transformer used to embed messages for similarity-based retrieval.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Global conversation memory: a list of {"role", "content", "embedding"} dicts,
# appended to by add_to_memory() and searched by retrieve_relevant_memory().
memory = []
def add_to_memory(role, content):
    """Store one chat message in the global memory buffer with its embedding.

    Args:
        role: Speaker label, e.g. "user", "assistant" or "system".
        content: Raw message text to embed and remember.
    """
    vector = embedding_model.encode(content, convert_to_numpy=True)
    entry = {"role": role, "content": content, "embedding": vector}
    memory.append(entry)
def retrieve_relevant_memory(user_input, top_k=5):
    """Retrieve the top-k most relevant stored messages for the given input.

    Relevance is cosine similarity between the input's embedding and each
    stored message's embedding.

    Args:
        user_input: Current user message to match against memory.
        top_k: Maximum number of entries to return.

    Returns:
        List of memory entry dicts, most similar first (empty if memory is empty).
    """
    if not memory:
        return []
    user_embedding = embedding_model.encode(user_input, convert_to_numpy=True)
    # One vectorized similarity computation over all entries, instead of the
    # original per-entry sklearn call inside a Python loop.
    stored = np.stack([m["embedding"] for m in memory])
    similarities = cosine_similarity(user_embedding.reshape(1, -1), stored)[0]
    # argsort on indices also avoids the original sorted(zip(sim, memory)),
    # which could crash comparing dicts when two similarities tie exactly.
    top_indices = np.argsort(similarities)[::-1][:top_k]
    return [memory[i] for i in top_indices]
def construct_prompt(memory, user_input, max_tokens=500):
    """Build a prompt from retrieved relevant memory plus the current user input.

    Args:
        memory: Unused; kept for backward compatibility with existing callers.
            Retrieval reads the module-level buffer via retrieve_relevant_memory().
        user_input: Current user message, appended last as "user: ...".
        max_tokens: Approximate budget (whitespace-separated words) for the
            retrieved-context portion of the prompt.

    Returns:
        A string of "role: content" lines followed by the user input line.
    """
    relevant_memory = retrieve_relevant_memory(user_input)
    parts = []
    token_count = 0
    for message in relevant_memory:
        message_text = f'{message["role"]}: {message["content"]}\n'
        message_tokens = len(message_text.split())
        # Stop before exceeding the budget; the over-budget message is skipped
        # (same inclusion set as before, but without counting a skipped message).
        if token_count + message_tokens > max_tokens:
            break
        token_count += message_tokens
        parts.append(message_text)
    # Current user input always goes last; join once instead of repeated +=.
    parts.append(f'user: {user_input}\n')
    return "".join(parts)
def trim_memory(max_size=50):
    """Drop oldest entries until the memory buffer holds at most max_size items.

    Args:
        max_size: Maximum number of entries to keep.
    """
    # Bug fix: the original popped only ONE entry per call, so memory could
    # stay over the limit indefinitely. Delete all excess oldest entries at once.
    if len(memory) > max_size:
        del memory[:len(memory) - max_size]
def summarize_memory():
    """Collapse the whole memory buffer into a single model-generated summary.

    Sends every stored message's text to the summarization model, clears the
    buffer, and stores the summary back as one "system" entry. No-op when
    memory is empty.
    """
    if not memory:
        return
    # Concatenate all stored message texts into one document for the model.
    combined_text = " ".join(entry["content"] for entry in memory)
    response = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "Summarize the following text for key points."},
            {"role": "user", "content": combined_text},
        ],
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        max_tokens=4096,
    )
    memory.clear()
    # Response shape differs between client versions; try the chat-completion
    # attribute first and fall back to the legacy text field.
    try:
        summary_content = response.choices[0].message.content
    except AttributeError:
        summary_content = response.choices[0].text
    memory.append({"role": "system", "content": summary_content})
def get_chatbot_response(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    use_memory=True,
    memory_size=50,
):
    """Stream a chat completion, optionally augmenting the prompt with memory.

    Generator: yields the accumulated response text after each streamed chunk
    (Gradio streaming convention).

    Args:
        message: Current user message.
        history: Prior (user, assistant) pairs supplied by gr.ChatInterface.
        system_message: System prompt passed to the model.
        max_tokens: Generation cap forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling threshold forwarded to the API.
        use_memory: When True, retrieve relevant past messages into the prompt
            and persist this exchange afterwards.
        memory_size: Cap passed to trim_memory() after each exchange.
    """
    if use_memory:
        # NOTE(review): this loop re-adds earlier history pairs to memory on
        # EVERY call, so the same messages accumulate as duplicates across
        # turns — confirm whether that is intended.
        for i, (user_msg, bot_msg) in enumerate(history):
            if i < len(history) - 1:  # Skip the current message which is already in the history
                add_to_memory("user", user_msg)
                if bot_msg:  # Check if bot message exists (might be None for the most recent one)
                    add_to_memory("assistant", bot_msg)
        # Construct prompt with relevant memory; construct_prompt ignores its
        # first argument and reads the global buffer itself.
        prompt = construct_prompt(memory, message)
        # Send the memory-augmented prompt as a single user turn.
        completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": prompt}
            ],
            model="deepseek-r1-distill-llama-70b",
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            stream=True,
        )
        # Stream: yield the running concatenation after each delta chunk.
        response = ""
        for chunk in completion:
            response_part = chunk.choices[0].delta.content or ""
            response += response_part
            yield response
        # Persist this exchange, then enforce the buffer size cap.
        add_to_memory("user", message)
        add_to_memory("assistant", response)
        trim_memory(max_size=memory_size)
    else:
        # Memory disabled: plain chat completion over the raw history.
        messages = [{"role": "system", "content": system_message}]
        for val in history:
            if val[0]:
                messages.append({"role": "user", "content": val[0]})
            if val[1]:
                messages.append({"role": "assistant", "content": val[1]})
        messages.append({"role": "user", "content": message})
        completion = client.chat.completions.create(
            messages=messages,
            model="deepseek-r1-distill-llama-70b",
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            stream=True,
        )
        # Same streaming pattern as the memory branch.
        response = ""
        for chunk in completion:
            response_part = chunk.choices[0].delta.content or ""
            response += response_part
            yield response
def view_memory():
    """Render the current memory buffer as a human-readable string."""
    if not memory:
        return "Memory is empty."
    # Build the report as a list of fragments and join once at the end.
    fragments = ["Current Memory Contents:\n\n"]
    for index, entry in enumerate(memory, start=1):
        fragments.append(f"Memory {index}: {entry['role']}: {entry['content']}\n\n")
    return "".join(fragments)
def clear_memory_action():
    """Empty the memory buffer and return a status message for the UI."""
    del memory[:]
    return "Memory has been cleared."
# Custom CSS for the chat interface - apply using elem_classes
custom_css = """
.user-message {
background-color: #e3f2fd !important;
border-radius: 15px !important;
padding: 10px 15px !important;
}
.bot-message {
background-color: #f1f8e9 !important;
border-radius: 15px !important;
padding: 10px 15px !important;
}
"""
def _summarize_memory_action():
    """Run memory summarization and return a status string for the UI."""
    summarize_memory()
    return "Memory summarized successfully."

# Create the Gradio interface.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
    # Header
    with gr.Row(elem_classes="header-row"):
        gr.Markdown("""
        <div style="text-align: center; margin-bottom: 10px; padding: 10px; background-color: #f0f4f8; border-radius: 8px;">
        <h1 style="margin: 0; color: #2c3e50;">AI Chatbot With Memory</h1>
        <h3 style="margin: 5px 0 0 0; color: #34495e;">Developed by Dhiraj and Swaroop</h3>
        </div>
        """)
    with gr.Row():
        with gr.Column(scale=3):
            # Chat panel; additional_inputs appear as extra parameters of
            # get_chatbot_response after (message, history).
            chatbot = gr.ChatInterface(
                get_chatbot_response,
                additional_inputs=[
                    gr.Textbox(value="You are a helpful assistant with memory capabilities.", label="System message"),
                    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
                    gr.Checkbox(value=True, label="Use Memory", info="Enable or disable memory capabilities"),
                    gr.Slider(minimum=10, maximum=200, value=50, step=10, label="Memory Size", info="Maximum number of entries in memory"),
                ],
                examples=[
                    ["Tell me about machine learning"],
                    ["What are the best practices for data preprocessing?"],
                    ["Can you explain neural networks?"],
                ],
                title="Chat with AI Assistant",
            )
        with gr.Column(scale=1):
            # Sidebar for inspecting and managing the memory buffer.
            with gr.Group():
                gr.Markdown("## Memory Management")
                memory_display = gr.Textbox(label="Memory Contents", lines=20, max_lines=30, interactive=False)
                view_memory_btn = gr.Button("View Memory Contents")
                clear_memory_btn = gr.Button("Clear Memory")
                summarize_memory_btn = gr.Button("Summarize Memory")
                memory_status = gr.Textbox(label="Memory Status", lines=2, interactive=False)
    # Wire up button actions.
    view_memory_btn.click(view_memory, inputs=[], outputs=[memory_display])
    clear_memory_btn.click(clear_memory_action, inputs=[], outputs=[memory_status])
    # Bug fix: the original lambda returned a 2-tuple (None, status) into a
    # single output component, which Gradio rejects as too many output values.
    # The helper returns exactly one string for memory_status.
    summarize_memory_btn.click(
        _summarize_memory_action,
        inputs=[],
        outputs=[memory_status]
    )
    # Footer
    with gr.Row(elem_classes="footer-row"):
        gr.Markdown(f"""
        <div style="text-align: center; margin-top: 20px; padding: 10px; background-color: #f0f4f8; border-radius: 8px;">
        <p style="margin: 0; color: #2c3e50;">
        Developed by Dhiraj and Swaroop | © {datetime.datetime.now().year} | Version 1.0
        </p>
        </div>
        """)

if __name__ == "__main__":
    demo.launch()