Commit 3392ab1
1 Parent(s): ac85c1d
Added cache seed and CAG in autogen usecase
Browse files
- .gitignore +1 -0
- README.md +10 -0
- app.py +12 -0
- cag_chat.png +0 -0
- configfile.ini +1 -1
- requirements.txt +18 -3
- src/LLMS/groqllm.py +19 -8
- src/cag/__init__.py +0 -0
- src/cag/cache_manager.py +44 -0
- src/cag/embedding_utils.py +61 -0
- src/cag/generation_model.py +62 -0
- src/cag/main.py +161 -0
- src/streamlitui/loadui.py +9 -1
- src/usecases/basicexample.py +1 -1
- src/usecases/cag_chat.py +25 -0
.gitignore
CHANGED
@@ -6,3 +6,4 @@ codegen/tmp_code_3e1806a0bf22b99c6c5d2b77650fe9a8.py
 /codegen
 /tmp/chromadb
 /tmp/db
+/.cache
README.md
CHANGED
@@ -55,6 +55,16 @@ Requirements
 #### Basic Example
 [image]
 
+#### Chat with CAG
+prompt1: what is dotnet
+prompt2: what is python
+prompt3: what is python
+prompt4: what is dotnet
+prompt5: what is python
+
+[image]
+
+
 #### MultiAgent Chat
 prompt: As a user, create an ASP.NET form with a Razor view page for a health insurance feedback page
 [image]
app.py
CHANGED
@@ -1,5 +1,6 @@
 import streamlit as st
 
+from src.cag.main import CAGLLM
 from configfile import Config
 from src.streamlitui.loadui import LoadStreamlitUI
 from src.usecases.multiagentschat import MultiAgentChat
@@ -9,6 +10,7 @@ from src.usecases.agentchatsqlspider import AgentChatSqlSpider
 from src.LLMS.groqllm import GroqLLM
 from src.usecases.multiagentragchat import MultiAgentRAGChat
 from src.usecases.basicexample import BasicExample
+from src.usecases.cag_chat import CAGLLMChat
 
 
 # MAIN Function START
@@ -67,5 +69,15 @@ if __name__ == "__main__":
             problem=problem)
         obj_basic_example.run()
 
+        elif user_input['selected_usecase'] == "Chat with CAG":
+
+            obj_chat = CAGLLMChat(llm_config=llm_config, problem=problem)
+            response = obj_chat.start_chat()
+
+            obj_cag_llm = CAGLLM(problem, response)
+
+            obj_cag_llm.process_cag_llm()
+
+
 
 
cag_chat.png
ADDED
configfile.ini
CHANGED
@@ -1,6 +1,6 @@
 [DEFAULT]
 PAGE_TITLE = AUTOGEN IN ACTION
 LLM_OPTIONS = Groq, Huggingface
-USECASE_OPTIONS = Basic Example, MultiAgent Chat, MultiAgent Code Execution, RAG Chat, With LLamaIndex Tool
+USECASE_OPTIONS = Basic Example, Chat with CAG, MultiAgent Chat, MultiAgent Code Execution, RAG Chat, With LLamaIndex Tool
 GROQ_MODEL_OPTIONS = llama-3.3-70b-versatile, mixtral-8x7b-32768, llama3-8b-8192, llama3-70b-8192, gemma2-9b-it
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
 streamlit
-pyautogen
-groq
+pyautogen==0.2.32
+groq==0.9.0
 llama-index
 llama-index-tools-wikipedia
 llama-index-readers-wikipedia
@@ -10,4 +10,19 @@ spider-env
 pyautogen[retrievechat]
 pyautogen[retrievechat-qdrant]
 flaml[automl]
-
+# Core Libraries
+python-dotenv
+streamlit>=1.30.0
+numpy>=1.24.0
+scikit-learn>=1.2.2
+plotly>=5.17.0
+pandas>=2.0.0
+requests
+
+# Streamlit Extensions for Enhanced UI
+streamlit-extras>=0.2.0
+
+# LLM Integration and Embedding Tools
+torch>=2.0.0
+transformers>=4.35.0
+sentence-transformers>=2.2.2
src/LLMS/groqllm.py
CHANGED
@@ -9,14 +9,25 @@ class GroqLLM:
         self.user_controls_input = user_controls_input
 
     def groq_llm_config(self):
-
-
-
-
-
-
-
-
+        if st.session_state["Cache_Seed"]:
+            config_list = [
+                {
+                    "api_type": 'groq',
+                    "model": self.user_controls_input['selected_groq_model'],
+                    "api_key": st.session_state["GROQ_API_KEY"],
+                    "cache_seed": 41
+                }
+            ]
+
+        else:
+            config_list = [
+                {
+                    "api_type": 'groq',
+                    "model": self.user_controls_input['selected_groq_model'],
+                    "api_key": st.session_state["GROQ_API_KEY"],
+                    "cache_seed": None
+                }
+            ]
 
         llm_config = {"config_list": config_list, "request_timeout": 60}
         st.session_state['llm_config'] = llm_config
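A note on the new setting: in pyautogen, cache_seed controls response caching. An integer seed makes autogen persist LLM completions on disk (the /.cache directory ignored above) and replay them for identical requests, while None disables caching. Since the two branches differ only in that one value, a minimal equivalent sketch of the same method body (not part of the commit, same session keys assumed) is:

    # Hedged sketch: fold the duplicated branches into one.
    cache_seed = 41 if st.session_state["Cache_Seed"] else None  # int -> cache replies on disk, None -> no caching
    config_list = [{
        "api_type": "groq",
        "model": self.user_controls_input["selected_groq_model"],
        "api_key": st.session_state["GROQ_API_KEY"],
        "cache_seed": cache_seed,
    }]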
src/cag/__init__.py
ADDED
File without changes
src/cag/cache_manager.py
ADDED
@@ -0,0 +1,44 @@
import time
import threading

class CacheManager:
    """Thread-safe singleton cache keyed by normalized query text."""
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, max_cache_size=100):
        # Double-checked locking: only the first caller creates the instance
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super(CacheManager, cls).__new__(cls)
                    cls._instance.cache = {}
                    cls._instance.max_cache_size = max_cache_size
        return cls._instance

    def normalize_key(self, key):
        return key.strip().lower()

    def add_to_cache(self, key, value, embedding=None):
        normalized_key = self.normalize_key(key)
        if len(self.cache) >= self.max_cache_size:
            self.evict_cache()
        self.cache[normalized_key] = {
            "response": value,
            "timestamp": time.time(),
            "embedding": embedding
        }

    def get_from_cache(self, key):
        normalized_key = self.normalize_key(key)
        return self.cache.get(normalized_key, {}).get("response", None)

    def get_embedding(self, key):
        normalized_key = self.normalize_key(key)
        return self.cache.get(normalized_key, {}).get("embedding", None)

    def evict_cache(self):
        # Evict the entry with the oldest timestamp
        if self.cache:
            oldest_key = min(self.cache, key=lambda k: self.cache[k]["timestamp"])
            del self.cache[oldest_key]

    def clear_cache(self):
        self.cache.clear()
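Because CacheManager is a singleton, every CacheManager() call returns the same object, so the cache is shared across modules; when it is full, the entry with the oldest timestamp is evicted. A short usage sketch (illustrative only, not part of the commit):

    from src.cag.cache_manager import CacheManager

    cm = CacheManager(max_cache_size=2)
    cm.add_to_cache("What is Python", "A programming language.")
    # Keys are normalized, so lookup is case- and whitespace-insensitive,
    # and CacheManager() here is the very same instance as cm:
    print(CacheManager().get_from_cache("  what is python  "))  # -> "A programming language."
    cm.add_to_cache("q2", "r2")
    cm.add_to_cache("q3", "r3")  # cache full -> evicts the oldest entry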
src/cag/embedding_utils.py
ADDED
@@ -0,0 +1,61 @@
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

class EmbeddingUtils:
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        """
        Initialize the embedding utility with a pre-trained model.

        Args:
        - model_name (str): Name of the sentence-transformers model.
        """
        self.model = SentenceTransformer(model_name)

    def generate_embedding(self, text):
        """
        Generate embedding for a given text.

        Args:
        - text (str): Input text to generate embedding for.

        Returns:
        - np.ndarray: Embedding vector.
        """
        return self.model.encode([text])[0]  # encode returns a list; we extract the first item

    def calculate_similarity(self, embedding1, embedding2):
        """
        Calculate cosine similarity between two embeddings.

        Args:
        - embedding1 (np.ndarray): First embedding vector.
        - embedding2 (np.ndarray): Second embedding vector.

        Returns:
        - float: Cosine similarity score.
        """
        return cosine_similarity([embedding1], [embedding2])[0][0]

    def find_best_match(self, query_embedding, cache_embeddings, threshold=0.8):
        """
        Find the best match for a query embedding from a list of cached embeddings.

        Args:
        - query_embedding (np.ndarray): Embedding of the input query.
        - cache_embeddings (list of np.ndarray): List of cached embeddings.
        - threshold (float): Minimum similarity score to consider a match.

        Returns:
        - int: Index of the best match if above threshold, otherwise -1.
        """
        if not cache_embeddings:
            return -1  # No cached embeddings to compare

        similarities = cosine_similarity([query_embedding], cache_embeddings)[0]
        best_match_index = np.argmax(similarities)
        best_match_score = similarities[best_match_index]

        if best_match_score >= threshold:
            return best_match_index
        return -1
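These utilities are what make approximate cache hits possible: two differently worded queries can map to nearby embedding vectors, and find_best_match returns the index of the closest cached embedding when its cosine similarity clears the threshold. An illustrative sketch (assumed inputs, not part of the commit):

    from src.cag.embedding_utils import EmbeddingUtils

    utils = EmbeddingUtils()  # downloads all-MiniLM-L6-v2 on first use
    q = utils.generate_embedding("what is dotnet")
    cached = [utils.generate_embedding("what is python"),
              utils.generate_embedding("explain dotnet")]
    idx = utils.find_best_match(q, cached, threshold=0.8)
    print(idx)  # 1 if "explain dotnet" scores above the threshold, otherwise -1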
src/cag/generation_model.py
ADDED
@@ -0,0 +1,62 @@
import requests
import numpy as np
import os
import time
import warnings
from dotenv import load_dotenv
from src.cag.cache_manager import CacheManager
from src.cag.embedding_utils import EmbeddingUtils

# Suppress PyTorch Warnings
warnings.filterwarnings("ignore", message="Tried to instantiate class '__path__._path'")

load_dotenv()

class LLMIntegration:
    def __init__(self, cache_size=100, similarity_threshold=0.8):
        """Initialize the LLM Integration with API Key, Cache, and Embedding Utilities."""
        self.cache_manager = CacheManager(max_cache_size=cache_size)
        self.embedding_utils = EmbeddingUtils()
        self.similarity_threshold = similarity_threshold

    def generate_response(self, query, response):
        """Generate a response with cache checking and similarity matching."""
        query_key = self.cache_manager.normalize_key(query)

        # Check for an exact cache match
        cached_response = self.cache_manager.get_from_cache(query_key)
        if cached_response:
            return f"Cache Hit! {cached_response}"

        # Generate query embedding
        query_embedding = self.embedding_utils.generate_embedding(query)

        # Check for an approximate (semantic) match
        best_match_key = self._find_best_match(query_embedding)
        if best_match_key:
            cached_response = self.cache_manager.get_from_cache(best_match_key)
            return f"Cache Hit! {cached_response}"

        # No cache match: fall back to the response generated upstream
        response = response

        # ✅ Only cache successful responses
        if response:
            self.cache_manager.add_to_cache(query_key, response, embedding=query_embedding)
            return f"Cache Miss! {response}"
        else:
            return "**Error: Could not generate a response.**"

    def _find_best_match(self, query_embedding):
        """Find the best match in the cache using similarity checking."""
        best_match_key = None
        highest_similarity = 0

        for key in self.cache_manager.cache:
            cached_embedding = self.cache_manager.get_embedding(key)
            if cached_embedding is not None:
                similarity = self.embedding_utils.calculate_similarity(query_embedding, cached_embedding)
                if similarity > highest_similarity and similarity >= self.similarity_threshold:
                    best_match_key = key
                    highest_similarity = similarity
        return best_match_key
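Note that generate_response does not call an LLM itself: the answer is produced upstream (by the autogen chat in cag_chat.py) and passed in as the response argument; this class only decides whether a cached answer should be returned instead. A flow sketch (assumed inputs, not part of the commit):

    llm = LLMIntegration(cache_size=100, similarity_threshold=0.8)
    print(llm.generate_response("what is python", "Python is a language."))  # Cache Miss! ... (now stored)
    print(llm.generate_response("what is python", "ignored"))                # Cache Hit! exact key match
    print(llm.generate_response("explain python", "fallback"))               # may hit via embedding similarity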
src/cag/main.py
ADDED
@@ -0,0 +1,161 @@
import sys
import os
import time
import streamlit as st
import plotly.express as px
from datetime import datetime
from dotenv import load_dotenv

# Import the LLM Integration Model
from src.cag.generation_model import LLMIntegration

# Load environment variables and secrets
load_dotenv()

class CAGLLM:

    def __init__(self, query, response):
        self.query = query
        self.response = response


    def process_cag_llm(self):

        # Initialize LLM Integration with API Key
        llm_system = LLMIntegration()

        # Cache statistics and tracking initialization
        if "cache_hits" not in st.session_state:
            st.session_state.cache_hits = 0
            st.session_state.cache_misses = 0
            st.session_state.response_times = []
            st.session_state.query_timestamps = []
            st.session_state.history = []

        # st.set_page_config(
        #     page_title="CAG Chatbot",
        #     layout="wide",
        #     page_icon="🧠",
        #     initial_sidebar_state="expanded"
        # )

        # CSS for Styling Graph
        st.markdown(
            """
            <style>
            body { font-family: 'Arial', sans-serif; }
            .stTextInput, .stButton { border-radius: 8px; }
            .stProgress > div > div { border-radius: 20px; }
            .custom-link { color: #1f77b4; text-decoration: none; font-weight: bold; transition: color 0.3s ease-in-out; }
            .custom-link:hover { color: #ff4b4b; }
            .fixed-graph-container { max-height: 300px !important; overflow-y: auto; }
            </style>
            """,
            unsafe_allow_html=True
        )

        # Page Title and Description
        st.title("💡 Cache Augmented Generation (CAG) Chatbot")
        st.write("**A chatbot with enhanced responses powered by smart caching.**")

        # Layout Columns: Configurator | Chat | Statistics
        col1, col2, col3 = st.columns([1.2, 2, 1.2])

        # 🛠️ **Configurator Section (Left Panel)**
        with col1:
            st.header("⚙️ Configurator")
            cache_size = st.slider("🗃️ Cache Size", min_value=50, max_value=500, value=100)
            similarity_threshold = st.slider("🔍 Similarity Threshold", min_value=0.5, max_value=1.0, value=0.8)
            clear_cache = st.button("🧹 Clear Cache")

            if clear_cache:
                llm_system.cache_manager.clear_cache()
                st.session_state.cache_hits = 0
                st.session_state.cache_misses = 0
                st.session_state.response_times = []
                st.session_state.query_timestamps = []
                st.session_state.history = []
                st.success("✅ Cache cleared successfully!")

            # 📦 **Cache Content Section**
            with st.expander("📦 **View Cache Content**"):
                if llm_system.cache_manager.cache:
                    for key, value in llm_system.cache_manager.cache.items():
                        st.write(f"**Query:** {key}")
                        st.write(f"**Response:** {value['response']}")
                        st.write(f"**Timestamp:** {datetime.fromtimestamp(value['timestamp']).strftime('%Y-%m-%d %H:%M:%S')}")
                        st.write("---")
                else:
                    st.write("🗑️ Cache is currently empty.")

        # 💬 **Chat Interaction Section (Middle Panel)**
        with col2:
            st.header("💬 Chat with CAG")
            query = self.query
            if self.query:
                start_time = time.time()

                # Step 1: Check Cache
                st.info("⏳ Checking Cache...")
                cached_response = llm_system.cache_manager.get_from_cache(llm_system.cache_manager.normalize_key(query))

                if cached_response:
                    # Step 2: If Cache Hit, Return
                    st.success("✅ Cache Hit! Returning cached response.")
                    response = cached_response
                    st.session_state.cache_hits += 1
                else:
                    # Step 3: If Cache Miss, Query LLM
                    st.warning("❌ Cache Miss. Fetching from LLM...")
                    response = llm_system.generate_response(query, self.response)
                    st.session_state.cache_misses += 1

                # Response Time and Save Data
                response_time = time.time() - start_time
                st.session_state.response_times.append(response_time)
                st.session_state.query_timestamps.append(datetime.now().strftime('%H:%M:%S'))
                st.session_state.history.append({"query": query, "response": response, "time": response_time})

                # 🎯 Chat Response
                st.success(f"**🗨️ {response}**")
                st.info(f"⏱️ **Response Time:** {response_time:.2f} seconds")

            # 🕰️ **Query History Section**
            with st.expander("🕰️ **Query History**"):
                for entry in st.session_state.history[-10:]:
                    st.write(f"**Query:** {entry['query']}")
                    st.write(f"**Response:** {entry['response']}")
                    st.write(f"⏱️ **Time Taken:** {entry['time']:.2f} seconds")
                    st.write("---")

        # 📊 **Cache Statistics Section (Right Panel)**
        with col3:
            st.header("📊 Cache Statistics")

            # Real-Time Metrics
            col1_stat, col2_stat, col3_stat = st.columns(3)
            col1_stat.metric("✅ Hits", st.session_state.cache_hits)
            col2_stat.metric("❌ Misses", st.session_state.cache_misses)
            col3_stat.metric("📦 Cache Size", len(llm_system.cache_manager.cache))

            # Cache Hit/Miss Ratio
            total_queries = st.session_state.cache_hits + st.session_state.cache_misses
            hit_ratio = (st.session_state.cache_hits / total_queries) * 100 if total_queries > 0 else 0
            miss_ratio = (st.session_state.cache_misses / total_queries) * 100 if total_queries > 0 else 0

            st.progress(hit_ratio / 100, text=f"✅ Cache Hit Ratio: {hit_ratio:.2f}%")
            st.progress(miss_ratio / 100, text=f"❌ Cache Miss Ratio: {miss_ratio:.2f}%")

            # 📈 **Response Time Graph**
            if st.session_state.response_times:
                st.markdown('<div class="fixed-graph-container">', unsafe_allow_html=True)
                fig = px.line(
                    x=st.session_state.query_timestamps,
                    y=st.session_state.response_times,
                    title="📈 Response Time Trend",
                    labels={"x": "Timestamp", "y": "Response Time (s)"}
                )
                st.plotly_chart(fig, use_container_width=True)
                st.markdown('</div>', unsafe_allow_html=True)
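The statistics panel derives its ratios from the running session counters; a worked example with assumed numbers:

    total_queries = 3 + 1                  # 3 hits, 1 miss -> 4
    hit_ratio = (3 / total_queries) * 100  # 75.0
    miss_ratio = (1 / total_queries) * 100 # 25.0
    # the "if total_queries > 0" guard avoids ZeroDivisionError on first load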
src/streamlitui/loadui.py
CHANGED
@@ -34,7 +34,15 @@ class LoadStreamlitUI:
 
 
 
-
+        if st.sidebar.toggle("Chat With History"):
+            st.session_state["chat_with_history"] = False
+        else:
+            st.session_state["chat_with_history"] = True
+
+        if st.sidebar.toggle("LLM Caching"):
+            st.session_state["Cache_Seed"] = True
+        else:
+            st.session_state["Cache_Seed"] = False
 
         if self.user_controls['selected_usecase'] == "With LLamaIndex Tool":
             st.subheader("🏝️ Trip Advisor Specialist using wikipedia")
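The first toggle reads inverted on purpose: the session value is later passed as clear_history, so switching "Chat With History" ON must store False (do not clear history between turns). An equivalent condensed sketch (illustrative only, not part of the commit):

    st.session_state["chat_with_history"] = not st.sidebar.toggle("Chat With History")  # ON -> keep history
    st.session_state["Cache_Seed"] = st.sidebar.toggle("LLM Caching")                   # ON -> use cache_seed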
src/usecases/basicexample.py
CHANGED
@@ -24,7 +24,7 @@ class BasicExample:
         asyncio.set_event_loop(self.loop)
 
     async def initiate_chat(self):
-        await self.user_proxy.a_initiate_chat(self.assistant, max_turns=
+        await self.user_proxy.a_initiate_chat(self.assistant, max_turns=4, message=self.problem, clear_history=st.session_state["chat_with_history"])
 
     def run(self):
         self.loop.run_until_complete(self.initiate_chat())
src/usecases/cag_chat.py
ADDED
@@ -0,0 +1,25 @@
import os
from autogen import AssistantAgent, UserProxyAgent
import streamlit as st


class CAGLLMChat:
    def __init__(self, llm_config, problem):
        self.llm_config = llm_config
        self.problem = problem


    def start_chat(self):
        llm_config = self.llm_config
        problem = self.problem
        assistant = AssistantAgent("assistant", llm_config=llm_config, code_execution_config=False, human_input_mode='NEVER')
        user_proxy = UserProxyAgent("user_proxy", code_execution_config=False, human_input_mode='NEVER')

        # Start the chat
        response = user_proxy.initiate_chat(
            assistant,
            message=problem,
            max_turns=1,
            clear_history=st.session_state["chat_with_history"]
        )
        return response
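Taken together with app.py above, the use case runs in two steps: CAGLLMChat performs a single-turn autogen exchange to obtain the model's answer, and CAGLLM decides whether that answer or a cached one is shown, then renders the dashboard. An end-to-end sketch (illustrative, mirroring the wiring in app.py):

    from src.cag.main import CAGLLM
    from src.usecases.cag_chat import CAGLLMChat

    # Assumes llm_config was built by GroqLLM.groq_llm_config() and the
    # sidebar toggles in loadui.py have populated st.session_state.
    problem = "what is python"
    chat = CAGLLMChat(llm_config=llm_config, problem=problem)
    response = chat.start_chat()  # one autogen turn; may itself be served from autogen's cache_seed cache
    CAGLLM(problem, response).process_cag_llm()  # CAG layer: cache check, stats, Streamlit UI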