Rabbit-Innotech commited on
Commit
bd83779
·
verified ·
1 Parent(s): 7f2bd5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -188
app.py CHANGED
@@ -928,6 +928,7 @@
928
  # if __name__ == "__main__":
929
  # demo.launch(share=True, inbrowser=True, debug=True)
930
 
 
931
  import os
932
  from langchain_groq import ChatGroq
933
  from langchain.prompts import ChatPromptTemplate, PromptTemplate
@@ -942,14 +943,39 @@ from langchain_core.prompts import ChatPromptTemplate
942
  import gradio as gr
943
  from PyPDF2 import PdfReader
944
  from langchain_huggingface import HuggingFaceEmbeddings
945
- from langchain_core.messages import HumanMessage, AIMessage
946
- from langchain_core.runnables import RunnablePassthrough
947
  from langchain_core.output_parsers import StrOutputParser
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
948
 
949
- # Set up environment variables
950
- groq_api_key = os.environ.get('GBV')
 
 
951
 
952
- # Initialize embedding model
953
  embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
954
 
955
  def scrape_websites(base_urls):
@@ -1042,12 +1068,12 @@ def extract_pdf_text(pdf_url):
1042
 
1043
  def clean_body_content(html_content):
1044
  soup = BeautifulSoup(html_content, "html.parser")
 
1045
 
1046
- # Remove scripts and styles
1047
  for script_or_style in soup(["script", "style"]):
1048
  script_or_style.extract()
 
1049
 
1050
- # Get cleaned text
1051
  cleaned_content = soup.get_text(separator="\n")
1052
  cleaned_content = "\n".join(
1053
  line.strip() for line in cleaned_content.splitlines() if line.strip()
@@ -1055,91 +1081,51 @@ def clean_body_content(html_content):
1055
  return cleaned_content
1056
 
1057
 
1058
- def chunk_string(s, chunk_size=1000):
1059
- return [s[i:i+chunk_size] for i in range(0, len(s), chunk_size)]
1060
-
 
 
1061
 
1062
- # Setup vectorstore for RAG
1063
- def setup_vectorstore():
1064
- if __name__ == "__main__":
1065
- website = ["https://haguruka.org.rw/"]
1066
- all_content = scrape_websites(website)
1067
 
1068
- temp_list = []
1069
- for url, content in all_content.items():
1070
- temp_list.append((url, content))
1071
 
1072
- processed_texts = []
1073
-
1074
- for element in temp_list:
1075
- if isinstance(element, tuple):
1076
- url, content = element
1077
- processed_texts.append(f"url: {url}, content: {content}")
1078
- elif isinstance(element, str):
1079
- processed_texts.append(element)
1080
- else:
1081
- processed_texts.append(str(element))
1082
-
1083
- chunked_texts = []
1084
- for text in processed_texts:
1085
- chunked_texts.extend(chunk_string(text))
1086
-
1087
- vectorstore = Chroma(
1088
- collection_name="GBVR_Dataset",
1089
- embedding_function=embed_model,
1090
- persist_directory="./",
1091
- )
1092
-
1093
- vectorstore.add_texts(chunked_texts)
1094
- return vectorstore
1095
  else:
1096
- # If imported as a module, just load the existing vectorstore
1097
- vectorstore = Chroma(
1098
- collection_name="GBVR_Dataset",
1099
- embedding_function=embed_model,
1100
- persist_directory="./",
1101
- )
1102
- return vectorstore
1103
 
 
 
1104
 
1105
- # Session Manager class to handle conversation history
1106
- class SessionManager:
1107
- def __init__(self):
1108
- self.sessions = {}
1109
-
1110
- def get_session(self, session_id):
1111
- if session_id not in self.sessions:
1112
- self.sessions[session_id] = []
1113
- return self.sessions[session_id]
1114
-
1115
- def add_message(self, session_id, role, content):
1116
- session = self.get_session(session_id)
1117
- if role == "human":
1118
- session.append(HumanMessage(content=content))
1119
- elif role == "ai":
1120
- session.append(AIMessage(content=content))
1121
-
1122
- def get_history_as_string(self, session_id, max_turns=5):
1123
- """Convert recent conversation history to string format for context"""
1124
- session = self.get_session(session_id)
1125
-
1126
- # Get the most recent conversations (limited to max_turns)
1127
- recent_messages = session[-max_turns*2:] if len(session) > max_turns*2 else session
1128
-
1129
- history_str = ""
1130
- for msg in recent_messages:
1131
- role = "User" if isinstance(msg, HumanMessage) else "Assistant"
1132
- history_str += f"{role}: {msg.content}\n"
1133
-
1134
- return history_str.strip()
1135
 
 
 
1136
 
1137
- # Initialize session manager
1138
- session_manager = SessionManager()
1139
 
1140
- # Modified template to include conversation history
1141
- template = """
1142
- You are a friendly, intelligent, and conversational AI assistant designed to provide accurate, engaging, and human-like responses based on the given context. Your goal is to extract relevant details from the provided context and assist the user effectively. Follow these guidelines:
 
 
 
 
 
 
 
 
 
 
1143
 
1144
  1. **Warm & Natural Interaction**
1145
  - If the user greets you (e.g., "Hello," "Hi," "Good morning"), respond warmly and acknowledge them.
@@ -1148,7 +1134,7 @@ template = """
1148
  - "Hello! What can I do for you? 🚀"
1149
 
1150
  2. **Precise Information Extraction**
1151
- - Provide only the relevant details from the given context.
1152
  - Do not generate extra content or assumptions beyond the provided information.
1153
 
1154
  3. **Conversational & Engaging Tone**
@@ -1164,133 +1150,81 @@ template = """
1164
 
1165
  6. **Personalized Interaction**
1166
  - Use the conversation history to provide more personalized and contextually relevant responses.
 
1167
 
1168
  7. **Direct, Concise Responses**
1169
  - If the user requests specific data, provide only the requested details without unnecessary explanations unless asked.
1170
 
1171
  8. **Extracting Relevant Links**
1172
- - If the user asks for a link related to their request, extract the most relevant URL from the context and provide it directly.
1173
  - Example response:
1174
  - "Here is the link you requested: [URL]"
1175
 
1176
- **Context from knowledge base:** {context}
1177
-
1178
- **Previous conversation history:**
1179
- {history}
1180
-
1181
- **Current User's Question:** {question}
1182
-
1183
- **Your Response:**
1184
- """
1185
 
1186
- # Create prompt template with history
1187
  rag_prompt = PromptTemplate.from_template(template)
1188
 
1189
- # Initialize Groq LLM
 
1190
  llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key)
1191
 
 
 
 
1192
  # Define the RAG chain with session history
1193
- def get_rag_chain(vectorstore):
1194
- retriever = vectorstore.as_retriever()
 
1195
 
1196
- def rag_chain_with_history(query, session_id):
1197
- # Get conversation history
1198
- history = session_manager.get_history_as_string(session_id)
1199
-
1200
- # Get relevant documents from retriever
1201
- retrieved_docs = retriever.invoke(query)
1202
- context = "\n".join([doc.page_content for doc in retrieved_docs])
1203
-
1204
- # Create the prompt with context and history
1205
- prompt = rag_prompt.format(
1206
- context=context,
1207
- history=history,
1208
- question=query
1209
- )
1210
-
1211
- # Generate response
1212
- response = llm.invoke(prompt)
1213
-
1214
- # Add to session history
1215
- session_manager.add_message(session_id, "human", query)
1216
- session_manager.add_message(session_id, "ai", response.content)
1217
-
1218
- return response.content
1219
 
1220
- return rag_chain_with_history
1221
-
1222
- # Initialize the vectorstore
1223
- vectorstore = setup_vectorstore()
1224
-
1225
- # Get the RAG chain
1226
- rag_chain_fn = get_rag_chain(vectorstore)
1227
-
1228
- # Define the streaming function for Gradio
1229
- def rag_memory_stream(message, history, session_id=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1230
  if session_id is None:
1231
- # Generate a simple session ID if none provided
1232
- # In a production app, you would use something more sophisticated
1233
  session_id = "default_session"
1234
 
1235
- # Process the message and get the response
1236
- response = rag_chain_fn(message, session_id)
1237
 
1238
  # Stream the response word by word
1239
- words = response.split()
1240
- partial_response = ""
1241
-
1242
  for word in words:
1243
- partial_response += word + " "
1244
- yield partial_response.strip()
1245
-
1246
- # Create the Chat Interface with session management
1247
- def create_chat_interface():
1248
- with gr.Blocks(theme="soft", css=custom_css) as demo:
1249
- gr.Markdown(f"# {title}")
1250
-
1251
- # Hidden session ID - in a real app, this would be managed by authentication
1252
- session_id = gr.State(value="default_session")
1253
-
1254
- chatbot = gr.Chatbot(height=600)
1255
- msg = gr.Textbox(
1256
- placeholder="Ask me anything about GBV resources...",
1257
- container=False,
1258
- scale=7
1259
- )
1260
-
1261
- def user_input(message, chat_history, session_id_val):
1262
- if message.strip() == "":
1263
- return "", chat_history
1264
-
1265
- chat_history.append([message, None])
1266
- return "", chat_history
1267
-
1268
- def bot_response(chat_history, session_id_val):
1269
- if chat_history and chat_history[-1][1] is None:
1270
- user_message = chat_history[-1][0]
1271
- bot_message = ""
1272
-
1273
- for chunk in rag_memory_stream(user_message, chat_history, session_id_val):
1274
- bot_message = chunk
1275
- chat_history[-1][1] = bot_message
1276
- yield chat_history
1277
-
1278
- send = gr.Button("Send", variant="primary", scale=1)
1279
- clear = gr.Button("Clear Chat", variant="secondary")
1280
-
1281
- # Event handlers
1282
- send_event = msg.submit(user_input, [msg, chatbot, session_id], [msg, chatbot]).then(
1283
- bot_response, [chatbot, session_id], chatbot
1284
- )
1285
- send.click(user_input, [msg, chatbot, session_id], [msg, chatbot]).then(
1286
- bot_response, [chatbot, session_id], chatbot
1287
- )
1288
- clear.click(lambda: [], outputs=[chatbot])
1289
-
1290
- return demo
1291
 
1292
  # Title with emojis
1293
- title = "🤖 GBVR Chatbot"
1294
 
1295
  # Custom CSS for styling the interface
1296
  custom_css = """
@@ -1314,7 +1248,15 @@ body {
1314
  }
1315
  """
1316
 
 
 
 
 
 
 
 
 
 
1317
  # Launch the app
1318
  if __name__ == "__main__":
1319
- demo = create_chat_interface()
1320
  demo.launch(share=True, inbrowser=True, debug=True)
 
928
  # if __name__ == "__main__":
929
  # demo.launch(share=True, inbrowser=True, debug=True)
930
 
931
+
932
  import os
933
  from langchain_groq import ChatGroq
934
  from langchain.prompts import ChatPromptTemplate, PromptTemplate
 
943
  import gradio as gr
944
  from PyPDF2 import PdfReader
945
  from langchain_huggingface import HuggingFaceEmbeddings
 
 
946
  from langchain_core.output_parsers import StrOutputParser
947
+ from langchain_core.runnables import RunnablePassthrough
948
+
949
# Lightweight per-process store of chat sessions.
class SessionManager:
    """Tracks user/AI exchanges for each chat session, keyed by session id."""

    def __init__(self):
        # session_id -> list of {"user": ..., "ai": ...} dicts, oldest first
        self.sessions = {}

    def get_or_create_session(self, session_id):
        """Return the interaction list for *session_id*, creating it if absent."""
        return self.sessions.setdefault(session_id, [])

    def add_interaction(self, session_id, user_message, ai_response):
        """Append one user/AI exchange to the session's history."""
        self.get_or_create_session(session_id).append(
            {"user": user_message, "ai": ai_response}
        )

    def get_history(self, session_id, max_turns=5):
        """Render the most recent *max_turns* exchanges as prompt-ready text.

        Returns an empty string for a fresh session.
        """
        recent = self.get_or_create_session(session_id)[-max_turns:]
        blocks = [
            f"User: {turn['user']}\nAssistant: {turn['ai']}"
            for turn in recent
        ]
        return "\n\n".join(blocks)
973
 
974
# One shared session store for every chat in this process.
session_manager = SessionManager()

# Groq API key, read from the 'GBV' environment variable (None if unset).
groq_api_key = os.environ.get('GBV')

# Embedding model used both for indexing scraped content and for query-time retrieval.
embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
980
 
981
  def scrape_websites(base_urls):
 
1068
 
1069
def clean_body_content(html_content):
    """Return the visible text of an HTML document, one non-empty line per row.

    Script and style elements are removed before extraction so their
    contents never leak into the cleaned text.
    """
    soup = BeautifulSoup(html_content, "html.parser")

    # Drop non-visible elements in place.
    for tag in soup(["script", "style"]):
        tag.extract()

    raw_text = soup.get_text(separator="\n")
    stripped_lines = (line.strip() for line in raw_text.splitlines())
    return "\n".join(line for line in stripped_lines if line)
1082
 
1083
 
1084
+ if __name__ == "__main__":
1085
+ website = ["https://haguruka.org.rw/"
1086
+
1087
+ ]
1088
+ all_content = scrape_websites(website)
1089
 
1090
+ temp_list = []
1091
+ for url, content in all_content.items():
1092
+ temp_list.append((url, content))
 
 
1093
 
1094
+
1095
+ processed_texts = []
 
1096
 
1097
+
1098
+ for element in temp_list:
1099
+ if isinstance(element, tuple):
1100
+ url, content = element
1101
+ processed_texts.append(f"url: {url}, content: {content}")
1102
+ elif isinstance(element, str):
1103
+ processed_texts.append(element)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1104
  else:
1105
+ processed_texts.append(str(element))
 
 
 
 
 
 
1106
 
1107
def chunk_string(s, chunk_size=1000):
    """Split *s* into consecutive pieces of at most *chunk_size* characters.

    Returns an empty list for an empty string; the final piece may be shorter.
    """
    starts = range(0, len(s), chunk_size)
    return [s[start:start + chunk_size] for start in starts]
1109
 
1110
+ chunked_texts = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1111
 
1112
+ for text in processed_texts:
1113
+ chunked_texts.extend(chunk_string(text))
1114
 
 
 
1115
 
1116
+ vectorstore = Chroma(
1117
+ collection_name="GBVR_Dataset",
1118
+ embedding_function=embed_model,
1119
+ persist_directory="./",
1120
+ )
1121
+
1122
+ vectorstore.get().keys()
1123
+
1124
+ vectorstore.add_texts(chunked_texts)
1125
+
1126
+ # Updated template to include conversation history
1127
+ template = ("""
1128
+ You are a friendly, intelligent, and conversational AI assistant designed to provide accurate, engaging, and human-like responses based on the given context. Your goal is to extract relevant details from the provided context: {context} and assist the user effectively. Follow these guidelines:
1129
 
1130
  1. **Warm & Natural Interaction**
1131
  - If the user greets you (e.g., "Hello," "Hi," "Good morning"), respond warmly and acknowledge them.
 
1134
  - "Hello! What can I do for you? 🚀"
1135
 
1136
  2. **Precise Information Extraction**
1137
+ - Provide only the relevant details from the given context: {context}.
1138
  - Do not generate extra content or assumptions beyond the provided information.
1139
 
1140
  3. **Conversational & Engaging Tone**
 
1150
 
1151
  6. **Personalized Interaction**
1152
  - Use the conversation history to provide more personalized and contextually relevant responses.
1153
+ - Previous conversation history: {conversation_history}
1154
 
1155
  7. **Direct, Concise Responses**
1156
  - If the user requests specific data, provide only the requested details without unnecessary explanations unless asked.
1157
 
1158
  8. **Extracting Relevant Links**
1159
+ - If the user asks for a link related to their request `{question}`, extract the most relevant URL from `{context}` and provide it directly.
1160
  - Example response:
1161
  - "Here is the link you requested: [URL]"
1162
 
1163
+ **Context:** {context}
1164
+ **User's Question:** {question}
1165
+ **Your Response:**
1166
+ """)
1167
+
 
 
 
 
1168
 
 
1169
  rag_prompt = PromptTemplate.from_template(template)
1170
 
1171
+ retriever = vectorstore.as_retriever()
1172
+
1173
  llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key)
1174
 
1175
+ # Dictionary to store user sessions with session IDs
1176
+ user_sessions = {}
1177
+
1178
  # Define the RAG chain with session history
1179
def rag_chain(question, session_id="default"):
    """Answer *question* via retrieval-augmented generation with session memory.

    Pulls relevant chunks from the vectorstore retriever, folds in the
    session's recent conversation history, prompts the LLM, and records
    the exchange before returning the answer text.
    """
    # Prior turns for this session ("" on first contact).
    history_text = session_manager.get_history(session_id)

    # Retrieve the most relevant documents and flatten them into one string.
    docs = retriever.invoke(question)
    context_text = "\n".join(doc.page_content for doc in docs)

    filled_prompt = rag_prompt.format(
        context=context_text,
        question=question,
        conversation_history=history_text,
    )

    answer = llm.invoke(filled_prompt).content

    # Persist this exchange so later turns can reference it.
    session_manager.add_interaction(session_id, question, answer)

    return answer
1201
+
1202
+ # Define the RAG memory stream function
1203
+ def rag_memory_stream(message, history):
1204
+ # Generate a session ID based on the first message if not exists
1205
+ session_id = None
1206
+ for msg in history:
1207
+ if msg[0]: # If there's a user message
1208
+ # Use first few characters of first message as simple session ID
1209
+ session_id = hash(msg[0][:20]) if session_id is None else session_id
1210
+ break
1211
+
1212
+ # Default session ID if history is empty
1213
  if session_id is None:
 
 
1214
  session_id = "default_session"
1215
 
1216
+ # Process the message and get response
1217
+ response = rag_chain(message, str(session_id))
1218
 
1219
  # Stream the response word by word
1220
+ partial_text = ""
1221
+ words = response.split(' ')
 
1222
  for word in words:
1223
+ partial_text += word + " "
1224
+ yield partial_text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1225
 
1226
  # Title with emojis
1227
+ title = "GBVR Chatbot"
1228
 
1229
  # Custom CSS for styling the interface
1230
  custom_css = """
 
1248
  }
1249
  """
1250
 
1251
# Wire the streaming RAG handler into a Gradio chat UI.
demo = gr.ChatInterface(
    fn=rag_memory_stream,
    title=title,
    fill_height=True,
    theme="soft",
    css=custom_css,  # apply the custom styling defined above
)

# Launch only when run as a script (share link, auto-open browser, debug tracebacks).
if __name__ == "__main__":
    demo.launch(share=True, inbrowser=True, debug=True)