Spaces:

Rabbit-Innotech
/

GBVR_Chatbot

Sleeping

App Files Files Community

Rabbit-Innotech committed on Apr 30, 2025

Commit

57fd9a2

verified ·

1 Parent(s): bd83779

Update app.py

Browse files

Files changed (1) hide show

app.py +955 -595

app.py CHANGED Viewed

@@ -1,658 +1,932 @@
-# # import os
-# # import time
-# # import pandas as pd
-# # import gradio as gr
-# # from langchain_groq import ChatGroq
-# # from langchain_huggingface import HuggingFaceEmbeddings
-# # from langchain_community.vectorstores import Chroma
-# # from langchain_core.prompts import PromptTemplate
-# # from langchain_core.output_parsers import StrOutputParser
-# # from langchain_core.runnables import RunnablePassthrough
-# # from PyPDF2 import PdfReader
-# # # Configuration constants
-# # COLLECTION_NAME = "GBVRS"
-# # DATA_FOLDER = "./"
-# # APP_VERSION = "v1.0.0"
-# # APP_NAME = "Ijwi ry'Ubufasha"
-# # MAX_HISTORY_MESSAGES = 8  # Limit history to avoid token limits
-# # # Global variables for application state
-# # llm = None
-# # embed_model = None
-# # vectorstore = None
-# # retriever = None
-# # rag_chain = None
-# # # User session management
-# # class UserSession:
-# #     def __init__(self, session_id, llm):
-# #         """Initialize a user session with unique ID and language model."""
-# #         self.session_id = session_id
-# #         self.user_info = {"Nickname": "Guest"}
-# #         self.conversation_history = []
-# #         self.llm = llm
-# #         self.welcome_message = None
-# #         self.last_activity = time.time()
-# #     def set_user(self, user_info):
-# #         """Set user information and generate welcome message."""
-# #         self.user_info = user_info
-# #         self.generate_welcome_message()
-# #         # Initialize conversation history with welcome message
-# #         welcome = self.get_welcome_message()
-# #         self.conversation_history = [
-# #             {"role": "assistant", "content": welcome},
-# #         ]
-# #     def get_user(self):
-# #         """Get current user information."""
-# #         return self.user_info
-# #     def generate_welcome_message(self):
-# #         """Generate a dynamic welcome message using the LLM."""
-# #         try:
-# #             nickname = self.user_info.get("Nickname", "Guest")
-# #             # Use the LLM to generate the message
-# #             prompt = (
-# #                 f"Create a brief and warm welcome message for {nickname} that's about 1-2 sentences. "
-# #                 f"Emphasize this is a safe space for discussing gender-based violence issues "
-# #                 f"and that we provide support and resources. Keep it warm and reassuring."
-# #             )
-# #             response = self.llm.invoke(prompt)
-# #             welcome = response.content.strip()
-# #             # Format the message with HTML styling
-# #             self.welcome_message = (
-# #                 f"<div style='font-size: 18px; color: #4E6BBF;'>"
-# #                 f"{welcome}"
-# #                 f"</div>"
-# #             )
-# #         except Exception as e:
-# #             # Fallback welcome message
-# #             nickname = self.user_info.get("Nickname", "Guest")
-# #             self.welcome_message = (
-# #                 f"<div style='font-size: 18px; color: #4E6BBF;'>"
-# #                 f"Welcome, {nickname}! You're in a safe space. We're here to provide support with "
-# #                 f"gender-based violence issues and connect you with resources that can help."
-# #                 f"</div>"
-# #             )
-# #     def get_welcome_message(self):
-# #         """Get the formatted welcome message."""
-# #         if not self.welcome_message:
-# #             self.generate_welcome_message()
-# #         return self.welcome_message
-# #     def add_to_history(self, role, message):
-# #         """Add a message to the conversation history."""
-# #         self.conversation_history.append({"role": role, "content": message})
-# #         self.last_activity = time.time()
-# #         # Trim history if it gets too long
-# #         if len(self.conversation_history) > MAX_HISTORY_MESSAGES * 2:  # Keep pairs of messages
-# #             # Keep the first message (welcome) and the most recent messages
-# #             self.conversation_history = [self.conversation_history[0]] + self.conversation_history[-MAX_HISTORY_MESSAGES*2+1:]
-# #     def get_conversation_history(self):
-# #         """Get the full conversation history."""
-# #         return self.conversation_history
-# #     def get_formatted_history(self):
-# #         """Get conversation history formatted as a string for the LLM."""
-# #         # Skip the welcome message and only include the last few exchanges
-# #         recent_history = self.conversation_history[1:] if len(self.conversation_history) > 1 else []
-# #         # Limit to last MAX_HISTORY_MESSAGES exchanges
-# #         if len(recent_history) > MAX_HISTORY_MESSAGES * 2:
-# #             recent_history = recent_history[-MAX_HISTORY_MESSAGES*2:]
-# #         formatted_history = ""
-# #         for entry in recent_history:
-# #             role = "User" if entry["role"] == "user" else "Assistant"
-# #             # Truncate very long messages to avoid token limits
-# #             content = entry["content"]
-# #             if len(content) > 500:  # Limit message length
-# #                 content = content[:500] + "..."
-# #             formatted_history += f"{role}: {content}\n\n"
-# #         return formatted_history
-# #     def is_expired(self, timeout_seconds=3600):
-# #         """Check if the session has been inactive for too long."""
-# #         return (time.time() - self.last_activity) > timeout_seconds
-# # # Session manager to handle multiple users
-# # class SessionManager:
-# #     def __init__(self):
-# #         """Initialize the session manager."""
-# #         self.sessions = {}
-# #         self.session_timeout = 3600  # 1 hour timeout
-# #     def get_session(self, session_id):
-# #         """Get an existing session or create a new one."""
-# #         # Clean expired sessions first
-# #         self._clean_expired_sessions()
-# #         # Create new session if needed
-# #         if session_id not in self.sessions:
-# #             self.sessions[session_id] = UserSession(session_id, llm)
-# #         return self.sessions[session_id]
-# #     def _clean_expired_sessions(self):
-# #         """Remove expired sessions to free up memory."""
-# #         expired_keys = []
-# #         for key, session in self.sessions.items():
-# #             if session.is_expired(self.session_timeout):
-# #                 expired_keys.append(key)
-# #         for key in expired_keys:
-# #             del self.sessions[key]
-# # # Initialize the session manager
-# # session_manager = SessionManager()
-# # def initialize_assistant():
-# #     """Initialize the assistant with necessary components and configurations."""
-# #     global llm, embed_model, vectorstore, retriever, rag_chain
-# #     # Initialize API key - try both possible key names
-# #     groq_api_key = os.environ.get('GBV') or os.environ.get('GBV')
-# #     if not groq_api_key:
-# #         print("WARNING: No GROQ API key found in userdata.")
-# #     # Initialize LLM - Default to Llama model which is more widely available
-# #     llm = ChatGroq(
-# #         model="llama-3.3-70b-versatile",  # More reliable than whisper model
-# #         api_key=groq_api_key
-# #     )
-# #     # Set up embedding model
-# #     try:
-# #         embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
-# #     except Exception as e:
-# #         # Fallback to smaller model
-# #         embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-# #     # Process data and create vector store
-# #     print("Processing data files...")
-# #     data = process_data_files()
-# #     print("Creating vector store...")
-# #     vectorstore = create_vectorstore(data)
-# #     retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
-# #     # Create RAG chain
-# #     print("Setting up RAG chain...")
-# #     rag_chain = create_rag_chain()
-# #     print(f"✅ {APP_NAME} initialized successfully")
-# # def process_data_files():
-# #     """Process all data files from the specified folder."""
-# #     context_data = []
-# #     try:
-# #         if not os.path.exists(DATA_FOLDER):
-# #             print(f"WARNING: Data folder does not exist: {DATA_FOLDER}")
-# #             return context_data
-# #         # Get list of data files
-# #         all_files = os.listdir(DATA_FOLDER)
-# #         data_files = [f for f in all_files if f.lower().endswith(('.csv', '.xlsx', '.xls'))]
-# #         if not data_files:
-# #             print(f"WARNING: No data files found in: {DATA_FOLDER}")
-# #             return context_data
-# #         # Process each file
-# #         for index, file_name in enumerate(data_files, 1):
-# #             print(f"Processing file {index}/{len(data_files)}: {file_name}")
-# #             file_path = os.path.join(DATA_FOLDER, file_name)
-# #             try:
-# #                 # Read file based on extension
-# #                 if file_name.lower().endswith('.csv'):
-# #                     df = pd.read_csv(file_path)
-# #                 else:
-# #                     df = pd.read_excel(file_path)
-# #                 # Check if column 3 exists (source data is in third column)
-# #                 if df.shape[1] > 2:
-# #                     column_data = df.iloc[:, 2].dropna().astype(str).tolist()
-# #                     # Each row becomes one chunk with metadata
-# #                     for i, text in enumerate(column_data):
-# #                         if text and len(text.strip()) > 0:
-# #                             context_data.append({
-# #                                 "page_content": text,
-# #                                 "metadata": {
-# #                                     "source": file_name,
-# #                                     "row": i+1
-# #                                 }
-# #                             })
-# #                 else:
-# #                     print(f"WARNING: File {file_name} has fewer than 3 columns.")
-# #             except Exception as e:
-# #                 print(f"ERROR processing file {file_name}: {e}")
-# #         print(f"✅ Created {len(context_data)} chunks from {len(data_files)} files.")
-# #     except Exception as e:
-# #         print(f"ERROR accessing data folder: {e}")
-# #     return context_data
-# # def create_vectorstore(data):
-# #     """
-# #     Creates and returns a Chroma vector store populated with the provided data.
-# #     Parameters:
-# #         data (list): A list of dictionaries, each containing 'page_content' and 'metadata'.
-# #     Returns:
-# #         Chroma: The populated Chroma vector store instance.
-# #     """
-# #     # Initialize the vector store
-# #     vectorstore = Chroma(
-# #         collection_name=COLLECTION_NAME,
-# #         embedding_function=embed_model,
-# #         persist_directory="./"
-# #     )
-# #     if not data:
-# #         print("⚠️ No data provided. Returning an empty vector store.")
-# #         return vectorstore
-# #     try:
-# #         # Extract text and metadata from the data
-# #         texts = [doc["page_content"] for doc in data]
-# #         # Add the texts and metadata to the vector store
-# #         vectorstore.add_texts(texts)
-# #     except Exception as e:
-# #         print(f"❌ Failed to add documents to vector store: {e}")
-# #     # Fix: Return vectorstore instead of vs
-# #     return vectorstore  # Changed from 'return vs' to 'return vectorstore'
-# # def create_rag_chain():
-# #     """Create the RAG chain for processing user queries."""
-# #     # Define the prompt template
-# #     template = """
-# #      You are a compassionate and supportive AI assistant specializing in helping individuals affected by Gender-Based Violence (GBV). Your responses must be based EXCLUSIVELY on the information provided in the context. Your primary goal is to provide emotionally intelligent support while maintaining appropriate boundaries.
-# #         **Previous conversation:** {conversation_history}
-# #         **Context information:** {context}
-# #         **User's Question:** {question}
-# #         When responding follow these guidelines:
-# #         1. **Strict Context Adherence**
-# #            - Only use information that appears in the provided {context}
-# #            - If the answer is not found in the context, state "I don't have that information in my available resources" rather than generating a response
-# #         2. **Personalized Communication**
-# #            - Avoid contractions (e.g., use I am instead of I'm)
-# #            - Incorporate thoughtful pauses or reflective questions when the conversation involves difficult topics
-# #            - Use selective emojis (😊, 🤗, ❤️) only when tone-appropriate and not during crisis discussions
-# #            - Balance warmth with professionalism
-# #         3. **Emotional Intelligence**
-# #            - Validate feelings without judgment
-# #            - Offer reassurance when appropriate, always centered on empowerment
-# #            - Adjust your tone based on the emotional state conveyed
-# #         4. **Conversation Management**
-# #            - Refer to {conversation_history} to maintain continuity and avoid repetition
-# #            - Use clear paragraph breaks for readability
-# #         5. **Information Delivery**
-# #            - Extract only relevant information from {context} that directly addresses the question
-# #            - Present information in accessible, non-technical language
-# #            - When information is unavailable, respond with: "I don't have that specific information right now, {first_name}. Would it be helpful if I focus on [alternative support option]?"
-# #         6. **Safety and Ethics**
-# #            - Do not generate any speculative content or advice not supported by the context
-# #            - If the context contains safety information, prioritize sharing that information
-# #         Your response must come entirely from the provided context, maintaining the supportive tone while never introducing information from outside the provided materials.
-# #         **Context:** {context}
-# #         **User's Question:** {question}
-# #         **Your Response:**
-# #     """
-# #     rag_prompt = PromptTemplate.from_template(template)
-# #     def get_context_and_question(query_with_session):
-# #         # Extract query and session_id
-# #         query = query_with_session["query"]
-# #         session_id = query_with_session["session_id"]
-# #         # Get the user session
-# #         session = session_manager.get_session(session_id)
-# #         user_info = session.get_user()
-# #         first_name = user_info.get("Nickname", "User")
-# #         conversation_hist = session.get_formatted_history()
-# #         try:
-# #             # Retrieve relevant documents
-# #             retrieved_docs = retriever.invoke(query)
-# #             context_str = format_context(retrieved_docs)
-# #         except Exception as e:
-# #             print(f"ERROR retrieving documents: {e}")
-# #             context_str = "No relevant information found."
-# #         # Return the combined inputs for the prompt
-# #         return {
-# #             "context": context_str,
-# #             "question": query,
-# #             "first_name": first_name,
-# #             "conversation_history": conversation_hist
-# #         }
-# #     # Build the chain
-# #     try:
-# #         chain = (
-# #             RunnablePassthrough()
-# #             | get_context_and_question
-# #             | rag_prompt
-# #             | llm
-# #             | StrOutputParser()
-# #         )
-# #         return chain
-# #     except Exception as e:
-# #         print(f"ERROR creating RAG chain: {e}")
-# #         # Return a simple function as fallback
-# #         def fallback_chain(query_with_session):
-# #             session_id = query_with_session["session_id"]
-# #             session = session_manager.get_session(session_id)
-# #             nickname = session.get_user().get("Nickname", "there")
-# #             return f"I'm here to help you, {nickname}, but I'm experiencing some technical difficulties right now. Please try again shortly."
-# #         return fallback_chain
-# # def format_context(retrieved_docs):
-# #     """Format retrieved documents into a string context."""
-# #     if not retrieved_docs:
-# #         return "No relevant information available."
-# #     return "\n\n".join([doc.page_content for doc in retrieved_docs])
-# # def rag_memory_stream(message, history, session_id):
-# #     """Process user message and generate response with memory."""
-# #     # Get the user session
-# #     session = session_manager.get_session(session_id)
-# #     # Add user message to history
-# #     session.add_to_history("user", message)
-# #     try:
-# #         # Get response from RAG chain
-# #         print(f"Processing message for session {session_id}: {message[:50]}...")
-# #         # Pass both query and session_id to the chain
-# #         response = rag_chain.invoke({
-# #             "query": message,
-# #             "session_id": session_id
-# #         })
-# #         print(f"Generated response: {response[:50]}...")
-# #         # Add assistant response to history
-# #         session.add_to_history("assistant", response)
-# #         # Yield the response
-# #         yield response
-# #     except Exception as e:
-# #         import traceback
-# #         print(f"ERROR in rag_memory_stream: {e}")
-# #         print(f"Detailed error: {traceback.format_exc()}")
-# #         nickname = session.get_user().get("Nickname", "there")
-# #         error_msg = f"I'm sorry, {nickname}. I encountered an error processing your request. Let's try a different question."
-# #         session.add_to_history("assistant", error_msg)
-# #         yield error_msg
-# # def collect_user_info(nickname, session_id):
-# #     """Store user details and initialize session."""
-# #     if not nickname or nickname.strip() == "":
-# #         return "Nickname is required to proceed.", gr.update(visible=False), gr.update(visible=True), []
-# #     # Store user info for chat session
-# #     user_info = {
-# #         "Nickname": nickname.strip(),
-# #         "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
-# #     }
-# #     # Get the session and set user info
-# #     session = session_manager.get_session(session_id)
-# #     session.set_user(user_info)
-# #     # Generate welcome message
-# #     welcome_message = session.get_welcome_message()
-# #     # Return welcome message and update UI
-# #     return welcome_message, gr.update(visible=True), gr.update(visible=False), [(None, welcome_message)]
-# # def get_css():
-# #     """Define CSS for the UI."""
-# #     return """
-# #     :root {
-# #         --primary: #4E6BBF;
-# #         --primary-light: #697BBF;
-# #         --text-primary: #333333;
-# #         --text-secondary: #666666;
-# #         --background: #F9FAFC;
-# #         --card-bg: #FFFFFF;
-# #         --border: #E1E5F0;
-# #         --shadow: rgba(0, 0, 0, 0.05);
-# #     }
-# #     body, .gradio-container {
-# #         margin: 0;
-# #         padding: 0;
-# #         width: 100vw;
-# #         height: 100vh;
-# #         display: flex;
-# #         flex-direction: column;
-# #         justify-content: center;
-# #         align-items: center;
-# #         background: var(--background);
-# #         color: var(--text-primary);
-# #         font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-# #     }
-# #     .gradio-container {
-# #         max-width: 100%;
-# #         max-height: 100%;
-# #     }
-# #     .gr-box {
-# #         background: var(--card-bg);
-# #         color: var(--text-primary);
-# #         border-radius: 12px;
-# #         padding: 2rem;
-# #         border: 1px solid var(--border);
-# #         box-shadow: 0 4px 12px var(--shadow);
-# #     }
-# #     .gr-button-primary {
-# #         background: var(--primary);
-# #         color: white;
-# #         padding: 12px 24px;
-# #         border-radius: 8px;
-# #         transition: all 0.3s ease;
-# #         border: none;
-# #         font-weight: bold;
-# #     }
-# #     .gr-button-primary:hover {
-# #         transform: translateY(-1px);
-# #         box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
-# #         background: var(--primary-light);
-# #     }
-# #     footer {
-# #         text-align: center;
-# #         color: var(--text-secondary);
-# #         padding: 1rem;
-# #         font-size: 0.9em;
-# #     }
-# #     .gr-markdown h2 {
-# #         color: var(--primary);
-# #         margin-bottom: 0.5rem;
-# #         font-size: 1.8em;
-# #     }
-# #     .gr-markdown h3 {
-# #         color: var(--text-secondary);
-# #         margin-bottom: 1.5rem;
-# #         font-weight: normal;
-# #     }
-# #     #chatbot_container .chat-title h1,
-# #     #chatbot_container .empty-chatbot {
-# #         color: var(--primary);
-# #     }
-# #     #input_nickname {
-# #         padding: 12px;
-# #         border-radius: 8px;
-# #         border: 1px solid var(--border);
-# #         background: var(--card-bg);
-# #         transition: all 0.3s ease;
-# #     }
-# #     #input_nickname:focus {
-# #         border-color: var(--primary);
-# #         box-shadow: 0 0 0 2px rgba(78, 107, 191, 0.2);
-# #         outline: none;
-# #     }
-# #     .chatbot-container .message.user {
-# #         background: #E8F0FE;
-# #         border-radius: 12px 12px 0 12px;
-# #     }
-# #     .chatbot-container .message.bot {
-# #         background: #F5F7FF;
-# #         border-radius: 12px 12px 12px 0;
-# #     }
-# #     """
-# # def create_ui():
-# #     """Create and configure the Gradio UI."""
-# #     with gr.Blocks(css=get_css(), theme=gr.themes.Soft()) as demo:
-# #         # Create a unique session ID for this browser tab
-# #         session_id = gr.State(value=f"session_{int(time.time())}_{os.urandom(4).hex()}")
-# #         # Registration section
-# #         with gr.Column(visible=True, elem_id="registration_container") as registration_container:
-# #             gr.Markdown(f"## Welcome to {APP_NAME}")
-# #             gr.Markdown("### Your privacy is important to us. Please provide a nickname to continue.")
-# #             with gr.Row():
-# #                 first_name = gr.Textbox(
-# #                     label="Nickname",
-# #                     placeholder="Enter your nickname",
-# #                     scale=1,
-# #                     elem_id="input_nickname"
-# #                 )
-# #             with gr.Row():
-# #                 submit_btn = gr.Button("Start Chatting", variant="primary", scale=2)
-# #             response_message = gr.Markdown()
-# #         # Chatbot section (initially hidden)
-# #         with gr.Column(visible=False, elem_id="chatbot_container") as chatbot_container:
-# #             # Create a custom chat interface to pass session_id to our function
-# #             chatbot = gr.Chatbot(
-# #                 elem_id="chatbot",
-# #                 height=500,
-# #                 show_label=False
-# #             )
-# #             with gr.Row():
-# #                 msg = gr.Textbox(
-# #                     placeholder="Type your message here...",
-# #                     show_label=False,
-# #                     container=False,
-# #                     scale=9
-# #                 )
-# #                 submit = gr.Button("Send", scale=1, variant="primary")
-# #             examples = gr.Examples(
-# #                 examples=[
-# #                     "What resources are available for GBV victims?",
-# #                     "How can I report an incident?",
-# #                     "What are my legal rights?",
-# #                     "I need help, what should I do first?"
-# #                 ],
-# #                 inputs=msg
-# #             )
-# #             # Footer with version info
-# #             gr.Markdown(f"{APP_NAME} {APP_VERSION} © 2025")
-# #             # Handle chat message submission
-# #             def respond(message, chat_history, session_id):
-# #                 bot_message = ""
-# #                 for chunk in rag_memory_stream(message, chat_history, session_id):
-# #                     bot_message += chunk
-# #                 chat_history.append((message, bot_message))
-# #                 return "", chat_history
-# #             msg.submit(respond, [msg, chatbot, session_id], [msg, chatbot])
-# #             submit.click(respond, [msg, chatbot, session_id], [msg, chatbot])
-# #         # Handle user registration
-# #         submit_btn.click(
-# #             collect_user_info,
-# #             inputs=[first_name, session_id],
-# #             outputs=[response_message, chatbot_container, registration_container, chatbot]
-# #         )
-# #     return demo
-# # def launch_app():
-# #     """Launch the Gradio interface."""
-# #     ui = create_ui()
-# #     ui.launch(share=True)
-# # # Main execution
-# # if __name__ == "__main__":
 # #     try:
-# #         # Initialize and launch the assistant
-# #         initialize_assistant()
-# #         launch_app()
 # #     except Exception as e:
-# #         import traceback
-# #         print(f"❌ Fatal error initializing GBV Assistant: {e}")
-# #         print(traceback.format_exc())
-# #         # Create a minimal emergency UI to display the error
-# #         with gr.Blocks() as error_demo:
-# #             gr.Markdown("## System Error")
-# #             gr.Markdown(f"An error occurred while initializing the application: {str(e)}")
-# #             gr.Markdown("Please check your configuration and try again.")
-# #         error_demo.launch(share=True, inbrowser=True, debug=True)
-# ############################################################################################################
 # import os
@@ -669,6 +943,36 @@
 # import gradio as gr
 # from PyPDF2 import PdfReader
 # from langchain_huggingface import HuggingFaceEmbeddings
 # groq_api_key= os.environ.get('GBV')
@@ -809,9 +1113,6 @@
 #   chunked_texts.extend(chunk_string(text))
 # vectorstore = Chroma(
 #     collection_name="GBVR_Dataset",
 #     embedding_function=embed_model,
@@ -822,7 +1123,7 @@
 # vectorstore.add_texts(chunked_texts)
 # template = ("""
 #     You are a friendly, intelligent, and conversational AI assistant designed to provide accurate, engaging, and human-like responses based on the given context. Your goal is to extract relevant details from the provided context: {context} and assist the user effectively. Follow these guidelines:
@@ -848,7 +1149,8 @@
 #          - "I don't have that information at the moment, but I'm happy to help with something else! 😊"
 #     6. **Personalized Interaction**
-#        - If user history is available, tailor responses based on their previous interactions for a more natural and engaging conversation.
 #     7. **Direct, Concise Responses**
 #        - If the user requests specific data, provide only the requested details without unnecessary explanations unless asked.
@@ -868,30 +1170,62 @@
 # retriever = vectorstore.as_retriever()
-# from langchain_core.output_parsers import StrOutputParser
-# from langchain_core.runnables import RunnablePassthrough
-# llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key )
-# rag_chain = (
-#     {"context": retriever, "question": RunnablePassthrough()}
-#     | rag_prompt
-#     | llm
-#     | StrOutputParser()
-# )
 # # Define the RAG memory stream function
 # def rag_memory_stream(message, history):
 #     partial_text = ""
-#     for new_text in rag_chain.stream(message):  # Replace with actual streaming logic
-#         partial_text += new_text
-#         yield partial_text
 # # Title with emojis
 # title = "GBVR Chatbot"
 # # Custom CSS for styling the interface
 # custom_css = """
 # body {
@@ -912,7 +1246,6 @@
 # .gr-textbox:focus, .gr-button:focus {
 #     outline: none; /* Remove outline focus for a cleaner look */
 # }
 # """
 # # Create the Chat Interface
@@ -927,8 +1260,6 @@
 # # Launch the app
 # if __name__ == "__main__":
 #     demo.launch(share=True, inbrowser=True, debug=True)
 import os
 from langchain_groq import ChatGroq
 from langchain.prompts import ChatPromptTemplate, PromptTemplate
@@ -1114,7 +1445,7 @@ for text in processed_texts:
 vectorstore = Chroma(
-    collection_name="GBVR_Dataset",
     embedding_function=embed_model,
     persist_directory="./",
 )
@@ -1228,12 +1559,15 @@ title = "GBVR Chatbot"
 # Custom CSS for styling the interface
 custom_css = """
 body {
     font-family: "Arial", serif;
 }
 .gradio-container {
     font-family: "Times New Roman", serif;
 }
 .gr-button {
     background-color: #007bff; /* Blue button */
     color: white;
@@ -1243,18 +1577,44 @@ body {
     padding: 10px 20px;
     cursor: pointer;
 }
 .gr-textbox:focus, .gr-button:focus {
     outline: none; /* Remove outline focus for a cleaner look */
 }
 """
-# Create the Chat Interface
 demo = gr.ChatInterface(
     fn=rag_memory_stream,
     title=title,
     fill_height=True,
     theme="soft",
     css=custom_css, # Apply the custom CSS
 )
 # Launch the app

+# # # import os
+# # # import time
+# # # import pandas as pd
+# # # import gradio as gr
+# # # from langchain_groq import ChatGroq
+# # # from langchain_huggingface import HuggingFaceEmbeddings
+# # # from langchain_community.vectorstores import Chroma
+# # # from langchain_core.prompts import PromptTemplate
+# # # from langchain_core.output_parsers import StrOutputParser
+# # # from langchain_core.runnables import RunnablePassthrough
+# # # from PyPDF2 import PdfReader
+# # # # Configuration constants
+# # # COLLECTION_NAME = "GBVRS"
+# # # DATA_FOLDER = "./"
+# # # APP_VERSION = "v1.0.0"
+# # # APP_NAME = "Ijwi ry'Ubufasha"
+# # # MAX_HISTORY_MESSAGES = 8  # Limit history to avoid token limits
+# # # # Global variables for application state
+# # # llm = None
+# # # embed_model = None
+# # # vectorstore = None
+# # # retriever = None
+# # # rag_chain = None
+# # # # User session management
+# # # class UserSession:
+# # #     def __init__(self, session_id, llm):
+# # #         """Initialize a user session with unique ID and language model."""
+# # #         self.session_id = session_id
+# # #         self.user_info = {"Nickname": "Guest"}
+# # #         self.conversation_history = []
+# # #         self.llm = llm
+# # #         self.welcome_message = None
+# # #         self.last_activity = time.time()
+# # #     def set_user(self, user_info):
+# # #         """Set user information and generate welcome message."""
+# # #         self.user_info = user_info
+# # #         self.generate_welcome_message()
+# # #         # Initialize conversation history with welcome message
+# # #         welcome = self.get_welcome_message()
+# # #         self.conversation_history = [
+# # #             {"role": "assistant", "content": welcome},
+# # #         ]
+# # #     def get_user(self):
+# # #         """Get current user information."""
+# # #         return self.user_info
+# # #     def generate_welcome_message(self):
+# # #         """Generate a dynamic welcome message using the LLM."""
+# # #         try:
+# # #             nickname = self.user_info.get("Nickname", "Guest")
+# # #             # Use the LLM to generate the message
+# # #             prompt = (
+# # #                 f"Create a brief and warm welcome message for {nickname} that's about 1-2 sentences. "
+# # #                 f"Emphasize this is a safe space for discussing gender-based violence issues "
+# # #                 f"and that we provide support and resources. Keep it warm and reassuring."
+# # #             )
+# # #             response = self.llm.invoke(prompt)
+# # #             welcome = response.content.strip()
+# # #             # Format the message with HTML styling
+# # #             self.welcome_message = (
+# # #                 f"<div style='font-size: 18px; color: #4E6BBF;'>"
+# # #                 f"{welcome}"
+# # #                 f"</div>"
+# # #             )
+# # #         except Exception as e:
+# # #             # Fallback welcome message
+# # #             nickname = self.user_info.get("Nickname", "Guest")
+# # #             self.welcome_message = (
+# # #                 f"<div style='font-size: 18px; color: #4E6BBF;'>"
+# # #                 f"Welcome, {nickname}! You're in a safe space. We're here to provide support with "
+# # #                 f"gender-based violence issues and connect you with resources that can help."
+# # #                 f"</div>"
+# # #             )
+# # #     def get_welcome_message(self):
+# # #         """Get the formatted welcome message."""
+# # #         if not self.welcome_message:
+# # #             self.generate_welcome_message()
+# # #         return self.welcome_message
+# # #     def add_to_history(self, role, message):
+# # #         """Add a message to the conversation history."""
+# # #         self.conversation_history.append({"role": role, "content": message})
+# # #         self.last_activity = time.time()
+# # #         # Trim history if it gets too long
+# # #         if len(self.conversation_history) > MAX_HISTORY_MESSAGES * 2:  # Keep pairs of messages
+# # #             # Keep the first message (welcome) and the most recent messages
+# # #             self.conversation_history = [self.conversation_history[0]] + self.conversation_history[-MAX_HISTORY_MESSAGES*2+1:]
+# # #     def get_conversation_history(self):
+# # #         """Get the full conversation history."""
+# # #         return self.conversation_history
+# # #     def get_formatted_history(self):
+# # #         """Get conversation history formatted as a string for the LLM."""
+# # #         # Skip the welcome message and only include the last few exchanges
+# # #         recent_history = self.conversation_history[1:] if len(self.conversation_history) > 1 else []
+# # #         # Limit to last MAX_HISTORY_MESSAGES exchanges
+# # #         if len(recent_history) > MAX_HISTORY_MESSAGES * 2:
+# # #             recent_history = recent_history[-MAX_HISTORY_MESSAGES*2:]
+# # #         formatted_history = ""
+# # #         for entry in recent_history:
+# # #             role = "User" if entry["role"] == "user" else "Assistant"
+# # #             # Truncate very long messages to avoid token limits
+# # #             content = entry["content"]
+# # #             if len(content) > 500:  # Limit message length
+# # #                 content = content[:500] + "..."
+# # #             formatted_history += f"{role}: {content}\n\n"
+# # #         return formatted_history
+# # #     def is_expired(self, timeout_seconds=3600):
+# # #         """Check if the session has been inactive for too long."""
+# # #         return (time.time() - self.last_activity) > timeout_seconds
+# # # # Session manager to handle multiple users
+# # # class SessionManager:
+# # #     def __init__(self):
+# # #         """Initialize the session manager."""
+# # #         self.sessions = {}
+# # #         self.session_timeout = 3600  # 1 hour timeout
+# # #     def get_session(self, session_id):
+# # #         """Get an existing session or create a new one."""
+# # #         # Clean expired sessions first
+# # #         self._clean_expired_sessions()
+# # #         # Create new session if needed
+# # #         if session_id not in self.sessions:
+# # #             self.sessions[session_id] = UserSession(session_id, llm)
+# # #         return self.sessions[session_id]
+# # #     def _clean_expired_sessions(self):
+# # #         """Remove expired sessions to free up memory."""
+# # #         expired_keys = []
+# # #         for key, session in self.sessions.items():
+# # #             if session.is_expired(self.session_timeout):
+# # #                 expired_keys.append(key)
+# # #         for key in expired_keys:
+# # #             del self.sessions[key]
+# # # # Initialize the session manager
+# # # session_manager = SessionManager()
+# # # def initialize_assistant():
+# # #     """Initialize the assistant with necessary components and configurations."""
+# # #     global llm, embed_model, vectorstore, retriever, rag_chain
+# # #     # Initialize API key from the 'GBV' environment variable (both lookups below read the same name)
+# # #     groq_api_key = os.environ.get('GBV') or os.environ.get('GBV')
+# # #     if not groq_api_key:
+# # #         print("WARNING: No GROQ API key found in userdata.")
+# # #     # Initialize LLM - Default to Llama model which is more widely available
+# # #     llm = ChatGroq(
+# # #         model="llama-3.3-70b-versatile",  # More reliable than whisper model
+# # #         api_key=groq_api_key
+# # #     )
+# # #     # Set up embedding model
+# # #     try:
+# # #         embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
+# # #     except Exception as e:
+# # #         # Fallback to smaller model
+# # #         embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+# # #     # Process data and create vector store
+# # #     print("Processing data files...")
+# # #     data = process_data_files()
+# # #     print("Creating vector store...")
+# # #     vectorstore = create_vectorstore(data)
+# # #     retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+# # #     # Create RAG chain
+# # #     print("Setting up RAG chain...")
+# # #     rag_chain = create_rag_chain()
+# # #     print(f"✅ {APP_NAME} initialized successfully")
+# # # def process_data_files():
+# # #     """Process all data files from the specified folder."""
+# # #     context_data = []
+# # #     try:
+# # #         if not os.path.exists(DATA_FOLDER):
+# # #             print(f"WARNING: Data folder does not exist: {DATA_FOLDER}")
+# # #             return context_data
+# # #         # Get list of data files
+# # #         all_files = os.listdir(DATA_FOLDER)
+# # #         data_files = [f for f in all_files if f.lower().endswith(('.csv', '.xlsx', '.xls'))]
+# # #         if not data_files:
+# # #             print(f"WARNING: No data files found in: {DATA_FOLDER}")
+# # #             return context_data
+# # #         # Process each file
+# # #         for index, file_name in enumerate(data_files, 1):
+# # #             print(f"Processing file {index}/{len(data_files)}: {file_name}")
+# # #             file_path = os.path.join(DATA_FOLDER, file_name)
+# # #             try:
+# # #                 # Read file based on extension
+# # #                 if file_name.lower().endswith('.csv'):
+# # #                     df = pd.read_csv(file_path)
+# # #                 else:
+# # #                     df = pd.read_excel(file_path)
+# # #                 # Check if column 3 exists (source data is in third column)
+# # #                 if df.shape[1] > 2:
+# # #                     column_data = df.iloc[:, 2].dropna().astype(str).tolist()
+# # #                     # Each row becomes one chunk with metadata
+# # #                     for i, text in enumerate(column_data):
+# # #                         if text and len(text.strip()) > 0:
+# # #                             context_data.append({
+# # #                                 "page_content": text,
+# # #                                 "metadata": {
+# # #                                     "source": file_name,
+# # #                                     "row": i+1
+# # #                                 }
+# # #                             })
+# # #                 else:
+# # #                     print(f"WARNING: File {file_name} has fewer than 3 columns.")
+# # #             except Exception as e:
+# # #                 print(f"ERROR processing file {file_name}: {e}")
+# # #         print(f"✅ Created {len(context_data)} chunks from {len(data_files)} files.")
+# # #     except Exception as e:
+# # #         print(f"ERROR accessing data folder: {e}")
+# # #     return context_data
+# # # def create_vectorstore(data):
+# # #     """
+# # #     Creates and returns a Chroma vector store populated with the provided data.
+# # #     Parameters:
+# # #         data (list): A list of dictionaries, each containing 'page_content' and 'metadata'.
+# # #     Returns:
+# # #         Chroma: The populated Chroma vector store instance.
+# # #     """
+# # #     # Initialize the vector store
+# # #     vectorstore = Chroma(
+# # #         collection_name=COLLECTION_NAME,
+# # #         embedding_function=embed_model,
+# # #         persist_directory="./"
+# # #     )
+# # #     if not data:
+# # #         print("⚠️ No data provided. Returning an empty vector store.")
+# # #         return vectorstore
+# # #     try:
+# # #         # Extract the text of each document (metadata is not collected here)
+# # #         texts = [doc["page_content"] for doc in data]
+# # #         # Add the texts to the vector store (no metadata is passed)
+# # #         vectorstore.add_texts(texts)
+# # #     except Exception as e:
+# # #         print(f"❌ Failed to add documents to vector store: {e}")
+# # #     # Fix: Return vectorstore instead of vs
+# # #     return vectorstore  # Changed from 'return vs' to 'return vectorstore'
+# # # def create_rag_chain():
+# # #     """Create the RAG chain for processing user queries."""
+# # #     # Define the prompt template
+# # #     template = """
+# # #      You are a compassionate and supportive AI assistant specializing in helping individuals affected by Gender-Based Violence (GBV). Your responses must be based EXCLUSIVELY on the information provided in the context. Your primary goal is to provide emotionally intelligent support while maintaining appropriate boundaries.
+# # #         **Previous conversation:** {conversation_history}
+# # #         **Context information:** {context}
+# # #         **User's Question:** {question}
+# # #         When responding follow these guidelines:
+# # #         1. **Strict Context Adherence**
+# # #            - Only use information that appears in the provided {context}
+# # #            - If the answer is not found in the context, state "I don't have that information in my available resources" rather than generating a response
+# # #         2. **Personalized Communication**
+# # #            - Avoid contractions (e.g., use I am instead of I'm)
+# # #            - Incorporate thoughtful pauses or reflective questions when the conversation involves difficult topics
+# # #            - Use selective emojis (😊, 🤗, ❤️) only when tone-appropriate and not during crisis discussions
+# # #            - Balance warmth with professionalism
+# # #         3. **Emotional Intelligence**
+# # #            - Validate feelings without judgment
+# # #            - Offer reassurance when appropriate, always centered on empowerment
+# # #            - Adjust your tone based on the emotional state conveyed
+# # #         4. **Conversation Management**
+# # #            - Refer to {conversation_history} to maintain continuity and avoid repetition
+# # #            - Use clear paragraph breaks for readability
+# # #         5. **Information Delivery**
+# # #            - Extract only relevant information from {context} that directly addresses the question
+# # #            - Present information in accessible, non-technical language
+# # #            - When information is unavailable, respond with: "I don't have that specific information right now, {first_name}. Would it be helpful if I focus on [alternative support option]?"
+# # #         6. **Safety and Ethics**
+# # #            - Do not generate any speculative content or advice not supported by the context
+# # #            - If the context contains safety information, prioritize sharing that information
+# # #         Your response must come entirely from the provided context, maintaining the supportive tone while never introducing information from outside the provided materials.
+# # #         **Context:** {context}
+# # #         **User's Question:** {question}
+# # #         **Your Response:**
+# # #     """
+# # #     rag_prompt = PromptTemplate.from_template(template)
+# # #     def get_context_and_question(query_with_session):
+# # #         # Extract query and session_id
+# # #         query = query_with_session["query"]
+# # #         session_id = query_with_session["session_id"]
+# # #         # Get the user session
+# # #         session = session_manager.get_session(session_id)
+# # #         user_info = session.get_user()
+# # #         first_name = user_info.get("Nickname", "User")
+# # #         conversation_hist = session.get_formatted_history()
+# # #         try:
+# # #             # Retrieve relevant documents
+# # #             retrieved_docs = retriever.invoke(query)
+# # #             context_str = format_context(retrieved_docs)
+# # #         except Exception as e:
+# # #             print(f"ERROR retrieving documents: {e}")
+# # #             context_str = "No relevant information found."
+# # #         # Return the combined inputs for the prompt
+# # #         return {
+# # #             "context": context_str,
+# # #             "question": query,
+# # #             "first_name": first_name,
+# # #             "conversation_history": conversation_hist
+# # #         }
+# # #     # Build the chain
+# # #     try:
+# # #         chain = (
+# # #             RunnablePassthrough()
+# # #             | get_context_and_question
+# # #             | rag_prompt
+# # #             | llm
+# # #             | StrOutputParser()
+# # #         )
+# # #         return chain
+# # #     except Exception as e:
+# # #         print(f"ERROR creating RAG chain: {e}")
+# # #         # Return a simple function as fallback
+# # #         def fallback_chain(query_with_session):
+# # #             session_id = query_with_session["session_id"]
+# # #             session = session_manager.get_session(session_id)
+# # #             nickname = session.get_user().get("Nickname", "there")
+# # #             return f"I'm here to help you, {nickname}, but I'm experiencing some technical difficulties right now. Please try again shortly."
+# # #         return fallback_chain
+# # # def format_context(retrieved_docs):
+# # #     """Format retrieved documents into a string context."""
+# # #     if not retrieved_docs:
+# # #         return "No relevant information available."
+# # #     return "\n\n".join([doc.page_content for doc in retrieved_docs])
+# # # def rag_memory_stream(message, history, session_id):
+# # #     """Process user message and generate response with memory."""
+# # #     # Get the user session
+# # #     session = session_manager.get_session(session_id)
+# # #     # Add user message to history
+# # #     session.add_to_history("user", message)
+# # #     try:
+# # #         # Get response from RAG chain
+# # #         print(f"Processing message for session {session_id}: {message[:50]}...")
+# # #         # Pass both query and session_id to the chain
+# # #         response = rag_chain.invoke({
+# # #             "query": message,
+# # #             "session_id": session_id
+# # #         })
+# # #         print(f"Generated response: {response[:50]}...")
+# # #         # Add assistant response to history
+# # #         session.add_to_history("assistant", response)
+# # #         # Yield the response
+# # #         yield response
+# # #     except Exception as e:
+# # #         import traceback
+# # #         print(f"ERROR in rag_memory_stream: {e}")
+# # #         print(f"Detailed error: {traceback.format_exc()}")
+# # #         nickname = session.get_user().get("Nickname", "there")
+# # #         error_msg = f"I'm sorry, {nickname}. I encountered an error processing your request. Let's try a different question."
+# # #         session.add_to_history("assistant", error_msg)
+# # #         yield error_msg
+# # # def collect_user_info(nickname, session_id):
+# # #     """Store user details and initialize session."""
+# # #     if not nickname or nickname.strip() == "":
+# # #         return "Nickname is required to proceed.", gr.update(visible=False), gr.update(visible=True), []
+# # #     # Store user info for chat session
+# # #     user_info = {
+# # #         "Nickname": nickname.strip(),
+# # #         "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
+# # #     }
+# # #     # Get the session and set user info
+# # #     session = session_manager.get_session(session_id)
+# # #     session.set_user(user_info)
+# # #     # Retrieve the welcome message that set_user() generated above
+# # #     welcome_message = session.get_welcome_message()
+# # #     # Return welcome message and update UI
+# # #     return welcome_message, gr.update(visible=True), gr.update(visible=False), [(None, welcome_message)]
+# # # def get_css():
+# # #     """Define CSS for the UI."""
+# # #     return """
+# # #     :root {
+# # #         --primary: #4E6BBF;
+# # #         --primary-light: #697BBF;
+# # #         --text-primary: #333333;
+# # #         --text-secondary: #666666;
+# # #         --background: #F9FAFC;
+# # #         --card-bg: #FFFFFF;
+# # #         --border: #E1E5F0;
+# # #         --shadow: rgba(0, 0, 0, 0.05);
+# # #     }
+# # #     body, .gradio-container {
+# # #         margin: 0;
+# # #         padding: 0;
+# # #         width: 100vw;
+# # #         height: 100vh;
+# # #         display: flex;
+# # #         flex-direction: column;
+# # #         justify-content: center;
+# # #         align-items: center;
+# # #         background: var(--background);
+# # #         color: var(--text-primary);
+# # #         font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+# # #     }
+# # #     .gradio-container {
+# # #         max-width: 100%;
+# # #         max-height: 100%;
+# # #     }
+# # #     .gr-box {
+# # #         background: var(--card-bg);
+# # #         color: var(--text-primary);
+# # #         border-radius: 12px;
+# # #         padding: 2rem;
+# # #         border: 1px solid var(--border);
+# # #         box-shadow: 0 4px 12px var(--shadow);
+# # #     }
+# # #     .gr-button-primary {
+# # #         background: var(--primary);
+# # #         color: white;
+# # #         padding: 12px 24px;
+# # #         border-radius: 8px;
+# # #         transition: all 0.3s ease;
+# # #         border: none;
+# # #         font-weight: bold;
+# # #     }
+# # #     .gr-button-primary:hover {
+# # #         transform: translateY(-1px);
+# # #         box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
+# # #         background: var(--primary-light);
+# # #     }
+# # #     footer {
+# # #         text-align: center;
+# # #         color: var(--text-secondary);
+# # #         padding: 1rem;
+# # #         font-size: 0.9em;
+# # #     }
+# # #     .gr-markdown h2 {
+# # #         color: var(--primary);
+# # #         margin-bottom: 0.5rem;
+# # #         font-size: 1.8em;
+# # #     }
+# # #     .gr-markdown h3 {
+# # #         color: var(--text-secondary);
+# # #         margin-bottom: 1.5rem;
+# # #         font-weight: normal;
+# # #     }
+# # #     #chatbot_container .chat-title h1,
+# # #     #chatbot_container .empty-chatbot {
+# # #         color: var(--primary);
+# # #     }
+# # #     #input_nickname {
+# # #         padding: 12px;
+# # #         border-radius: 8px;
+# # #         border: 1px solid var(--border);
+# # #         background: var(--card-bg);
+# # #         transition: all 0.3s ease;
+# # #     }
+# # #     #input_nickname:focus {
+# # #         border-color: var(--primary);
+# # #         box-shadow: 0 0 0 2px rgba(78, 107, 191, 0.2);
+# # #         outline: none;
+# # #     }
+# # #     .chatbot-container .message.user {
+# # #         background: #E8F0FE;
+# # #         border-radius: 12px 12px 0 12px;
+# # #     }
+# # #     .chatbot-container .message.bot {
+# # #         background: #F5F7FF;
+# # #         border-radius: 12px 12px 12px 0;
+# # #     }
+# # #     """
+# # # def create_ui():
+# # #     """Create and configure the Gradio UI."""
+# # #     with gr.Blocks(css=get_css(), theme=gr.themes.Soft()) as demo:
+# # #         # Session ID stored in gr.State (NOTE(review): the value is computed once in create_ui(), so it may not be unique per browser tab — confirm)
+# # #         session_id = gr.State(value=f"session_{int(time.time())}_{os.urandom(4).hex()}")
+# # #         # Registration section
+# # #         with gr.Column(visible=True, elem_id="registration_container") as registration_container:
+# # #             gr.Markdown(f"## Welcome to {APP_NAME}")
+# # #             gr.Markdown("### Your privacy is important to us. Please provide a nickname to continue.")
+# # #             with gr.Row():
+# # #                 first_name = gr.Textbox(
+# # #                     label="Nickname",
+# # #                     placeholder="Enter your nickname",
+# # #                     scale=1,
+# # #                     elem_id="input_nickname"
+# # #                 )
+# # #             with gr.Row():
+# # #                 submit_btn = gr.Button("Start Chatting", variant="primary", scale=2)
+# # #             response_message = gr.Markdown()
+# # #         # Chatbot section (initially hidden)
+# # #         with gr.Column(visible=False, elem_id="chatbot_container") as chatbot_container:
+# # #             # Create a custom chat interface to pass session_id to our function
+# # #             chatbot = gr.Chatbot(
+# # #                 elem_id="chatbot",
+# # #                 height=500,
+# # #                 show_label=False
+# # #             )
+# # #             with gr.Row():
+# # #                 msg = gr.Textbox(
+# # #                     placeholder="Type your message here...",
+# # #                     show_label=False,
+# # #                     container=False,
+# # #                     scale=9
+# # #                 )
+# # #                 submit = gr.Button("Send", scale=1, variant="primary")
+# # #             examples = gr.Examples(
+# # #                 examples=[
+# # #                     "What resources are available for GBV victims?",
+# # #                     "How can I report an incident?",
+# # #                     "What are my legal rights?",
+# # #                     "I need help, what should I do first?"
+# # #                 ],
+# # #                 inputs=msg
+# # #             )
+# # #             # Footer with version info
+# # #             gr.Markdown(f"{APP_NAME} {APP_VERSION} © 2025")
+# # #             # Handle chat message submission
+# # #             def respond(message, chat_history, session_id):
+# # #                 bot_message = ""
+# # #                 for chunk in rag_memory_stream(message, chat_history, session_id):
+# # #                     bot_message += chunk
+# # #                 chat_history.append((message, bot_message))
+# # #                 return "", chat_history
+# # #             msg.submit(respond, [msg, chatbot, session_id], [msg, chatbot])
+# # #             submit.click(respond, [msg, chatbot, session_id], [msg, chatbot])
+# # #         # Handle user registration
+# # #         submit_btn.click(
+# # #             collect_user_info,
+# # #             inputs=[first_name, session_id],
+# # #             outputs=[response_message, chatbot_container, registration_container, chatbot]
+# # #         )
+# # #     return demo
+# # # def launch_app():
+# # #     """Launch the Gradio interface."""
+# # #     ui = create_ui()
+# # #     ui.launch(share=True)
+# # # # Main execution
+# # # if __name__ == "__main__":
+# # #     try:
+# # #         # Initialize and launch the assistant
+# # #         initialize_assistant()
+# # #         launch_app()
+# # #     except Exception as e:
+# # #         import traceback
+# # #         print(f"❌ Fatal error initializing GBV Assistant: {e}")
+# # #         print(traceback.format_exc())
+# # #         # Create a minimal emergency UI to display the error
+# # #         with gr.Blocks() as error_demo:
+# # #             gr.Markdown("## System Error")
+# # #             gr.Markdown(f"An error occurred while initializing the application: {str(e)}")
+# # #             gr.Markdown("Please check your configuration and try again.")
+# # #         error_demo.launch(share=True, inbrowser=True, debug=True)
+# # ############################################################################################################
+# # import os
+# # from langchain_groq import ChatGroq
+# # from langchain.prompts import ChatPromptTemplate, PromptTemplate
+# # from langchain.output_parsers import ResponseSchema, StructuredOutputParser
+# # from urllib.parse import urljoin, urlparse
+# # import requests
+# # from io import BytesIO
+# # from langchain_chroma import Chroma
+# # import requests
+# # from bs4 import BeautifulSoup
+# # from langchain_core.prompts import ChatPromptTemplate
+# # import gradio as gr
+# # from PyPDF2 import PdfReader
+# # from langchain_huggingface import HuggingFaceEmbeddings
+# # groq_api_key= os.environ.get('GBV')
+# # embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
+# # def scrape_websites(base_urls):
 # #     try:
+# #         visited_links = set()  # To avoid revisiting the same link
+# #         content_by_url = {}  # Store content from each URL
+# #         for base_url in base_urls:
+# #             if not base_url.strip():
+# #                 continue  # Skip empty or invalid URLs
+# #             print(f"Scraping base URL: {base_url}")
+# #             html_content = fetch_page_content(base_url)
+# #             if html_content:
+# #                 cleaned_content = clean_body_content(html_content)
+# #                 content_by_url[base_url] = cleaned_content
+# #                 visited_links.add(base_url)
+# #                 # Extract and process all internal links
+# #                 soup = BeautifulSoup(html_content, "html.parser")
+# #                 links = extract_internal_links(base_url, soup)
+# #                 for link in links:
+# #                     if link not in visited_links:
+# #                         print(f"Scraping link: {link}")
+# #                         page_content = fetch_page_content(link)
+# #                         if page_content:
+# #                             cleaned_content = clean_body_content(page_content)
+# #                             content_by_url[link] = cleaned_content
+# #                             visited_links.add(link)
+# #                         # If the link is a PDF file, extract its content
+# #                         if link.lower().endswith('.pdf'):
+# #                             print(f"Extracting PDF content from: {link}")
+# #                             pdf_content = extract_pdf_text(link)
+# #                             if pdf_content:
+# #                                 content_by_url[link] = pdf_content
+# #         return content_by_url
 # #     except Exception as e:
+# #         print(f"Error during scraping: {e}")
+# #         return {}
+# # def fetch_page_content(url):
+# #     try:
+# #         response = requests.get(url, timeout=10)
+# #         response.raise_for_status()
+# #         return response.text
+# #     except requests.exceptions.RequestException as e:
+# #         print(f"Error fetching {url}: {e}")
+# #         return None
+# # def extract_internal_links(base_url, soup):
+# #     links = set()
+# #     for anchor in soup.find_all("a", href=True):
+# #         href = anchor["href"]
+# #         full_url = urljoin(base_url, href)
+# #         if is_internal_link(base_url, full_url):
+# #             links.add(full_url)
+# #     return links
+# # def is_internal_link(base_url, link_url):
+# #     base_netloc = urlparse(base_url).netloc
+# #     link_netloc = urlparse(link_url).netloc
+# #     return base_netloc == link_netloc
+# # def extract_pdf_text(pdf_url):
+# #     try:
+# #         response = requests.get(pdf_url)
+# #         response.raise_for_status()
+# #         with BytesIO(response.content) as file:
+# #             reader = PdfReader(file)
+# #             pdf_text = ""
+# #             for page in reader.pages:
+# #                 pdf_text += page.extract_text()
+# #         return pdf_text if pdf_text else None
+# #     except requests.exceptions.RequestException as e:
+# #         print(f"Error fetching PDF {pdf_url}: {e}")
+# #         return None
+# #     except Exception as e:
+# #         print(f"Error reading PDF {pdf_url}: {e}")
+# #         return None
+# # def clean_body_content(html_content):
+# #     soup = BeautifulSoup(html_content, "html.parser")
+# #     for script_or_style in soup(["script", "style"]):
+# #         script_or_style.extract()
+# #     cleaned_content = soup.get_text(separator="\n")
+# #     cleaned_content = "\n".join(
+# #         line.strip() for line in cleaned_content.splitlines() if line.strip()
+# #     )
+# #     return cleaned_content
+# # if __name__ == "__main__":
+# #     website = ["https://haguruka.org.rw/"
+# #                ]
+# #     all_content = scrape_websites(website)
+# #     temp_list = []
+# #     for url, content in all_content.items():
+# #         temp_list.append((url, content))
+# # processed_texts = []
+# # for element in temp_list:
+# #     if isinstance(element, tuple):
+# #         url, content = element
+# #         processed_texts.append(f"url: {url}, content: {content}")
+# #     elif isinstance(element, str):
+# #         processed_texts.append(element)
+# #     else:
+# #         processed_texts.append(str(element))
+# # def chunk_string(s, chunk_size=1000):
+# #     return [s[i:i+chunk_size] for i in range(0, len(s), chunk_size)]
+# # chunked_texts = []
+# # for text in processed_texts:
+# #   chunked_texts.extend(chunk_string(text))
+# # vectorstore = Chroma(
+# #     collection_name="GBVR_Dataset",
+# #     embedding_function=embed_model,
+# #     persist_directory="./",
+# # )
+# # vectorstore.get().keys()
+# # vectorstore.add_texts(chunked_texts)
+# # template = ("""
+# #     You are a friendly, intelligent, and conversational AI assistant designed to provide accurate, engaging, and human-like responses based on the given context. Your goal is to extract relevant details from the provided context: {context} and assist the user effectively. Follow these guidelines:
+# #     1. **Warm & Natural Interaction**
+# #        - If the user greets you (e.g., "Hello," "Hi," "Good morning"), respond warmly and acknowledge them.
+# #        - Example responses:
+# #          - "😊 Good morning! How can I assist you today?"
+# #          - "Hello! What can I do for you? 🚀"
+# #     2. **Precise Information Extraction**
+# #        - Provide only the relevant details from the given context: {context}.
+# #        - Do not generate extra content or assumptions beyond the provided information.
+# #     3. **Conversational & Engaging Tone**
+# #        - Keep responses friendly, natural, and engaging.
+# #        - Use occasional emojis (e.g., 😊, 🚀) to make interactions more lively.
+# #     4. **Awareness of Real-Time Context**
+# #        - If necessary, acknowledge the current date and time to show awareness of real-world updates.
+# #     5. **Handling Missing Information**
+# #        - If no relevant information exists in the context, respond politely:
+# #          - "I don't have that information at the moment, but I'm happy to help with something else! 😊"
+# #     6. **Personalized Interaction**
+# #        - If user history is available, tailor responses based on their previous interactions for a more natural and engaging conversation.
+# #     7. **Direct, Concise Responses**
+# #        - If the user requests specific data, provide only the requested details without unnecessary explanations unless asked.
+# #     8. **Extracting Relevant Links**
+# #        - If the user asks for a link related to their request `{question}`, extract the most relevant URL from `{context}` and provide it directly.
+# #        - Example response:
+# #          - "Here is the link you requested: [URL]"
+# #     **Context:** {context}
+# #     **User's Question:** {question}
+# #     **Your Response:**
+# # """)
+# # rag_prompt = PromptTemplate.from_template(template)
+# # retriever = vectorstore.as_retriever()
+# # from langchain_core.output_parsers import StrOutputParser
+# # from langchain_core.runnables import RunnablePassthrough
+# # llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key )
+# # rag_chain = (
+# #     {"context": retriever, "question": RunnablePassthrough()}
+# #     | rag_prompt
+# #     | llm
+# #     | StrOutputParser()
+# # )
+# # # Define the RAG memory stream function
+# # def rag_memory_stream(message, history):
+# #     partial_text = ""
+# #     for new_text in rag_chain.stream(message):  # Replace with actual streaming logic
+# #         partial_text += new_text
+# #         yield partial_text
+# # # Title with emojis
+# # title = "GBVR Chatbot"
+# # # Custom CSS for styling the interface
+# # custom_css = """
+# # body {
+# #     font-family: "Arial", serif;
+# # }
+# # .gradio-container {
+# #     font-family: "Times New Roman", serif;
+# # }
+# # .gr-button {
+# #     background-color: #007bff; /* Blue button */
+# #     color: white;
+# #     border: none;
+# #     border-radius: 5px;
+# #     font-size: 16px;
+# #     padding: 10px 20px;
+# #     cursor: pointer;
+# # }
+# # .gr-textbox:focus, .gr-button:focus {
+# #     outline: none; /* Remove outline focus for a cleaner look */
+# # }
+# # """
+# # # Create the Chat Interface
+# # demo = gr.ChatInterface(
+# #     fn=rag_memory_stream,
+# #     title=title,
+# #     fill_height=True,
+# #     theme="soft",
+# #     css=custom_css, # Apply the custom CSS
+# # )
+# # # Launch the app
+# # if __name__ == "__main__":
+# #     demo.launch(share=True, inbrowser=True, debug=True)
 # import os
 # import gradio as gr
 # from PyPDF2 import PdfReader
 # from langchain_huggingface import HuggingFaceEmbeddings
+# from langchain_core.output_parsers import StrOutputParser
+# from langchain_core.runnables import RunnablePassthrough
+# # Simple session management
+# class SessionManager:
+#     def __init__(self):
+#         self.sessions = {}
+#     def get_or_create_session(self, session_id):
+#         if session_id not in self.sessions:
+#             self.sessions[session_id] = []
+#         return self.sessions[session_id]
+#     def add_interaction(self, session_id, user_message, ai_response):
+#         session = self.get_or_create_session(session_id)
+#         session.append({"user": user_message, "ai": ai_response})
+#     def get_history(self, session_id, max_turns=5):
+#         session = self.get_or_create_session(session_id)
+#         recent_history = session[-max_turns:] if len(session) > max_turns else session
+#         history_text = ""
+#         for interaction in recent_history:
+#             history_text += f"User: {interaction['user']}\n"
+#             history_text += f"Assistant: {interaction['ai']}\n\n"
+#         return history_text.strip()
+# # Initialize session manager
+# session_manager = SessionManager()
 # groq_api_key= os.environ.get('GBV')
 #   chunked_texts.extend(chunk_string(text))
 # vectorstore = Chroma(
 #     collection_name="GBVR_Dataset",
 #     embedding_function=embed_model,
 # vectorstore.add_texts(chunked_texts)
+# # Updated template to include conversation history
 # template = ("""
 #     You are a friendly, intelligent, and conversational AI assistant designed to provide accurate, engaging, and human-like responses based on the given context. Your goal is to extract relevant details from the provided context: {context} and assist the user effectively. Follow these guidelines:
 #          - "I don't have that information at the moment, but I'm happy to help with something else! 😊"
 #     6. **Personalized Interaction**
+#        - Use the conversation history to provide more personalized and contextually relevant responses.
+#        - Previous conversation history: {conversation_history}
 #     7. **Direct, Concise Responses**
 #        - If the user requests specific data, provide only the requested details without unnecessary explanations unless asked.
 # retriever = vectorstore.as_retriever()
+# llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key)
+# # Dictionary to store user sessions with session IDs
+# user_sessions = {}
+# # Define the RAG chain with session history
+# def rag_chain(question, session_id="default"):
+#     # Get conversation history if available
+#     conversation_history = session_manager.get_history(session_id)
+#     # Get context from retriever
+#     context_docs = retriever.invoke(question)
+#     context = "\n".join(doc.page_content for doc in context_docs)
+#     # Create prompt with history
+#     prompt = rag_prompt.format(
+#         context=context,
+#         question=question,
+#         conversation_history=conversation_history
+#     )
+#     # Generate response
+#     response = llm.invoke(prompt).content
+#     # Store the interaction
+#     session_manager.add_interaction(session_id, question, response)
+#     return response
 # # Define the RAG memory stream function
 # def rag_memory_stream(message, history):
+#     # Generate a session ID based on the first message if not exists
+#     session_id = None
+#     for msg in history:
+#         if msg[0]:  # If there's a user message
+#             # Use first few characters of first message as simple session ID
+#             session_id = hash(msg[0][:20]) if session_id is None else session_id
+#             break
+#     # Default session ID if history is empty
+#     if session_id is None:
+#         session_id = "default_session"
+#     # Process the message and get response
+#     response = rag_chain(message, str(session_id))
+#     # Stream the response word by word
 #     partial_text = ""
+#     words = response.split(' ')
+#     for word in words:
+#         partial_text += word + " "
+#         yield partial_text.strip()
 # # Title with emojis
 # title = "GBVR Chatbot"
 # # Custom CSS for styling the interface
 # custom_css = """
 # body {
 # .gr-textbox:focus, .gr-button:focus {
 #     outline: none; /* Remove outline focus for a cleaner look */
 # }
 # """
 # # Create the Chat Interface
 # # Launch the app
 # if __name__ == "__main__":
 #     demo.launch(share=True, inbrowser=True, debug=True)
 import os
 from langchain_groq import ChatGroq
 from langchain.prompts import ChatPromptTemplate, PromptTemplate
 vectorstore = Chroma(
+    # NOTE(review): earlier revisions of this file used the collection name
+    # "GBVR_Dataset"; "GBVR_Datast" selects a different collection. Confirm
+    # whether this is a deliberate fresh collection or a typo.
+    collection_name="GBVR_Datast",
     embedding_function=embed_model,
+    # The store is persisted in the current working directory.
     persist_directory="./",
 )
 # Custom CSS for styling the interface
+# The string is handed to gr.ChatInterface through its `css` argument. It sets
+# fonts for the page body and the Gradio container, styles buttons, removes
+# focus outlines, and enlarges/centers the description (welcome message) text.
+# NOTE(review): the `.gr-button`, `.gr-textbox` and `.gradio-description`
+# selectors are assumed to match the class names Gradio actually renders --
+# TODO confirm against the installed Gradio version.
 custom_css = """
+/* Custom CSS for styling the interface */
 body {
     font-family: "Arial", serif;
 }
 .gradio-container {
     font-family: "Times New Roman", serif;
 }
 .gr-button {
     background-color: #007bff; /* Blue button */
     color: white;
     padding: 10px 20px;
     cursor: pointer;
 }
 .gr-textbox:focus, .gr-button:focus {
     outline: none; /* Remove outline focus for a cleaner look */
 }
+/* Specific CSS for the welcome message */
+.gradio-description {
+    font-size: 30px; /* Set font size for the welcome message */
+    font-family: "Arial", sans-serif;
+    text-align: center; /* Optional: Center-align the text */
+    padding: 20px; /* Optional: Add padding around the welcome message */
+}
 """
+# Generate a simple welcome message using the LLM
+def generate_welcome_message():
+    welcome_prompt = """
+    Generate a short, simple welcome message for a chatbot about Gender-Based Violence Resources in Rwanda.
+    Keep it under 3 sentences, and use simple language.
+    Make it warm and supportive but direct and easy to read.
+    """
+    # Get the welcome message from the LLM
+    welcome_message = llm.invoke(welcome_prompt).content
+    return welcome_message
+# Create simple welcome message
+# NOTE: this call runs at import time, so the LLM is queried once before the
+# interface object is built; the same text is reused for every visitor.
+welcome_msg = generate_welcome_message()
+# Create the Chat Interface with welcome message
+# `rag_memory_stream` handles each chat turn; `welcome_msg` is passed as the
+# interface description.
 demo = gr.ChatInterface(
     fn=rag_memory_stream,
     title=title,
     fill_height=True,
     theme="soft",
     css=custom_css, # Apply the custom CSS
+    description=welcome_msg
 )
 # Launch the app