Spaces:
Sleeping
Sleeping
File size: 8,923 Bytes
270f4ac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 |
import streamlit as st
from config import app_name
from config import website_name
from config import DATABASE
from config import PINECONE_INDEX
from config import CHAT_COLLECTION
from langchain_core.messages import AIMessageChunk, ToolMessage
import pinecone
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import streamlit_chat
from langgraph.prebuilt import create_react_agent
from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.schema import HumanMessage, AIMessage
from utils.tools import get_context
import os
from pymongo import MongoClient
from bson import ObjectId
from dotenv import load_dotenv
from pytz import timezone, utc
from datetime import datetime
# Page config must be the first Streamlit call in the script.
st.set_page_config(layout="wide", page_title=app_name, page_icon="📄")
load_dotenv()

# Secrets come from the environment (.env in dev).
FLASH_API = os.getenv("FLASH_API")          # Google Generative AI API key
PINECONE_API = os.getenv("PINECONE_API_KEY")
MONGO_URI = os.getenv("MONGO_URI")

# Vector store: Pinecone index holding the website's embedded chunks.
pc = pinecone.Pinecone(
    api_key=PINECONE_API
)
index = pc.Index(PINECONE_INDEX)

# MongoDB connection setup — chat transcripts are persisted per session.
client = MongoClient(MONGO_URI)
db = client[DATABASE]
chat_sessions = db[CHAT_COLLECTION]

# Embedding model used by the retrieval tool (must match the model used
# when the index was populated).
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=FLASH_API)

# model = ChatOpenAI(
#     model="gpt-4o-mini",
#     temperature=0,
#     openai_api_key=OPENAI_KEY,
#     streaming=True
# )

# Chat model driving the agent.
# NOTE(review): `tool_call` and `stream` are not documented constructor
# parameters of ChatGoogleGenerativeAI — tool binding happens via
# .bind_tools() below and streaming via .stream() at call time. These two
# kwargs are likely ignored or silently passed through; confirm and remove.
model2 = ChatGoogleGenerativeAI(
    model = "gemini-2.0-flash",
    google_api_key = FLASH_API,
    tool_call = True,
    stream = True
)
# Same tool list bound to the model and handed to the agent below.
model2_with_tool = model2.bind_tools([get_context])
tools = [get_context]
# System prompt for the ReAct agent. It is a runtime string sent to the
# model verbatim, so its wording (including citation rules and the
# English/Gujarati language policy) directly shapes responses.
# NOTE(review): the prompt contains typos ("seperate", "to to",
# "knowledgebase") — fix deliberately in a separate change, since editing
# prompt text can shift model behavior.
system_prompt = f"""
You are a website-specific chatbot specializing in answering user queries about {website_name}.
- To answer the user query you will be provided a get_context tool, which allows you to retrieve data chunks based on user query.
Follow these instructions carefully:
1. **Tool Usage**
- You can use this tool as needed to fetch information from the knowledgebase.
2. **History Utilization**:
- You will be provided with conversation history to track context. If the user’s question relates to prior responses, try to answer from memory without invoking the search tool.
- If additional information is required, reformulate the query to be self-contained before invoking the search tool again.
- Never give incomplete or wrong answer based on your personal knowledge, always use tool with proper query
3. **General Messages and Salutations**:
- If the user says "Hi," "Hello," "How are you?" or similar, respond conversationally without invoking the search tool.
4. **Handling Off-Topic Queries**:
- If the user sends greetings, introductions, or queries unrelated to {website_name}, respond politely and conversationally without forcing a website-related answer.
5. **Response Formation**:
- Each retrieved chunk will have a URL(url of the webpage from which information was gathered) associated with it; you must cite that URL if you use any information from that chunk.
- source URL associated with the chunk should be cited , with mention of it as source.
- A single source should be cited only once, not again and again. That too at bottom in a seperate sources section.
- Only cite the correct webpage URL , of the chunk you are using to make the information
- If the same URL appears in multiple relevant chunks, list that URL only once in the sources section.
- Only include the URLs that genuinely informed or supported your answer.
- if the answer itself contains url, then quote it properly.
- Do not skip any PDF urls or any other relevant url mentioned in the chunk.
- Respond in a friendly, well-formatted manner without mentioning internal terms like "chunk" or "chunk number."
6. **Clear and Complete Responses**:
- Provide clear explanations with all relevant details. Never omit important information.
- If the user query cannot be answered from the available data, politely ask for clarification.
7. **Response language**:
- User can talk either in English or Gujarati.
- Use the language in which the user does the conversation.
8. **Structured Responses**:
- Give the response in well formatted manner, in points instead of long paragraphs.
9. **Very Important** **Response Precision**:
- Make sure to to give only relevant answer to what user asked, do not just dump the information to user, without relevance.
- Never respond to user that you do not know the answer, without invoking the tool.
## List of tools available
1. 'get_context'
"""
# Prebuilt ReAct agent: the model decides when to call get_context.
# NOTE(review): `state_modifier` is deprecated in newer langgraph releases
# in favor of `prompt` — confirm against the pinned langgraph version.
agent_executor = create_react_agent(model2_with_tool, tools, state_modifier=system_prompt)
def stream_tokens(agent_stream):
    """
    Yield user-visible text fragments from a LangGraph message stream.

    The stream may produce bare chunks or ``(chunk, metadata)`` tuples
    (``stream_mode="messages"``). ToolMessage items and AIMessageChunk
    items that carry ``tool_calls`` are internal plumbing and are
    filtered out; only plain AIMessageChunk content is yielded.
    """
    collected = ""
    for item in agent_stream:
        if isinstance(item, tuple):
            chunk, _metadata = item
        else:
            chunk, _metadata = item, {}
        # Tool results never reach the user directly.
        if isinstance(chunk, ToolMessage):
            continue
        if isinstance(chunk, AIMessageChunk):
            # Chunks that request a tool call carry no displayable text.
            if chunk.tool_calls:
                continue
            piece = chunk.content or ""
            collected += piece
            yield piece
    # Generator return value (full reply) is available via StopIteration;
    # st.write_stream ignores it and assembles the text itself.
    return collected
# Initialize session state on first run of the script.
# current_chat_id: Mongo _id (as str) of the active session; None means
# the conversation is not being persisted yet.
if "current_chat_id" not in st.session_state:
    st.session_state["current_chat_id"] = None
# chat_history: list of HumanMessage/AIMessage objects; seeded with a greeting.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = [AIMessage(content="Hello, I can help you with information related to the Commissionerate of Transport, Gujarat.")]
# Create new chat session in Mongo
def create_new_chat_session():
    """Insert an empty chat-session document and return its id as a string.

    The document stores a timezone-aware UTC ``created_at`` timestamp (the
    sidebar converts it back to IST for display) and an empty ``messages``
    list that update_chat_session() appends to.

    Returns:
        str: the hex string of the new document's ObjectId.
    """
    # Take the current instant directly in UTC. The previous
    # now(IST) -> astimezone(utc) round-trip produced the exact same
    # instant and needlessly depended on pytz's now(tz) behavior.
    created_at = datetime.now(utc)
    result = chat_sessions.insert_one({"created_at": created_at, "messages": []})
    return str(result.inserted_id)
# Load session by ID
def load_chat_session(session_id):
    """Replace the in-memory chat history with a session stored in Mongo.

    No-op when no document matches ``session_id``.
    """
    doc = chat_sessions.find_one({"_id": ObjectId(session_id)})
    if doc is None:
        return
    # Rehydrate the stored {role, content} dicts into LangChain messages.
    st.session_state["chat_history"] = [
        HumanMessage(content=entry["content"])
        if entry["role"] == "user"
        else AIMessage(content=entry["content"])
        for entry in doc.get("messages", [])
    ]
# Update Mongo with new messages
def update_chat_session(session_id, new_messages):
mongo_msgs = [{"role": "user" if isinstance(m, HumanMessage) else "assistant", "content": m.content} for m in new_messages]
chat_sessions.update_one({"_id": ObjectId(session_id)}, {"$push": {"messages": {"$each": mongo_msgs}}})
# Sidebar: session management (create / open / delete).
st.sidebar.header("Chat Sessions")
if st.sidebar.button("New Chat"):
    # Start persisting immediately and reset the visible history.
    chat_id = create_new_chat_session()
    st.session_state["current_chat_id"] = chat_id
    st.session_state["chat_history"] = [AIMessage(content="Hello, I can help you with information related to the Commissionerate of Transport, Gujarat.")]
# List every stored session, newest first, labelled by IST creation time.
for session in chat_sessions.find().sort("created_at", -1):
    sid = str(session["_id"])
    # created_at is stored naive-UTC by Mongo; re-attach UTC then convert to IST.
    ist_time = session["created_at"].replace(tzinfo=utc).astimezone(timezone("Asia/Kolkata"))
    label = ist_time.strftime("%Y-%m-%d %H:%M:%S")
    col1, col2 = st.sidebar.columns([8, 1])
    with col1:
        if st.button(f"Session {label}", key=sid):
            st.session_state["current_chat_id"] = sid
            load_chat_session(sid)
    with col2:
        # NOTE(review): deleting the session that is currently open leaves
        # current_chat_id pointing at a removed document, so later
        # update_chat_session calls silently match nothing — confirm intended.
        if st.button("🗑️", key=f"delete_{sid}"):
            chat_sessions.delete_one({"_id": ObjectId(sid)})
            st.rerun()
# Title
st.markdown(f"<h1 style='text-align:center;'>Welcome To {app_name}</h1><hr>", unsafe_allow_html=True)
# Display previous messages (runs on every rerun, before new input is handled).
for msg in st.session_state["chat_history"]:
    role = "user" if isinstance(msg, HumanMessage) else "assistant"
    with st.chat_message(role):
        st.markdown(msg.content)
# Input + Streaming
user_question = st.chat_input(f"Ask a Question related to {website_name}")
if user_question:
    user_msg = HumanMessage(content=user_question)
    st.session_state["chat_history"].append(user_msg)
    # Persist only when a session exists; an un-saved chat is ephemeral.
    if st.session_state["current_chat_id"]:
        update_chat_session(st.session_state["current_chat_id"], [user_msg])
    # Render the new user message now; the history loop above already ran.
    with st.chat_message("user"):
        st.markdown(user_question)
    with st.chat_message("assistant"):
        # Full history (including the new question) goes to the agent;
        # stream_tokens filters tool traffic out of the token stream.
        raw_gen = agent_executor.stream({"messages": st.session_state["chat_history"]}, stream_mode="messages")
        # write_stream renders incrementally and returns the assembled text.
        response_text = st.write_stream(stream_tokens(raw_gen))
    ai_msg = AIMessage(content=response_text)
    st.session_state["chat_history"].append(ai_msg)
    if st.session_state["current_chat_id"]:
        update_chat_session(st.session_state["current_chat_id"], [ai_msg])