import streamlit as st from config import app_name from config import website_name from config import DATABASE from config import PINECONE_INDEX from config import CHAT_COLLECTION from langchain_core.messages import AIMessageChunk, ToolMessage import pinecone from langchain_google_genai import GoogleGenerativeAIEmbeddings import streamlit_chat from langgraph.prebuilt import create_react_agent from langchain_openai import ChatOpenAI from langchain_google_genai import ChatGoogleGenerativeAI from langchain.schema import HumanMessage, AIMessage from utils.tools import get_context import os from pymongo import MongoClient from bson import ObjectId from dotenv import load_dotenv from pytz import timezone, utc from datetime import datetime st.set_page_config(layout="wide", page_title=app_name, page_icon="๐") load_dotenv() FLASH_API = os.getenv("FLASH_API") PINECONE_API = os.getenv("PINECONE_API_KEY") MONGO_URI = os.getenv("MONGO_URI") pc = pinecone.Pinecone( api_key=PINECONE_API ) index = pc.Index(PINECONE_INDEX) # MongoDB connection setup client = MongoClient(MONGO_URI) db = client[DATABASE] chat_sessions = db[CHAT_COLLECTION] embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=FLASH_API) # model = ChatOpenAI( # model="gpt-4o-mini", # temperature=0, # openai_api_key=OPENAI_KEY, # streaming=True # ) model2 = ChatGoogleGenerativeAI( model = "gemini-2.0-flash", google_api_key = FLASH_API, tool_call = True, stream = True ) model2_with_tool = model2.bind_tools([get_context]) tools = [get_context] system_prompt = f""" You are a website-specific chatbot specializing in answering user queries about {website_name}. - To answer the user query you will be provided a get_context tool, which allows you to retrieve data chunks based on user query. Follow these instructions carefully: 1. **Tool Usage** - You can use this tool as needed to fetch information from the knowledgebase. 2. **History Utilization**: - You will be provided with conversation history to track context. If the userโs question relates to prior responses, try to answer from memory without invoking the search tool. - If additional information is required, reformulate the query to be self-contained before invoking the search tool again. - Never give incomplete or wrong answer based on your personal knowledge, always use tool with proper query 3. **General Messages and Salutations**: - If the user says "Hi," "Hello," "How are you?" or similar, respond conversationally without invoking the search tool. 4. **Handling Off-Topic Queries**: - If the user sends greetings, introductions, or queries unrelated to {website_name}, respond politely and conversationally without forcing a website-related answer. 5. **Response Formation**: - Each retrieved chunk will have a URL(url of the webpage from which information was gathered) associated with it; you must cite that URL if you use any information from that chunk. - source URL associated with the chunk should be cited , with mention of it as source. - A single source should be cited only once, not again and again. That too at bottom in a seperate sources section. - Only cite the correct webpage URL , of the chunk you are using to make the information - If the same URL appears in multiple relevant chunks, list that URL only once in the sources section. - Only include the URLs that genuinely informed or supported your answer. - if the answer itself contains url, then quote it properly. - Do not skip any PDF urls or any other relevant url mentioned in the chunk. - Respond in a friendly, well-formatted manner without mentioning internal terms like "chunk" or "chunk number." 6. **Clear and Complete Responses**: - Provide clear explanations with all relevant details. Never omit important information. - If the user query cannot be answered from the available data, politely ask for clarification. 7. **Response language**: - User can talk either in English or Gujarati. - Use the language in which the user does the conversation. 8. **Structured Responses**: - Give the response in well formatted manner, in points instead of long paragraphs. 9. **Very Important** **Response Precision**: - Make sure to to give only relevant answer to what user asked, do not just dump the information to user, without relevance. - Never respond to user that you do not know the answer, without invoking the tool. ## List of tools available 1. 'get_context' """ # Load the extracted JSON data # parent of AIMessageChunk agent_executor = create_react_agent(model2_with_tool, tools, state_modifier=system_prompt) def stream_tokens(agent_stream): """ Convert LangGraph's (chunk, metadata) stream to clean assistant reply text. - Skips ToolMessage and tool_calls - Streams only AIMessageChunk content """ current_text = "" for chunk_tuple in agent_stream: chunk, metadata = chunk_tuple if isinstance(chunk_tuple, tuple) else (chunk_tuple, {}) # Skip tool messages if isinstance(chunk, ToolMessage): continue # Skip tool call metadata if isinstance(chunk, AIMessageChunk): if chunk.tool_calls: continue partial = chunk.content or "" current_text += partial yield partial return current_text # Initialize session state if "current_chat_id" not in st.session_state: st.session_state["current_chat_id"] = None if "chat_history" not in st.session_state: st.session_state["chat_history"] = [AIMessage(content="Hello, I can help you with information related to the Commissionerate of Transport, Gujarat.")] # Create new chat session in Mongo def create_new_chat_session(): ist_time = datetime.now(timezone("Asia/Kolkata")) utc_time = ist_time.astimezone(utc) session_id = chat_sessions.insert_one({"created_at": utc_time, "messages": []}).inserted_id return str(session_id) # Load session by ID def load_chat_session(session_id): session = chat_sessions.find_one({"_id": ObjectId(session_id)}) if session: msgs = [] for m in session.get("messages", []): if m["role"] == "user": msgs.append(HumanMessage(content=m["content"])) else: msgs.append(AIMessage(content=m["content"])) st.session_state["chat_history"] = msgs # Update Mongo with new messages def update_chat_session(session_id, new_messages): mongo_msgs = [{"role": "user" if isinstance(m, HumanMessage) else "assistant", "content": m.content} for m in new_messages] chat_sessions.update_one({"_id": ObjectId(session_id)}, {"$push": {"messages": {"$each": mongo_msgs}}}) # Sidebar session list st.sidebar.header("Chat Sessions") if st.sidebar.button("New Chat"): chat_id = create_new_chat_session() st.session_state["current_chat_id"] = chat_id st.session_state["chat_history"] = [AIMessage(content="Hello, I can help you with information related to the Commissionerate of Transport, Gujarat.")] for session in chat_sessions.find().sort("created_at", -1): sid = str(session["_id"]) ist_time = session["created_at"].replace(tzinfo=utc).astimezone(timezone("Asia/Kolkata")) label = ist_time.strftime("%Y-%m-%d %H:%M:%S") col1, col2 = st.sidebar.columns([8, 1]) with col1: if st.button(f"Session {label}", key=sid): st.session_state["current_chat_id"] = sid load_chat_session(sid) with col2: if st.button("๐๏ธ", key=f"delete_{sid}"): chat_sessions.delete_one({"_id": ObjectId(sid)}) st.rerun() # Title st.markdown(f"