# mea_chatbot/src/streamlit_app.py
import os
from datetime import datetime

import pinecone
import streamlit as st
from bson import ObjectId
from dotenv import load_dotenv
from langchain.schema import HumanMessage, AIMessage
from langchain_core.messages import AIMessageChunk
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langgraph.prebuilt import create_react_agent
from pymongo import MongoClient
from pytz import timezone, utc

from config import app_name, website_name, DATABASE, PINECONE_INDEX, CHAT_COLLECTION
from utils.tools import get_context
st.set_page_config(layout="wide", page_title=app_name, page_icon="πŸ“„")
load_dotenv()
FLASH_API = os.getenv("FLASH_API")
OPENAI_KEY = os.getenv("OPEN_AI")  # loaded but not referenced anywhere in this module
PINECONE_API = os.getenv("PINECONE_API_KEY")
MONGO_URI = os.getenv("MONGO_URI")
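
# A minimal .env sketch for local runs (illustrative placeholders, not real
# keys; the variable names match the os.getenv calls above):
#
#   FLASH_API=<google-generative-ai-key>
#   OPEN_AI=<openai-key>
#   PINECONE_API_KEY=<pinecone-key>
#   MONGO_URI=mongodb://localhost:27017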
# Pinecone index for the knowledge base
pc = pinecone.Pinecone(api_key=PINECONE_API)
index = pc.Index(PINECONE_INDEX)
# MongoDB collection that persists chat sessions
client = MongoClient(MONGO_URI)
db = client[DATABASE]
chat_sessions = db[CHAT_COLLECTION]
# Note: `index` and `embeddings` are not referenced directly below; retrieval is
# presumably handled inside the get_context tool (utils/tools.py).
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=FLASH_API)
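
# For orientation only: utils/tools.py is not part of this file, but a retrieval
# tool over this index/embeddings pair would plausibly look like the sketch
# below. The decorator, metadata keys ("text", "url"), and top_k value are
# assumptions, not the actual implementation.
#
#   from langchain_core.tools import tool
#
#   @tool
#   def get_context(query: str) -> str:
#       """Retrieve knowledge-base chunks relevant to the user query."""
#       vector = embeddings.embed_query(query)
#       results = index.query(vector=vector, top_k=5, include_metadata=True)
#       return "\n\n".join(
#           f"{m['metadata'].get('text', '')}\nURL: {m['metadata'].get('url', '')}"
#           for m in results["matches"]
#       )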
# Gemini chat model that powers the agent
model2 = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    api_key=FLASH_API
)
# Note: create_react_agent binds the tools itself, so passing model2 directly
# would also work; binding here keeps a tool-aware model available on its own.
model2_with_tool = model2.bind_tools([get_context])
tools = [get_context]
system_prompt = f"""
You are a website-specific chatbot specializing in answering user queries about {website_name}.
- To answer user queries you are provided with a get_context tool, which retrieves data chunks relevant to the user's query.
Follow these instructions carefully:
1. **Tool Usage**:
   - Use this tool as needed to fetch information from the knowledge base.
2. **History Utilization**:
   - You will be provided with the conversation history to track context. If the user's question relates to prior responses, try to answer from memory without invoking the search tool.
   - If additional information is required, reformulate the query to be self-contained before invoking the search tool again.
   - Never give an incomplete or wrong answer based on your own knowledge; always use the tool with a proper query.
3. **General Messages and Salutations**:
   - If the user says "Hi," "Hello," "How are you?" or similar, respond conversationally without invoking the search tool.
4. **Handling Off-Topic Queries**:
   - If the user sends greetings, introductions, or queries unrelated to {website_name}, respond politely and conversationally without forcing a website-related answer.
5. **Response Formation**:
   - Each retrieved chunk has a URL (the webpage the information was gathered from) associated with it; you must cite that URL if you use any information from that chunk.
   - Cite each source URL only once, in a separate "Sources" section at the bottom of the response; if the same URL appears in multiple relevant chunks, list it a single time.
   - Only cite the URL of the chunk you actually used, and only include URLs that genuinely informed or supported your answer.
   - If the answer itself contains a URL, quote it properly.
   - Do not skip any PDF URLs or other relevant URLs mentioned in the chunk.
   - Respond in a friendly, well-formatted manner without mentioning internal terms like "chunk" or "chunk number."
6. **Clear and Complete Responses**:
   - Provide clear explanations with all relevant details. Never omit important information.
   - If the user query cannot be answered from the available data, politely ask for clarification.
7. **Response Language**:
   - The user may write in either English or Gujarati.
   - Reply in the language the user uses.
8. **Structured Responses**:
   - Give well-formatted responses, in points rather than long paragraphs.
9. **Response Precision** (very important):
   - Answer only what the user asked; do not dump irrelevant information on the user.
   - Never tell the user you do not know the answer without first invoking the tool.
## List of tools available
1. 'get_context'
"""
# Prebuilt ReAct agent; state_modifier injects the system prompt
# (newer langgraph releases call this parameter `prompt`)
agent_executor = create_react_agent(model2_with_tool, tools, state_modifier=system_prompt)
def stream_response_real_time(agent_stream):
    """
    Real-time streaming generator that yields tokens as they arrive.
    Currently unused: the chat handler below streams inline instead.
    """
    for chunk in agent_stream:
        # Handle different chunk types from LangGraph
        if isinstance(chunk, dict):
            # State-style chunk: extract messages from the chunk
            messages = chunk.get('messages', [])
            for message in messages:
                if isinstance(message, AIMessageChunk) and message.content:
                    # Skip tool calls, only yield actual content
                    if not hasattr(message, 'tool_calls') or not message.tool_calls:
                        yield message.content
        elif isinstance(chunk, AIMessageChunk):
            # Direct AIMessageChunk
            if chunk.content and (not hasattr(chunk, 'tool_calls') or not chunk.tool_calls):
                yield chunk.content
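
# If this helper were wired into the UI, Streamlit's write_stream could consume
# it directly (illustrative only; `history` is a placeholder for the current
# message list):
#
#   chunks = model2_with_tool.stream(history)  # yields AIMessageChunk objects
#   st.write_stream(stream_response_real_time(chunks))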
# Initialize session state
if "current_chat_id" not in st.session_state:
    st.session_state["current_chat_id"] = None
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = [AIMessage(
        content="Hello, I can help you with information related to the Ministry of External Affairs.")]
# Create new chat session in Mongo
def create_new_chat_session():
    # Timestamps are generated in IST, then stored in UTC
    ist_time = datetime.now(timezone("Asia/Kolkata"))
    utc_time = ist_time.astimezone(utc)
    session_id = chat_sessions.insert_one({"created_at": utc_time, "messages": []}).inserted_id
    return str(session_id)
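
# Shape of a freshly inserted session document (illustrative):
#   {"_id": ObjectId("..."), "created_at": <UTC datetime>, "messages": []}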
# Load session by ID
def load_chat_session(session_id):
    session = chat_sessions.find_one({"_id": ObjectId(session_id)})
    if session:
        msgs = []
        for m in session.get("messages", []):
            if m["role"] == "user":
                msgs.append(HumanMessage(content=m["content"]))
            else:
                msgs.append(AIMessage(content=m["content"]))
        st.session_state["chat_history"] = msgs
# Update Mongo with new messages
def update_chat_session(session_id, new_messages):
    mongo_msgs = [
        {"role": "user" if isinstance(m, HumanMessage) else "assistant", "content": m.content}
        for m in new_messages
    ]
    chat_sessions.update_one({"_id": ObjectId(session_id)}, {"$push": {"messages": {"$each": mongo_msgs}}})
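
# After one user/assistant exchange the stored array looks like (illustrative):
#   "messages": [{"role": "user", "content": "..."},
#                {"role": "assistant", "content": "..."}]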
# Sidebar session list
st.sidebar.header("Chat Sessions")
if st.sidebar.button("New Chat"):
    chat_id = create_new_chat_session()
    st.session_state["current_chat_id"] = chat_id
    st.session_state["chat_history"] = [AIMessage(
        content="Hello, I can help you with information related to the Ministry of External Affairs.")]
for session in chat_sessions.find().sort("created_at", -1):
    sid = str(session["_id"])
    # Mongo returns naive UTC datetimes; attach UTC, then convert to IST for display
    ist_time = session["created_at"].replace(tzinfo=utc).astimezone(timezone("Asia/Kolkata"))
    label = ist_time.strftime("%Y-%m-%d %H:%M:%S")
    col1, col2 = st.sidebar.columns([8, 1])
    with col1:
        if st.button(f"Session {label}", key=sid):
            st.session_state["current_chat_id"] = sid
            load_chat_session(sid)
    with col2:
        if st.button("🗑️", key=f"delete_{sid}"):
            chat_sessions.delete_one({"_id": ObjectId(sid)})
            # Reset in-memory state if the active session was deleted
            if st.session_state["current_chat_id"] == sid:
                st.session_state["current_chat_id"] = None
                st.session_state["chat_history"] = [AIMessage(
                    content="Hello, I can help you with information related to the Ministry of External Affairs.")]
            st.rerun()
# Title
st.markdown(f"<h1 style='text-align:center;'>Welcome To {app_name}</h1><hr>", unsafe_allow_html=True)
# Display previous messages
for msg in st.session_state["chat_history"]:
    role = "user" if isinstance(msg, HumanMessage) else "assistant"
    with st.chat_message(role):
        st.markdown(msg.content)
# Input + Real-Time Streaming
user_question = st.chat_input(f"Ask a Question related to {website_name}")
if user_question:
    user_msg = HumanMessage(content=user_question)
    st.session_state["chat_history"].append(user_msg)
    # Persisted only when a session exists (i.e. "New Chat" was clicked)
    if st.session_state["current_chat_id"]:
        update_chat_session(st.session_state["current_chat_id"], [user_msg])
    with st.chat_message("user"):
        st.markdown(user_question)
    with st.chat_message("assistant"):
        # Create empty container for streaming text
        response_container = st.empty()
        full_response = ""
        # Get the agent stream; stream_mode="messages" yields
        # (message_chunk, metadata) tuples as tokens arrive
        agent_stream = agent_executor.stream(
            {"messages": st.session_state["chat_history"]},
            stream_mode="messages"
        )
        # Stream the response in real time
        try:
            for chunk in agent_stream:
                # Handle LangGraph stream format
                if isinstance(chunk, tuple) and len(chunk) == 2:
                    message, metadata = chunk
                    if isinstance(message, AIMessageChunk) and message.content:
                        # Skip tool calls, only render answer text
                        if not hasattr(message, 'tool_calls') or not message.tool_calls:
                            full_response += message.content
                            response_container.markdown(full_response + "▌")  # add cursor
                elif isinstance(chunk, AIMessageChunk) and chunk.content:
                    if not hasattr(chunk, 'tool_calls') or not chunk.tool_calls:
                        full_response += chunk.content
                        response_container.markdown(full_response + "▌")  # add cursor
            # Remove cursor and show final response
            response_container.markdown(full_response)
        except Exception as e:
            st.error(f"Error during streaming: {str(e)}")
            full_response = "Sorry, I encountered an error while processing your request."
            response_container.markdown(full_response)
    # Save the complete response
    ai_msg = AIMessage(content=full_response)
    st.session_state["chat_history"].append(ai_msg)
    if st.session_state["current_chat_id"]:
        update_chat_session(st.session_state["current_chat_id"], [ai_msg])