akshansh36 committed on
Commit
7d4344b
·
verified ·
1 Parent(s): 2311dd1

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +187 -0
  2. requirements.txt +0 -0
  3. tools.py +73 -0
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import streamlit_chat
3
+ from langgraph.prebuilt import create_react_agent
4
+ from langchain_openai import ChatOpenAI
5
+ from langchain.schema import HumanMessage, AIMessage
6
+ from tools import get_context
7
+ import os
8
+ from pymongo import MongoClient
9
+ from bson import ObjectId
10
+ from pytz import timezone, utc
11
+ from dotenv import load_dotenv
12
+ from datetime import datetime
13
+
14
load_dotenv()

# Page-level settings for the Streamlit app.
st.set_page_config(layout="wide", page_title="RITES Bot", page_icon="📄")

# Credentials come from the environment (load_dotenv() above may populate it).
OPENAI_KEY = os.getenv("OPENAI_API_KEY")
MONGO_URI = os.getenv("MONGO_URI")

model = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    openai_api_key=OPENAI_KEY,
    streaming=True,
)


@st.cache_resource
def _get_mongo_client(uri):
    """Return a MongoClient for ``uri``, created once and then reused.

    Streamlit re-executes this script on every user interaction; without the
    cache each rerun would open a fresh client (and connection pool) that is
    never closed.
    """
    return MongoClient(uri)


client = _get_mongo_client(MONGO_URI)
db = client["rites"]
chat_sessions = db["rites_pdf_chat"]
31
+
32
# Tools the agent is allowed to call; get_context is imported from tools.py.
tools = [get_context]

# System prompt handed to the agent (see state_modifier below). It describes
# when to call get_context, how to use conversation history, and how to cite
# the PDF URL and page range of each retrieved chunk.
system_prompt = """
You are an AI-powered assistant for the RITES website, providing users with accurate and relevant information sourced from official PDF documents.
- These documents include job openings, annual returns, financial reports, approved vendor lists, banned vendor lists, and press releases.
- To answer the user query you will be provided a get_context tool, which allows you to retrieve data chunks from the relevant documents based on user query.
Follow these instructions carefully:

1. **Tool Usage**
- You can use this tool as needed to fetch information from the knowledgebase.

2. **History Utilization**:
- You will be provided with conversation history to track context. If the user’s question relates to prior responses, try to answer from memory without invoking the search tool.
- If additional information is required, reformulate the query to be self-contained before invoking the search tool again.

3. **General Messages and Salutations**:
- If the user says "Hi," "Hello," "How are you?" or similar, respond conversationally without invoking the search tool.

4. **Unrelated Questions**:
- If a user asks something outside the scope of RITES (e.g., sports, movies, general trivia), politely decline by saying:
"I can assist you with information related to RITES, such as job openings, financial reports, and vendor details. Let me know how I can help."

5. **Response Formation**:
- Each retrieved chunk will have a PDF URL associated with it; you must cite that PDF URL if you use any information from it.
- Each retrieved chunk will also have a start_page and end_page indicating the span of pages containing the information. Cite these page numbers if used.
- Do not cite the same URL or page number multiple times; combine the citations at the end.
- If using multiple PDFs, provide the information separately for each, with clear citations.
- Respond in a friendly, well-formatted manner without mentioning internal terms like "chunk" or "chunk number."

6. **Clear and Complete Responses**:
- Provide clear explanations with all relevant details. Never omit important information.
- If the user query cannot be answered from the available data, politely ask for clarification.

## List of tools available
1. 'get_context'
"""

# Build the ReAct-style agent from the chat model, the tool list, and the prompt.
agent_executor = create_react_agent(model, tools, state_modifier=system_prompt)
70
+
71
# Seed session state on first use; keys that already exist are left untouched.
# 'chat_history' is a list of message dicts with "role" and "content".
for _state_key, _initial_value in (("current_chat_id", None), ("chat_history", [])):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
76
+
77
+ # Function to create a new chat session in MongoDB
78
def create_new_chat_session():
    """Insert an empty chat session document and return its id as a string.

    The stored document carries a timezone-aware UTC ``created_at`` timestamp
    and an empty ``messages`` list.
    """
    # Same instant the IST -> UTC conversion would produce, taken directly in UTC.
    created_at = datetime.now(utc)
    inserted = chat_sessions.insert_one({"created_at": created_at, "messages": []})
    return str(inserted.inserted_id)
90
+
91
+ # Function to load a chat session by MongoDB ID (loads full history for display)
92
def load_chat_session(session_id):
    """Load a stored session's messages into ``st.session_state['chat_history']``.

    Args:
        session_id (str): String form of the session document's ObjectId.

    If no document matches (for example, it was deleted in the meantime), the
    history is reset to an empty list so the UI does not keep showing the
    previously loaded conversation under the newly selected session id.
    """
    session = chat_sessions.find_one({"_id": ObjectId(session_id)})
    # `or []` also covers a document whose "messages" field is present but null.
    st.session_state['chat_history'] = (session.get('messages') or []) if session else []
96
+
97
+ # Function to update a chat session in MongoDB by appending new messages
98
def update_chat_session(session_id, new_messages):
    """
    Append messages to the end of a stored chat session.

    Args:
        session_id (str): The MongoDB session ID.
        new_messages (list): Message dictionaries, each with keys "role" and "content".
    """
    selector = {"_id": ObjectId(session_id)}
    # $each pushes every element of the list individually, preserving order.
    append_all = {"$push": {"messages": {"$each": new_messages}}}
    chat_sessions.update_one(selector, append_all)
110
+
111
# Sidebar: Chat sessions management
st.sidebar.header("Chat Sessions")

# Button to create a new chat session
if st.sidebar.button("New Chat"):
    new_chat_id = create_new_chat_session()
    st.session_state['current_chat_id'] = new_chat_id
    st.session_state['chat_history'] = []

# List existing chat sessions (newest first), each with a delete button.
# Only created_at is needed for the label, so message bodies are not fetched.
ist_zone = timezone("Asia/Kolkata")
existing_sessions = chat_sessions.find({}, {"created_at": 1}).sort("created_at", -1)
for session in existing_sessions:
    session_id = str(session['_id'])
    # Convert stored UTC time to IST for display.
    # NOTE(review): replace(tzinfo=utc) presumes the driver hands back naive
    # UTC datetimes (PyMongo's default) - confirm if tz_aware is ever enabled.
    utc_time = session['created_at']
    ist_time = utc_time.replace(tzinfo=utc).astimezone(ist_zone)
    session_date = ist_time.strftime("%Y-%m-%d %H:%M:%S")

    col1, col2 = st.sidebar.columns([8, 1])
    with col1:
        if st.button(f"Session {session_date}", key=session_id):
            st.session_state['current_chat_id'] = session_id
            load_chat_session(session_id)
    with col2:
        if st.button("🗑️", key=f"delete_{session_id}"):
            chat_sessions.delete_one({"_id": ObjectId(session_id)})
            # If the open conversation was just deleted, forget it; otherwise
            # later messages would be "appended" to a document that no longer
            # exists and silently dropped.
            if st.session_state['current_chat_id'] == session_id:
                st.session_state['current_chat_id'] = None
                st.session_state['chat_history'] = []
            st.rerun()  # Refresh to update the sidebar
138
+
139
# Main Chat Interface
st.markdown('<div class="fixed-header"><h1>Welcome To "RITES" Chatbot</h1></div>', unsafe_allow_html=True)
st.markdown("<hr>", unsafe_allow_html=True)

# Input box for the user question
user_question = st.chat_input("Ask a Question related to RITES PDFs")

if user_question:
    # Create a new session if none exists
    if not st.session_state['current_chat_id']:
        new_chat_id = create_new_chat_session()
        st.session_state['current_chat_id'] = new_chat_id

    with st.spinner("Please wait, I am thinking!!"):
        # Append the new user message to the full history
        user_message = {"role": "user", "content": user_question}
        st.session_state['chat_history'].append(user_message)

        # Prepare the last 5 messages (including the new one) for the agent input
        recent_messages = st.session_state['chat_history'][-5:]
        messages = [
            HumanMessage(content=msg["content"])
            if msg["role"] == "user"
            else AIMessage(content=msg["content"])
            for msg in recent_messages
        ]

        inputs = {"messages": messages}
        try:
            response = agent_executor.invoke(inputs)
        except Exception:
            # Top-level UI boundary: record the failure and fall through to the
            # error branch instead of aborting the run, which would also skip
            # rendering the conversation further down the script.
            import logging
            logging.getLogger(__name__).exception("Agent invocation failed")
            response = None

        if response:
            reply = response["messages"][-1].content
            assistant_message = {"role": "assistant", "content": reply}
            st.session_state['chat_history'].append(assistant_message)

            # Persist the user and assistant messages together; a session id is
            # guaranteed to exist at this point (created above if missing).
            update_chat_session(
                st.session_state['current_chat_id'],
                [user_message, assistant_message]
            )
        else:
            # Drop the unanswered question so the in-memory history stays in
            # step with what was stored and a retry does not send it twice.
            st.session_state['chat_history'].pop()
            st.error("Error processing your request, please try again later.")
181
+
182
# Render only the 15 most recent messages; the widget key encodes role and position.
for position, entry in enumerate(st.session_state['chat_history'][-15:]):
    from_user = entry["role"] == "user"
    speaker = "user" if from_user else "assistant"
    streamlit_chat.message(entry["content"], is_user=from_user, key=f"chat_message_{speaker}_{position}")
requirements.txt ADDED
Binary file (4.25 kB). View file
 
tools.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+ import pinecone
3
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
4
+ import os
5
+ from dotenv import load_dotenv
6
+
7
load_dotenv()

# Credentials and index name used by the retrieval tool below.
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
PINECONE_API = os.getenv("PINECONE_API_KEY")
PINECONE_INDEX = "rites-pdf"

# Embedding model used to vectorize queries.
google_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)

# Pinecone client and a handle to the index that is queried.
pc = pinecone.Pinecone(api_key=PINECONE_API)
index = pc.Index(PINECONE_INDEX)
22
+
23
@tool
def get_context(query: str) -> str:
    """
    Retrieve context information by performing a semantic search on indexed document chunks.

    This tool embeds the provided user query using a Google Generative AI embeddings model,
    then queries a Pinecone index to fetch the top 10 matching document chunks. Each match
    includes metadata such as the text chunk, starting page, ending page, and the source PDF URL.
    The function aggregates these details into a formatted string.

    Args:
        query (str): A user query search string used for semantic matching against the document index.

    Returns:
        str: A formatted string containing the matched document chunks along with their associated metadata,
        including start page, end page, and PDF URL.
    """
    # NOTE(review): the docstring above is presumably what the agent sees as the
    # tool description (via @tool), so its wording is kept unchanged - confirm.
    embedding = google_embeddings.embed_query(query)
    search_results = index.query(
        vector=embedding,
        top_k=10,  # Retrieve top 10 results
        include_metadata=True,
    )

    sections = []
    for count, match in enumerate(search_results["matches"], start=1):
        # Tolerate a match whose metadata is None instead of crashing on .get().
        metadata = match["metadata"] or {}
        chunk = metadata.get("chunk")
        url = metadata.get("pdf_url")
        start_page = metadata.get("start_page")
        end_page = metadata.get("end_page")

        # Explicit newlines keep the output independent of source indentation.
        sections.append(
            f"\nChunk {count}:\n"
            f"{chunk}\n"
            f"start_page: {start_page}\n"
            f"end_page: {end_page}\n"
            f"pdf_url: {url}\n"
            "#########################################\n"
        )

    # The leading single space mirrors the original accumulator's initial value.
    return " " + "".join(sections)
65
+
66
+
67
+
68
+
69
+
70
+
71
+
72
+
73
+