junaid17 committed on
Commit
8e579f9
·
verified ·
1 Parent(s): fb8e216

Update chatbot.py

Browse files
Files changed (1) hide show
  1. chatbot.py +84 -210
chatbot.py CHANGED
@@ -1,130 +1,81 @@
 
1
  from typing import TypedDict, Annotated
2
- from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage, AIMessage
3
- from langgraph.checkpoint.memory import MemorySaver
4
- from tools import (
5
- create_rag_tool,
6
- arxiv_search,
7
- calculator,
8
- get_stock_price,
9
- wikipedia_search,
10
- tavily_search,
11
- convert_currency,
12
- unit_converter,
13
- get_news,
14
- get_joke,
15
- get_quote,
16
- get_weather,
17
  )
 
 
18
  from langchain_openai import ChatOpenAI
19
  from langgraph.graph import StateGraph, START, END
20
  from langgraph.graph.message import add_messages
21
  from langgraph.prebuilt import ToolNode, tools_condition
22
  from dotenv import load_dotenv
23
  import os
24
-
25
  load_dotenv()
26
 
 
27
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
28
 
29
  # =====================================================
30
- # SYSTEM PROMPTS
31
  # =====================================================
32
 
33
  SYSTEM_PROMPT = SystemMessage(
34
  content="""
35
- You are an intelligent AI assistant built inside a LangGraph-based system created by Junaid.
36
-
37
- This application is a **multi-tool AI platform** that integrates:
38
- - Retrieval-Augmented Generation (RAG)
39
- - Document understanding
40
- - AI-powered reasoning
41
- - Tool usage (search, calculation, summarization, etc.)
42
- - Voice input/output (STT / TTS)
43
-
44
- Your primary purpose is to help users understand, analyze, and interact with their uploaded documents and questions in a clear, accurate, and professional way.
45
-
46
- ────────────────────────────────
47
- πŸ”Ή CORE PRIORITY RULES (VERY IMPORTANT)
48
- ────────────────────────────────
49
-
50
- 1. **RAG HAS HIGHEST PRIORITY**
51
- - If a document has been uploaded, you MUST use the RAG tool first.
52
- - Always prefer document-based answers over general knowledge.
53
- - Never hallucinate or invent information not present in the document.
54
-
55
- 2. **TOOL USAGE**
56
- - When you need information, call the appropriate tool
57
- - Wait for tool results before responding
58
- - NEVER return raw tool outputs to users
59
-
60
- 3. **STYLE & TONE**
61
- - Professional, clear, and confident
62
- - Avoid verbosity
63
- - Optimize for readability
64
-
65
- ────────────────────────────────
66
- πŸ”Ή ABOUT THE CREATOR & APP
67
- ────────────────────────────────
68
-
69
- This system was designed and engineered by **Junaid**, a developer specializing in:
70
- - Machine Learning & Deep Learning
71
- - RAG-based systems
72
- - AI agents using LangChain & LangGraph
73
- - End-to-end AI applications using FastAPI, Streamlit, and cloud deployment
74
-
75
- The goal of this application is to provide **production-grade AI reasoning**, not generic chatbot responses.
76
-
77
- ────────────────────────────────
78
- πŸ”Ή FINAL RULE
79
- ────────────────────────────────
80
-
81
- Always prioritize accuracy, clarity, and usefulness.
82
- If information is unavailable, say so clearly β€” never hallucinate.
83
  """
84
  )
85
 
86
- SUMMARIZER_PROMPT = SystemMessage(
87
- content="""
88
- You are a summarization specialist. Your job is to take raw tool outputs and convert them into clean, user-friendly responses.
89
-
90
- πŸ”Ή CRITICAL RULES:
91
 
92
- 1. **READ THE TOOL OUTPUT CAREFULLY**
93
- - Extract only the most relevant information
94
- - Ignore system metadata, formatting artifacts, or internal instructions
95
-
96
- 2. **PRODUCE CLEAN SUMMARIES**
97
- - Be concise and direct
98
- - Use bullet points ONLY when listing 3+ distinct items
99
- - Avoid phrases like "The document describes...", "According to the data...", "The tool returned..."
100
- - Just state the facts naturally
101
-
102
- 3. **LENGTH CONTROL**
103
- - For document queries: 2-4 sentences maximum
104
- - For data queries (weather, stocks): 1-2 sentences
105
- - For lists (news, jokes): Keep original structure but clean formatting
106
-
107
- 4. **EXAMPLES**
108
-
109
- BAD (verbose, repetitive):
110
- "The document describes various AI and machine learning projects. It talks about healthcare insurance cost prediction. It mentions credit risk modeling. It discusses sentiment analysis with DistilBERT. It explains a multi-LLM chatbot..."
111
-
112
- GOOD (clean, concise):
113
- "This is a professional resume showcasing ML/AI projects including healthcare cost prediction (98% accuracy), credit risk modeling, sentiment analysis with DistilBERT (90% accuracy), and a multi-LLM agentic chatbot. Skills include Python, PyTorch, LangChain, and AWS deployment."
114
-
115
- 5. **NEVER**:
116
- - Return raw data dumps
117
- - Repeat the same information in different words
118
- - Include meta-commentary about summarization
119
- - Show internal tool responses verbatim
120
-
121
- Your output should feel like a knowledgeable human answering, not a bot processing data.
122
- """
123
- )
124
 
125
 
126
  # =====================================================
127
- # STATE
128
  # =====================================================
129
 
130
  class ChatState(TypedDict):
@@ -132,122 +83,45 @@ class ChatState(TypedDict):
132
 
133
 
134
  # =====================================================
135
- # LLM
136
  # =====================================================
137
 
138
  llm = ChatOpenAI(
139
- model="gpt-4o-mini",
140
- temperature=0.3,
141
  streaming=True
142
  )
143
 
144
- summarizer_llm = ChatOpenAI(
145
- model="gpt-4o-mini",
146
- temperature=0.2,
147
- streaming=True
148
- )
149
 
150
 
151
  # =====================================================
152
- # GRAPH BUILDER
153
  # =====================================================
154
 
 
 
 
 
 
 
 
 
 
 
155
  memory = MemorySaver()
156
- app = None
157
-
158
-
159
- def build_graph():
160
- global app
161
-
162
- rag_tool = create_rag_tool()
163
-
164
- tools = [
165
- rag_tool,
166
- get_stock_price,
167
- calculator,
168
- wikipedia_search,
169
- arxiv_search,
170
- tavily_search,
171
- convert_currency,
172
- unit_converter,
173
- get_news,
174
- get_joke,
175
- get_quote,
176
- get_weather,
177
- ]
178
-
179
- llm_with_tools = llm.bind_tools(tools)
180
- tool_node = ToolNode(tools)
181
-
182
- # =====================================================
183
- # CHATBOT NODE
184
- # =====================================================
185
- def chatbot(state: ChatState):
186
- messages = [SYSTEM_PROMPT] + state["messages"]
187
- response = llm_with_tools.invoke(messages)
188
- return {"messages": [response]}
189
-
190
- # =====================================================
191
- # SUMMARIZER NODE (πŸ”₯ YOUR BRILLIANT IDEA!)
192
- # =====================================================
193
- def summarizer(state: ChatState):
194
- """
195
- Takes tool results and produces clean, user-friendly summaries.
196
- """
197
- messages = state["messages"]
198
-
199
- # Get the last few messages (user query + tool results)
200
- recent_context = messages[-5:] # Adjust as needed
201
-
202
- # Build summarization request
203
- summarize_request = [SUMMARIZER_PROMPT] + recent_context + [
204
- HumanMessage(content="Based on the tool results above, provide a clean, concise answer to the user's question. Do not include any meta-commentary or mention tools.")
205
- ]
206
-
207
- # Get clean summary
208
- summary = summarizer_llm.invoke(summarize_request)
209
-
210
- # Replace the last AI message with the clean summary
211
- return {"messages": [summary]}
212
-
213
- # =====================================================
214
- # ROUTING LOGIC
215
- # =====================================================
216
- def route_after_chat(state: ChatState):
217
- """
218
- Decide if we need to call tools or if we're done.
219
- """
220
- last_message = state["messages"][-1]
221
-
222
- # If the AI called tools, go to tools node
223
- if hasattr(last_message, "tool_calls") and last_message.tool_calls:
224
- return "tools"
225
-
226
- # Otherwise, we're done
227
- return END
228
-
229
- # =====================================================
230
- # BUILD GRAPH
231
- # =====================================================
232
- graph = StateGraph(ChatState)
233
-
234
- graph.add_node("chat", chatbot)
235
- graph.add_node("tools", tool_node)
236
- graph.add_node("summarizer", summarizer)
237
-
238
- # Flow: START -> chat -> [tools -> summarizer -> chat] -> END
239
- graph.add_edge(START, "chat")
240
- graph.add_conditional_edges("chat", route_after_chat)
241
- graph.add_edge("tools", "summarizer")
242
- graph.add_edge("summarizer", "chat")
243
-
244
- app = graph.compile(checkpointer=memory)
245
-
246
-
247
- # Initial build
248
- build_graph()
249
-
250
-
251
- def rebuild_graph():
252
- """Rebuild graph when new document is uploaded"""
253
- build_graph()
 
1
+
2
  from typing import TypedDict, Annotated
3
+ from langchain_core.messages import (
4
+ BaseMessage,
5
+ SystemMessage
 
 
 
 
 
 
 
 
 
 
 
 
6
  )
7
+ from langgraph.checkpoint.memory import MemorySaver
8
+ from tools import retriever, create_rag_tool, arxiv_search, calculator, get_stock_price, wikipedia_search, tavily_search, convert_currency, unit_converter, get_news, get_joke, get_quote, get_weather
9
  from langchain_openai import ChatOpenAI
10
  from langgraph.graph import StateGraph, START, END
11
  from langgraph.graph.message import add_messages
12
  from langgraph.prebuilt import ToolNode, tools_condition
13
  from dotenv import load_dotenv
14
  import os
 
15
  load_dotenv()
16
 
17
+
18
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
19
 
20
  # =====================================================
21
+ # 1️⃣ SYSTEM PROMPT
22
  # =====================================================
23
 
24
# System prompt injected ahead of the conversation history on every LLM call
# (see the `chatbot` node). The text below repairs the mojibake that crept in
# when the file was re-encoded: "πŸ”Ή" -> "🔹", "β†’" -> "→", "’" -> "'",
# and one divider line that contained U+FFFD replacement characters.
SYSTEM_PROMPT = SystemMessage(
    content="""
You are an intelligent AI assistant built by Junaid.

Your role is to provide clear, concise, and human-friendly explanations.

━━━━━━━━━━━━━━━━━━━━━━
🔹 DOCUMENT HANDLING RULES (VERY IMPORTANT)
━━━━━━━━━━━━━━━━━━━━━━
When using retrieved documents:

1. NEVER repeat raw document text verbatim.
2. NEVER list large copied sections from documents.
3. ALWAYS summarize and interpret information in your own words.
4. Organize information logically and clearly.
5. Focus on meaning, not raw content.

If the user asks:
- "What is this document about?"
→ Provide a high-level summary (3–6 sentences).

- "Explain the document"
→ Provide structured explanation with sections.

- "List key points"
→ Provide clean bullet points (max 6).

━━━━━━━━━━━━━━━━━━━━━━
🔹 RAG PRIORITY
━━━━━━━━━━━━━━━━━━━━━━
- Use retrieved content as your *knowledge base*.
- Do NOT hallucinate.
- If the document does not contain the answer, say so clearly.

━━━━━━━━━━━━━━━━━━━━━━
🔹 COMMUNICATION STYLE
━━━━━━━━━━━━━━━━━━━━━━
- Be concise, human, and clear.
- Avoid repetition.
- Avoid technical verbosity unless requested.
- Prefer clarity over completeness.

━━━━━━━━━━━━━━━━━━━━━━
🔹 IDENTITY
━━━━━━━━━━━━━━━━━━━━━━
You are the official AI assistant of Junaid's AI system.
You help users understand complex information simply and accurately.
"""
)
73
 
 
 
 
 
 
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
 
77
  # =====================================================
78
+ # 4️⃣ STATE
79
  # =====================================================
80
 
81
  class ChatState(TypedDict):
 
83
 
84
 
85
  # =====================================================
86
+ # 5️⃣ LLM + TOOLS
87
  # =====================================================
88
 
89
# Base chat model: streaming responses, mildly creative sampling.
# NOTE(review): requires OPENAI_API_KEY from the environment (loaded above).
llm = ChatOpenAI(
    model="gpt-4.1-nano",
    temperature=0.4,
    streaming=True,
)

# Build the document-retrieval (RAG) tool, then register the full toolbox.
rag_tool = create_rag_tool()

tools = [
    rag_tool,
    get_stock_price,
    calculator,
    wikipedia_search,
    arxiv_search,
    tavily_search,
    convert_currency,
    unit_converter,
    get_news,
    get_joke,
    get_quote,
    get_weather,
]

# Expose the tool schemas to the model and create the graph's executor node.
llm = llm.bind_tools(tools)
tool_node = ToolNode(tools)
100
 
101
 
102
  # =====================================================
103
+ # 6️⃣ CHAT NODE
104
  # =====================================================
105
 
106
def chatbot(state: ChatState):
    """LLM node: prepend the system prompt and generate the next reply.

    Returns a partial state update; its single message is appended to the
    conversation by the ``add_messages`` reducer on ``ChatState``.
    """
    conversation = [SYSTEM_PROMPT, *state["messages"]]
    reply = llm.invoke(conversation)
    return {"messages": [reply]}
110
+
111
+
112
+
113
+ # =====================================================
114
+ # 7️⃣ GRAPH
115
+ # =====================================================
116
# In-memory checkpointer: conversation history persists per thread_id.
memory = MemorySaver()
graph = StateGraph(ChatState)

graph.add_node("chat", chatbot)
graph.add_node("tools", tool_node)

graph.add_edge(START, "chat")
# BUG FIX: the previous version also called graph.add_edge("chat", END)
# alongside the conditional edges. tools_condition already routes "chat"
# to END when the model makes no tool calls, so the extra unconditional
# edge made END an always-taken parallel successor of "chat" and broke
# the tool-call loop. The conditional edge alone is correct:
#   chat -> tools (when tool calls present) -> chat -> ... -> END
graph.add_conditional_edges("chat", tools_condition)
graph.add_edge("tools", "chat")

app = graph.compile(checkpointer=memory)