pratikcsv commited on
Commit
3bc934b
·
1 Parent(s): b14c653

added chatbot with web search functionality

Browse files
.github/workflows/main.yml CHANGED
@@ -2,6 +2,7 @@ name: Sync to Hugging Face Space
2
  on:
3
  push:
4
  branches: [main]
 
5
  workflow_dispatch:
6
 
7
  jobs:
@@ -10,8 +11,8 @@ jobs:
10
  steps:
11
  - uses: actions/checkout@v3
12
  with:
13
- fetch-depth: 0
14
- lfs: true
15
 
16
  - name: Install git-filter-repo
17
  run: pip install git-filter-repo
@@ -19,10 +20,17 @@ jobs:
19
  - name: Remove large file from history
20
  run: git filter-repo --path "Rag_Documents/layout-parser-paper.pdf" --invert-paths --force
21
 
22
- - name: Push to Hugging Face Spaces
23
- uses: huggingface/huggingface_hub@main
24
- with:
25
- target_repo: bpratik/Chatbot
26
- token: ${{ secrets.HF_TOKEN }}
27
- repo_type: space
28
- space_sdk: streamlit
 
 
 
 
 
 
 
 
2
  on:
3
  push:
4
  branches: [main]
5
+
6
  workflow_dispatch:
7
 
8
  jobs:
 
11
  steps:
12
  - uses: actions/checkout@v3
13
  with:
14
+ fetch-depth: 0 # Required for history rewriting
15
+ lfs: true # Enable Git LFS support
16
 
17
  - name: Install git-filter-repo
18
  run: pip install git-filter-repo
 
20
  - name: Remove large file from history
21
  run: git filter-repo --path "Rag_Documents/layout-parser-paper.pdf" --invert-paths --force
22
 
23
+ - name: Log in to Hugging Face Hub
24
+ env:
25
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
26
+ run: |
27
+ pip install -U "huggingface_hub>=0.20.0"
28
+ git config --global credential.helper store
29
+ echo "https://bpratik:$HF_TOKEN@huggingface.co" > ~/.git-credentials
30
+
31
+ - name: Push to hub
32
+ env:
33
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
34
+ run: |
35
+ git remote add hf https://huggingface.co/spaces/bpratik/Chatbot
36
+ git push --force hf main
README.md CHANGED
@@ -206,24 +206,10 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
206
 
207
  ## 🚧 Future Enhancements
208
 
209
- - [ ] **Memory/History Implementation**: Add persistent conversation memory using LangChain's built-in memory features
210
- - [ ] **Web Search Integration**: Implement web search capabilities for the chatbot
211
  - [ ] **File Upload Support**: Allow users to upload and chat about documents
212
  - [ ] **Multiple Conversation Sessions**: Support for multiple concurrent chat sessions
213
  - [ ] **Custom Model Integration**: Support for additional LLM providers
214
  - [ ] **Chat Export**: Export conversation history to various formats
215
 
216
- ## 📞 Support
217
-
218
- If you encounter any issues or have questions, please:
219
- 1. Check the troubleshooting section above
220
- 2. Search existing GitHub issues
221
- 3. Create a new issue with detailed information about the problem
222
-
223
- ## 🙏 Acknowledgments
224
-
225
- - [LangChain](https://langchain.com/) for the AI framework
226
- - [LangGraph](https://langchain-ai.github.io/langgraph/) for state graph implementation
227
- - [Streamlit](https://streamlit.io/) for the web interface
228
- - [Groq](https://groq.com/) for fast inference
229
- - [OpenAI](https://openai.com/) for GPT models
 
206
 
207
  ## 🚧 Future Enhancements
208
 
209
+ - [x] **Memory/History Implementation**: Add persistent conversation memory using LangChain's built-in memory features
210
+ - [x] **Web Search Integration**: Implement web search capabilities for the chatbot
211
  - [ ] **File Upload Support**: Allow users to upload and chat about documents
212
  - [ ] **Multiple Conversation Sessions**: Support for multiple concurrent chat sessions
213
  - [ ] **Custom Model Integration**: Support for additional LLM providers
214
  - [ ] **Chat Export**: Export conversation history to various formats
215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -6,3 +6,4 @@ langchain_openai
6
  langchain_groq
7
  langchain_core
8
  streamlit
 
 
6
  langchain_groq
7
  langchain_core
8
  streamlit
9
+ tavily-python
src/__pycache__/main.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/main.cpython-312.pyc and b/src/__pycache__/main.cpython-312.pyc differ
 
src/graph/graph_builder.py CHANGED
@@ -4,15 +4,17 @@ from langchain_core.prompts import ChatPromptTemplate
4
  import datetime
5
  from src.state.state import State
6
  from src.nodes.basic_chatbot import BasicChatbot
 
7
 
8
 
9
  class GraphBuilder:
10
 
11
  """Class to build the state graph for the application."""
12
 
13
- def __init__(self, model, session_id: str = "default"):
14
  self.llm = model
15
  self.session_id = session_id
 
16
  self.graph_builder = StateGraph(State)
17
 
18
  def basic_chatbot(self):
@@ -22,6 +24,12 @@ class GraphBuilder:
22
  self.graph_builder.add_edge(START, 'basic_chatbot')
23
  self.graph_builder.add_edge('basic_chatbot', END)
24
 
 
 
 
 
 
 
25
  def setup_graph(self, use_case: str):
26
  """
27
  Setup the graph with the appropriate nodes based on use case.
@@ -31,8 +39,9 @@ class GraphBuilder:
31
 
32
  if use_case == 'Basic Chatbot':
33
  self.basic_chatbot()
 
 
34
  else:
35
- # Default to basic chatbot if use case is not recognized
36
  self.basic_chatbot()
37
 
38
  # Compile and return the graph
 
4
  import datetime
5
  from src.state.state import State
6
  from src.nodes.basic_chatbot import BasicChatbot
7
+ from src.nodes.websearch_chatbot import WebSearchChatbot
8
 
9
 
10
  class GraphBuilder:
11
 
12
  """Class to build the state graph for the application."""
13
 
14
+ def __init__(self, model, session_id: str = "default", tavily_api_key: str = None):
15
  self.llm = model
16
  self.session_id = session_id
17
+ self.tavily_api_key = tavily_api_key
18
  self.graph_builder = StateGraph(State)
19
 
20
  def basic_chatbot(self):
 
24
  self.graph_builder.add_edge(START, 'basic_chatbot')
25
  self.graph_builder.add_edge('basic_chatbot', END)
26
 
27
+ def websearch_chatbot(self):
28
+ self.websearch_chatbot_node = WebSearchChatbot(self.llm, self.session_id, self.tavily_api_key)
29
+ self.graph_builder.add_node('websearch_chatbot', self.websearch_chatbot_node.process)
30
+ self.graph_builder.add_edge(START, 'websearch_chatbot')
31
+ self.graph_builder.add_edge('websearch_chatbot', END)
32
+
33
  def setup_graph(self, use_case: str):
34
  """
35
  Setup the graph with the appropriate nodes based on use case.
 
39
 
40
  if use_case == 'Basic Chatbot':
41
  self.basic_chatbot()
42
+ elif use_case == 'Chatbot with Web Search':
43
+ self.websearch_chatbot()
44
  else:
 
45
  self.basic_chatbot()
46
 
47
  # Compile and return the graph
src/main.py CHANGED
@@ -81,7 +81,13 @@ def load_app():
81
  model, st.session_state.session_id
82
  )
83
 
84
- graph_builder = GraphBuilder(model=memory_enabled_model, session_id=st.session_state.session_id)
 
 
 
 
 
 
85
 
86
  try:
87
  graph = graph_builder.setup_graph(use_case=use_case)
 
81
  model, st.session_state.session_id
82
  )
83
 
84
+ tavily_api_key = user_input.get('Tavily API Key', '')
85
+
86
+ graph_builder = GraphBuilder(
87
+ model=memory_enabled_model,
88
+ session_id=st.session_state.session_id,
89
+ tavily_api_key=tavily_api_key
90
+ )
91
 
92
  try:
93
  graph = graph_builder.setup_graph(use_case=use_case)
src/nodes/websearch_chatbot.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.state.state import State
2
+ from src.tools.websearch import WebSearchTool
3
+ from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
4
+ from langchain.tools import BaseTool
5
+
6
class WebSearchChatbot:
    """Chatbot node that can optionally ground answers in live web search.

    When a valid Tavily API key is supplied, the model is bound to a Tavily
    search tool and any tool calls it emits are executed in-line; otherwise
    the node degrades gracefully to a plain chatbot that warns the user when
    a query looks like it needs current information.
    """

    # Heuristic markers suggesting the user wants up-to-date information.
    _SEARCH_KEYWORDS = ('search', 'find', 'latest', 'current', 'news')

    def __init__(self, model, session_id: str = "default", tavily_api_key: str = None):
        """
        Args:
            model: Chat model exposing ``invoke`` (and ``bind_tools`` when
                search is enabled).
            session_id: Conversation identifier used for the memory config.
            tavily_api_key: Optional Tavily key; search is disabled when
                missing or blank.
        """
        self.model = model
        self.session_id = session_id
        self.memory_config = {"configurable": {"session_id": session_id}}

        # Default to the tool-less model; upgrade only if tool setup succeeds.
        self.model_with_tools = model
        self.has_search = False
        if tavily_api_key and tavily_api_key.strip():
            try:
                self.web_search = WebSearchTool(tavily_api_key)
                self.tools = [self.web_search.get_tool()]
                self.model_with_tools = model.bind_tools(self.tools)
                self.has_search = True
            except Exception:
                # Best-effort: fall back to a tool-less model if the search
                # tool cannot be constructed (e.g. invalid key).
                # NOTE(review): consider surfacing this failure to the UI
                # instead of swallowing it silently.
                self.model_with_tools = model
                self.has_search = False

    def process(self, state):
        """Graph node entry point: produce the next assistant message."""
        if not state['messages']:
            return state
        # Work on a copy so we never mutate the list held in shared state.
        messages = list(state['messages'])

        if not self.has_search:
            # No search capability: warn explicitly when the query looks like
            # it needs live data, otherwise just answer from the model.
            last_message = messages[-1]
            if hasattr(last_message, 'content') and any(
                keyword in last_message.content.lower()
                for keyword in self._SEARCH_KEYWORDS
            ):
                search_disclaimer = "I don't have web search capabilities enabled. Please provide a Tavily API key to search for current information."
                response_content = f"{search_disclaimer}\n\nBased on my training data, I can still help with general questions."
                return {'messages': AIMessage(content=response_content)}

        response = self.model_with_tools.invoke(messages, config=self.memory_config)

        if hasattr(response, 'tool_calls') and response.tool_calls:
            # Execute each requested tool call, feed the results back, and
            # let the model compose a final grounded answer.
            messages.append(response)
            for tool_call in response.tool_calls:
                tool_result = self._execute_tool_call(tool_call)
                messages.append(
                    ToolMessage(
                        content=str(tool_result),
                        tool_call_id=tool_call['id'],
                    )
                )
            final_response = self.model_with_tools.invoke(messages, config=self.memory_config)
            return {'messages': final_response}

        return {'messages': response}

    def _execute_tool_call(self, tool_call):
        """Dispatch a single tool call; only the Tavily search tool is known."""
        if tool_call['name'] == 'tavily_search_results_json':
            return self.web_search.search_tool.invoke(tool_call['args'])
        return "Tool not found"
src/tools/websearch.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.tools.tavily_search import TavilySearchResults
2
+ from langchain_core.tools import tool
3
+ import os
4
+
5
class WebSearchTool:
    """Thin wrapper around Tavily web search for use as a LangChain tool."""

    def __init__(self, api_key: str):
        """
        Args:
            api_key: Tavily API key; must be non-empty.

        Raises:
            ValueError: If no API key is provided.
        """
        # Validate before storing anything or touching the environment.
        if not api_key:
            raise ValueError("Tavily API key is required for web search functionality")
        self.api_key = api_key

        # TavilySearchResults reads the key from the environment.
        # NOTE(review): this mutates process-global state; if the installed
        # library version accepts an explicit key argument, prefer that.
        os.environ["TAVILY_API_KEY"] = api_key

        self.search_tool = TavilySearchResults(
            max_results=5
        )

    def search_web(self, query: str) -> str:
        """Search the web and return a human-readable summary of results.

        Never raises: search failures are reported as an error string so the
        chatbot can surface them in conversation.
        """
        try:
            # Only the network call can realistically fail; keep the try
            # body minimal so formatting bugs are not masked.
            results = self.search_tool.invoke(query)
        except Exception as e:
            return f"Error searching the web: {str(e)}"

        if not results:
            return "No search results found."

        formatted_results = []
        for result in results:
            if isinstance(result, dict):
                # Tavily results carry title/content/url keys.
                title = result.get('title', 'N/A')
                content = result.get('content', 'N/A')
                url = result.get('url', 'N/A')
                formatted_results.append(f"Title: {title}\nContent: {content}\nURL: {url}\n")
            else:
                formatted_results.append(str(result))

        return "\n".join(formatted_results)

    def get_tool(self):
        """Return the underlying LangChain tool for binding to a model."""
        return self.search_tool
src/ui/__pycache__/load.cpython-312.pyc CHANGED
Binary files a/src/ui/__pycache__/load.cpython-312.pyc and b/src/ui/__pycache__/load.cpython-312.pyc differ
 
src/ui/config.ini CHANGED
@@ -1,5 +1,5 @@
1
  [DEFAULT]
2
- Title = Basic Chatbot
3
  USE_CASE = Basic Chatbot, Chatbot with Web Search
4
  LLM_options = Groq, OpenAI
5
 
 
1
  [DEFAULT]
2
+ Title = AI Chatbot with Web Search
3
  USE_CASE = Basic Chatbot, Chatbot with Web Search
4
  LLM_options = Groq, OpenAI
5
 
src/ui/load.py CHANGED
@@ -62,6 +62,12 @@ class LoadStreamlitUI:
62
  # Use Case Selection
63
  self.user_controls['Selected Use Case'] = st.selectbox('Select Use Case', use_case)
64
 
 
 
 
 
 
 
65
  # Memory Management Section
66
  st.divider()
67
  st.subheader("💭 Memory Management")
 
62
  # Use Case Selection
63
  self.user_controls['Selected Use Case'] = st.selectbox('Select Use Case', use_case)
64
 
65
+ # Tavily API Key for Web Search
66
+ if self.user_controls['Selected Use Case'] == 'Chatbot with Web Search':
67
+ self.user_controls['Tavily API Key'] = st.text_input('Enter Tavily API Key for Web Search', type='password')
68
+ if not self.user_controls['Tavily API Key']:
69
+ st.warning('Tavily API key is required for web search functionality. Get one at https://tavily.com')
70
+
71
  # Memory Management Section
72
  st.divider()
73
  st.subheader("💭 Memory Management")