added chatbot with web search functionality
Browse files- .github/workflows/main.yml +17 -9
- README.md +2 -16
- requirements.txt +1 -0
- src/__pycache__/main.cpython-312.pyc +0 -0
- src/graph/graph_builder.py +11 -2
- src/main.py +7 -1
- src/nodes/websearch_chatbot.py +60 -0
- src/tools/websearch.py +40 -0
- src/ui/__pycache__/load.cpython-312.pyc +0 -0
- src/ui/config.ini +1 -1
- src/ui/load.py +6 -0
.github/workflows/main.yml
CHANGED
|
@@ -2,6 +2,7 @@ name: Sync to Hugging Face Space
|
|
| 2 |
on:
|
| 3 |
push:
|
| 4 |
branches: [main]
|
|
|
|
| 5 |
workflow_dispatch:
|
| 6 |
|
| 7 |
jobs:
|
|
@@ -10,8 +11,8 @@ jobs:
|
|
| 10 |
steps:
|
| 11 |
- uses: actions/checkout@v3
|
| 12 |
with:
|
| 13 |
-
fetch-depth: 0
|
| 14 |
-
lfs: true
|
| 15 |
|
| 16 |
- name: Install git-filter-repo
|
| 17 |
run: pip install git-filter-repo
|
|
@@ -19,10 +20,17 @@ jobs:
|
|
| 19 |
- name: Remove large file from history
|
| 20 |
run: git filter-repo --path "Rag_Documents/layout-parser-paper.pdf" --invert-paths --force
|
| 21 |
|
| 22 |
-
- name:
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
on:
|
| 3 |
push:
|
| 4 |
branches: [main]
|
| 5 |
+
|
| 6 |
workflow_dispatch:
|
| 7 |
|
| 8 |
jobs:
|
|
|
|
| 11 |
steps:
|
| 12 |
- uses: actions/checkout@v3
|
| 13 |
with:
|
| 14 |
+
fetch-depth: 0 # Required for history rewriting
|
| 15 |
+
lfs: true # Enable Git LFS support
|
| 16 |
|
| 17 |
- name: Install git-filter-repo
|
| 18 |
run: pip install git-filter-repo
|
|
|
|
| 20 |
- name: Remove large file from history
|
| 21 |
run: git filter-repo --path "Rag_Documents/layout-parser-paper.pdf" --invert-paths --force
|
| 22 |
|
| 23 |
+
- name: Log in to Hugging Face Hub
|
| 24 |
+
env:
|
| 25 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 26 |
+
run: |
|
| 27 |
+
pip install -U "huggingface_hub>=0.20.0"
|
| 28 |
+
git config --global credential.helper store
|
| 29 |
+
echo "https://bpratik:$HF_TOKEN@huggingface.co" > ~/.git-credentials
|
| 30 |
+
|
| 31 |
+
- name: Push to hub
|
| 32 |
+
env:
|
| 33 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 34 |
+
run: |
|
| 35 |
+
git remote add hf https://huggingface.co/spaces/bpratik/Chatbot
|
| 36 |
+
git push --force hf main
|
README.md
CHANGED
|
@@ -206,24 +206,10 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
|
|
| 206 |
|
| 207 |
## 🚧 Future Enhancements
|
| 208 |
|
| 209 |
-
- [
|
| 210 |
-
- [
|
| 211 |
- [ ] **File Upload Support**: Allow users to upload and chat about documents
|
| 212 |
- [ ] **Multiple Conversation Sessions**: Support for multiple concurrent chat sessions
|
| 213 |
- [ ] **Custom Model Integration**: Support for additional LLM providers
|
| 214 |
- [ ] **Chat Export**: Export conversation history to various formats
|
| 215 |
|
| 216 |
-
## 📞 Support
|
| 217 |
-
|
| 218 |
-
If you encounter any issues or have questions, please:
|
| 219 |
-
1. Check the troubleshooting section above
|
| 220 |
-
2. Search existing GitHub issues
|
| 221 |
-
3. Create a new issue with detailed information about the problem
|
| 222 |
-
|
| 223 |
-
## 🙏 Acknowledgments
|
| 224 |
-
|
| 225 |
-
- [LangChain](https://langchain.com/) for the AI framework
|
| 226 |
-
- [LangGraph](https://langchain-ai.github.io/langgraph/) for state graph implementation
|
| 227 |
-
- [Streamlit](https://streamlit.io/) for the web interface
|
| 228 |
-
- [Groq](https://groq.com/) for fast inference
|
| 229 |
-
- [OpenAI](https://openai.com/) for GPT models
|
|
|
|
| 206 |
|
| 207 |
## 🚧 Future Enhancements
|
| 208 |
|
| 209 |
+
- [x] **Memory/History Implementation**: Add persistent conversation memory using LangChain's built-in memory features
|
| 210 |
+
- [x] **Web Search Integration**: Implement web search capabilities for the chatbot
|
| 211 |
- [ ] **File Upload Support**: Allow users to upload and chat about documents
|
| 212 |
- [ ] **Multiple Conversation Sessions**: Support for multiple concurrent chat sessions
|
| 213 |
- [ ] **Custom Model Integration**: Support for additional LLM providers
|
| 214 |
- [ ] **Chat Export**: Export conversation history to various formats
|
| 215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -6,3 +6,4 @@ langchain_openai
|
|
| 6 |
langchain_groq
|
| 7 |
langchain_core
|
| 8 |
streamlit
|
|
|
|
|
|
| 6 |
langchain_groq
|
| 7 |
langchain_core
|
| 8 |
streamlit
|
| 9 |
+
tavily-python
|
src/__pycache__/main.cpython-312.pyc
CHANGED
|
Binary files a/src/__pycache__/main.cpython-312.pyc and b/src/__pycache__/main.cpython-312.pyc differ
|
|
|
src/graph/graph_builder.py
CHANGED
|
@@ -4,15 +4,17 @@ from langchain_core.prompts import ChatPromptTemplate
|
|
| 4 |
import datetime
|
| 5 |
from src.state.state import State
|
| 6 |
from src.nodes.basic_chatbot import BasicChatbot
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
class GraphBuilder:
|
| 10 |
|
| 11 |
"""Class to build the state graph for the application."""
|
| 12 |
|
| 13 |
-
def __init__(self, model, session_id: str = "default"):
|
| 14 |
self.llm = model
|
| 15 |
self.session_id = session_id
|
|
|
|
| 16 |
self.graph_builder = StateGraph(State)
|
| 17 |
|
| 18 |
def basic_chatbot(self):
|
|
@@ -22,6 +24,12 @@ class GraphBuilder:
|
|
| 22 |
self.graph_builder.add_edge(START, 'basic_chatbot')
|
| 23 |
self.graph_builder.add_edge('basic_chatbot', END)
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
def setup_graph(self, use_case: str):
|
| 26 |
"""
|
| 27 |
Setup the graph with the appropriate nodes based on use case.
|
|
@@ -31,8 +39,9 @@ class GraphBuilder:
|
|
| 31 |
|
| 32 |
if use_case == 'Basic Chatbot':
|
| 33 |
self.basic_chatbot()
|
|
|
|
|
|
|
| 34 |
else:
|
| 35 |
-
# Default to basic chatbot if use case is not recognized
|
| 36 |
self.basic_chatbot()
|
| 37 |
|
| 38 |
# Compile and return the graph
|
|
|
|
| 4 |
import datetime
|
| 5 |
from src.state.state import State
|
| 6 |
from src.nodes.basic_chatbot import BasicChatbot
|
| 7 |
+
from src.nodes.websearch_chatbot import WebSearchChatbot
|
| 8 |
|
| 9 |
|
| 10 |
class GraphBuilder:
|
| 11 |
|
| 12 |
"""Class to build the state graph for the application."""
|
| 13 |
|
| 14 |
+
def __init__(self, model, session_id: str = "default", tavily_api_key: str = None):
|
| 15 |
self.llm = model
|
| 16 |
self.session_id = session_id
|
| 17 |
+
self.tavily_api_key = tavily_api_key
|
| 18 |
self.graph_builder = StateGraph(State)
|
| 19 |
|
| 20 |
def basic_chatbot(self):
|
|
|
|
| 24 |
self.graph_builder.add_edge(START, 'basic_chatbot')
|
| 25 |
self.graph_builder.add_edge('basic_chatbot', END)
|
| 26 |
|
| 27 |
+
def websearch_chatbot(self):
|
| 28 |
+
self.websearch_chatbot_node = WebSearchChatbot(self.llm, self.session_id, self.tavily_api_key)
|
| 29 |
+
self.graph_builder.add_node('websearch_chatbot', self.websearch_chatbot_node.process)
|
| 30 |
+
self.graph_builder.add_edge(START, 'websearch_chatbot')
|
| 31 |
+
self.graph_builder.add_edge('websearch_chatbot', END)
|
| 32 |
+
|
| 33 |
def setup_graph(self, use_case: str):
|
| 34 |
"""
|
| 35 |
Setup the graph with the appropriate nodes based on use case.
|
|
|
|
| 39 |
|
| 40 |
if use_case == 'Basic Chatbot':
|
| 41 |
self.basic_chatbot()
|
| 42 |
+
elif use_case == 'Chatbot with Web Search':
|
| 43 |
+
self.websearch_chatbot()
|
| 44 |
else:
|
|
|
|
| 45 |
self.basic_chatbot()
|
| 46 |
|
| 47 |
# Compile and return the graph
|
src/main.py
CHANGED
|
@@ -81,7 +81,13 @@ def load_app():
|
|
| 81 |
model, st.session_state.session_id
|
| 82 |
)
|
| 83 |
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
try:
|
| 87 |
graph = graph_builder.setup_graph(use_case=use_case)
|
|
|
|
| 81 |
model, st.session_state.session_id
|
| 82 |
)
|
| 83 |
|
| 84 |
+
tavily_api_key = user_input.get('Tavily API Key', '')
|
| 85 |
+
|
| 86 |
+
graph_builder = GraphBuilder(
|
| 87 |
+
model=memory_enabled_model,
|
| 88 |
+
session_id=st.session_state.session_id,
|
| 89 |
+
tavily_api_key=tavily_api_key
|
| 90 |
+
)
|
| 91 |
|
| 92 |
try:
|
| 93 |
graph = graph_builder.setup_graph(use_case=use_case)
|
src/nodes/websearch_chatbot.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.state.state import State
|
| 2 |
+
from src.tools.websearch import WebSearchTool
|
| 3 |
+
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
|
| 4 |
+
from langchain.tools import BaseTool
|
| 5 |
+
|
| 6 |
+
class WebSearchChatbot:
    """Graph node that answers chat messages, optionally using web search.

    When a non-blank Tavily API key is supplied and the search tool can be
    built and bound to the model, the model may issue tool calls; each call is
    executed and its result is fed back to the model for a final answer.
    Otherwise the node falls back to the plain model.
    """

    # Name under which the search tool's calls are recognised.
    _SEARCH_TOOL_NAME = 'tavily_search_results_json'
    # Words suggesting the user expects live information from the web.
    _SEARCH_KEYWORDS = ('search', 'find', 'latest', 'current', 'news')

    def __init__(self, model, session_id: str = "default", tavily_api_key: str = None):
        """Store the model and try to enable web search.

        Args:
            model: Chat model; must offer ``invoke`` and, for search,
                ``bind_tools``.
            session_id: Identifier passed to the model as
                ``config["configurable"]["session_id"]``.
            tavily_api_key: Tavily key; ``None`` or blank disables search.
        """
        self.model = model
        self.session_id = session_id
        self.memory_config = {"configurable": {"session_id": session_id}}

        # Start in "no search" mode so every attribute exists on all paths.
        self.web_search = None
        self.tools = []
        self.model_with_tools = model
        self.has_search = False

        if not (tavily_api_key and tavily_api_key.strip()):
            return

        try:
            web_search = WebSearchTool(tavily_api_key)
            tools = [web_search.get_tool()]
            model_with_tools = model.bind_tools(tools)
        except Exception:
            # Best-effort: keep the chatbot usable without search, but leave a
            # trace of why search could not be enabled.
            import logging
            logging.getLogger(__name__).exception(
                "Web search could not be enabled; continuing without it"
            )
        else:
            self.web_search = web_search
            self.tools = tools
            self.model_with_tools = model_with_tools
            self.has_search = True

    def process(self, state):
        """Produce the next assistant message for ``state['messages']``.

        Returns:
            ``state`` unchanged when there are no messages; otherwise a dict
            ``{'messages': <reply>}`` holding a single reply message.
        """
        messages = state['messages']
        if not messages:
            return state

        if not self.has_search:
            # Without search, tell the user up front when the request looks
            # like it needs current information.
            content = getattr(messages[-1], 'content', None)
            # Content may be a non-string (e.g. a list of parts); only plain
            # text is scanned for keywords.
            if isinstance(content, str):
                lowered = content.lower()
                if any(keyword in lowered for keyword in self._SEARCH_KEYWORDS):
                    search_disclaimer = "I don't have web search capabilities enabled. Please provide a Tavily API key to search for current information."
                    response_content = f"{search_disclaimer}\n\nBased on my training data, I can still help with general questions."
                    return {'messages': AIMessage(content=response_content)}

        response = self.model_with_tools.invoke(messages, config=self.memory_config)

        tool_calls = getattr(response, 'tool_calls', None)
        if not tool_calls:
            return {'messages': response}

        # Work on a copy: the incoming state list must not be mutated.
        conversation = list(messages)
        conversation.append(response)
        for tool_call in tool_calls:
            tool_result = self._execute_tool_call(tool_call)
            conversation.append(
                ToolMessage(
                    content=str(tool_result),
                    tool_call_id=tool_call['id'],
                )
            )

        final_response = self.model_with_tools.invoke(conversation, config=self.memory_config)
        return {'messages': final_response}

    def _execute_tool_call(self, tool_call):
        """Run one tool call and return its result.

        Unknown tools yield ``"Tool not found"``. A failing search yields an
        error string, so the model can still answer instead of the whole turn
        aborting.
        """
        web_search = getattr(self, 'web_search', None)
        if tool_call['name'] != self._SEARCH_TOOL_NAME or web_search is None:
            return "Tool not found"
        try:
            return web_search.search_tool.invoke(tool_call['args'])
        except Exception as exc:
            import logging
            logging.getLogger(__name__).exception("Web search tool call failed")
            return f"Error searching the web: {exc}"
|
src/tools/websearch.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 2 |
+
from langchain_core.tools import tool
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
class WebSearchTool:
    """Thin wrapper around the Tavily search tool."""

    def __init__(self, api_key: str, max_results: int = 5):
        """Validate the key and build the underlying search tool.

        Args:
            api_key: Tavily API key; surrounding whitespace is ignored.
            max_results: Value forwarded to the search tool as ``max_results``.

        Raises:
            ValueError: If ``api_key`` is missing or blank.
        """
        cleaned_key = api_key.strip() if isinstance(api_key, str) else ""
        if not cleaned_key:
            raise ValueError("Tavily API key is required for web search functionality")
        self.api_key = cleaned_key

        # The search tool is constructed without an explicit key, so the key is
        # published through the environment first.
        # NOTE(review): os.environ is process-wide; if several users share one
        # process, the most recent key wins for all of them - confirm this is
        # acceptable for the deployment.
        os.environ["TAVILY_API_KEY"] = cleaned_key

        self.search_tool = TavilySearchResults(
            max_results=max_results,
        )

    def search_web(self, query: str) -> str:
        """Search the web for current information about the given query.

        Returns:
            One text block per result (title, content, URL) joined by
            newlines, ``"No search results found."`` for an empty result, or
            an ``"Error searching the web: ..."`` message if the search fails.
        """
        try:
            results = self.search_tool.invoke(query)
            if not results:
                return "No search results found."
            return "\n".join(self._format_result(result) for result in results)
        except Exception as e:
            # Deliberate best-effort: report the failure as text instead of
            # raising into the caller.
            return f"Error searching the web: {e}"

    @staticmethod
    def _format_result(result) -> str:
        """Render one search hit; non-dict hits are passed through ``str``."""
        if not isinstance(result, dict):
            return str(result)
        title = result.get('title', 'N/A')
        content = result.get('content', 'N/A')
        url = result.get('url', 'N/A')
        return f"Title: {title}\nContent: {content}\nURL: {url}\n"

    def get_tool(self):
        """Return the underlying search tool, ready to be bound to a model."""
        return self.search_tool
|
src/ui/__pycache__/load.cpython-312.pyc
CHANGED
|
Binary files a/src/ui/__pycache__/load.cpython-312.pyc and b/src/ui/__pycache__/load.cpython-312.pyc differ
|
|
|
src/ui/config.ini
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
[DEFAULT]
|
| 2 |
-
Title =
|
| 3 |
USE_CASE = Basic Chatbot, Chatbot with Web Search
|
| 4 |
LLM_options = Groq, OpenAI
|
| 5 |
|
|
|
|
| 1 |
[DEFAULT]
|
| 2 |
+
Title = AI Chatbot with Web Search
|
| 3 |
USE_CASE = Basic Chatbot, Chatbot with Web Search
|
| 4 |
LLM_options = Groq, OpenAI
|
| 5 |
|
src/ui/load.py
CHANGED
|
@@ -62,6 +62,12 @@ class LoadStreamlitUI:
|
|
| 62 |
# Use Case Selection
|
| 63 |
self.user_controls['Selected Use Case'] = st.selectbox('Select Use Case', use_case)
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
# Memory Management Section
|
| 66 |
st.divider()
|
| 67 |
st.subheader("💭 Memory Management")
|
|
|
|
| 62 |
# Use Case Selection
|
| 63 |
self.user_controls['Selected Use Case'] = st.selectbox('Select Use Case', use_case)
|
| 64 |
|
| 65 |
+
# Tavily API Key for Web Search
|
| 66 |
+
if self.user_controls['Selected Use Case'] == 'Chatbot with Web Search':
|
| 67 |
+
self.user_controls['Tavily API Key'] = st.text_input('Enter Tavily API Key for Web Search', type='password')
|
| 68 |
+
if not self.user_controls['Tavily API Key']:
|
| 69 |
+
st.warning('Tavily API key is required for web search functionality. Get one at https://tavily.com')
|
| 70 |
+
|
| 71 |
# Memory Management Section
|
| 72 |
st.divider()
|
| 73 |
st.subheader("💭 Memory Management")
|