"""Streamlit app for chatting with the contents of a website.

The page at the URL entered in the sidebar is downloaded, split into chunks,
embedded with a Hugging Face sentence-transformer model and stored in a Chroma
vector store.  Each user question is answered by a history-aware
retrieval-augmented chain backed by Hugging Face Hub models.

An earlier variant of this script used OpenAI (``OpenAIEmbeddings`` /
``ChatOpenAI``) with the same overall structure; its commented-out copy has
been removed from this file.
"""
# Dependencies:
#   pip install streamlit langchain langchain-community beautifulsoup4 \
#       python-dotenv chromadb sentence-transformers huggingface_hub

import os
import uuid

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub
from langchain_community.vectorstores import Chroma
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Load environment variables (works locally and on Hugging Face Spaces).
load_dotenv()
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "").strip()

EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Model that rewrites the conversation into a search query.
QUERY_MODEL = "google/flan-t5-base"
# Model that writes the final answer from the retrieved context.
ANSWER_MODEL = "google/flan-t5-small"
LLM_KWARGS = {"temperature": 0.5, "max_length": 512}
GREETING = "Hello! Ask me anything about this website."


def _build_llm(repo_id):
    """Return a Hugging Face Hub LLM for ``repo_id`` with the shared settings."""
    return HuggingFaceHub(
        repo_id=repo_id,
        huggingfacehub_api_token=HF_TOKEN,
        # Copy so that no caller can mutate the module-level defaults.
        model_kwargs=dict(LLM_KWARGS),
    )


def get_vectorstore_from_url(url):
    """Download ``url``, split it into chunks and index them in Chroma.

    Args:
        url: Address of the web page to load.

    Returns:
        A ``Chroma`` vector store holding the embedded chunks of the page.
    """
    documents = WebBaseLoader(url).load()
    chunks = RecursiveCharacterTextSplitter().split_documents(documents)
    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

    # A unique collection per index keeps documents of different websites
    # apart.  NOTE(review): without an explicit name every call uses Chroma's
    # default collection, which appears to be shared inside one process.
    return Chroma.from_documents(
        chunks,
        embeddings,
        collection_name=f"site-{uuid.uuid4().hex}",
    )


def get_context_retriever_chain(vector_store):
    """Build a retriever that turns the chat history into a search query.

    Args:
        vector_store: Vector store to retrieve documents from.

    Returns:
        The chain produced by ``create_history_aware_retriever``.
    """
    prompt = ChatPromptTemplate.from_messages([
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        (
            "user",
            "Given the above conversation, generate a search query to look up "
            "in order to get information relevant to the conversation",
        ),
    ])
    return create_history_aware_retriever(
        _build_llm(QUERY_MODEL),
        vector_store.as_retriever(),
        prompt,
    )


def get_conversational_rag_chain(retriever_chain):
    """Combine ``retriever_chain`` with an answer-writing LLM.

    Args:
        retriever_chain: Chain that returns the documents relevant to the
            current conversation.

    Returns:
        The chain produced by ``create_retrieval_chain``.
    """
    prompt = ChatPromptTemplate.from_messages([
        ("system", "Answer the user's questions based on the below context:\n\n{context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
    ])
    stuff_documents_chain = create_stuff_documents_chain(_build_llm(ANSWER_MODEL), prompt)
    return create_retrieval_chain(retriever_chain, stuff_documents_chain)


def get_response(user_input):
    """Answer ``user_input`` using the indexed website and the chat history.

    Reads ``vector_store`` and ``chat_history`` from ``st.session_state``.

    Args:
        user_input: The question typed by the user.

    Returns:
        The ``'answer'`` entry of the retrieval chain's response.
    """
    retriever_chain = get_context_retriever_chain(st.session_state.vector_store)
    rag_chain = get_conversational_rag_chain(retriever_chain)
    response = rag_chain.invoke({
        "chat_history": st.session_state.chat_history,
        "input": user_input,
    })
    return response["answer"]


def _ensure_index_for(url):
    """Make sure the session holds an index and a chat history for ``url``.

    The index is rebuilt and the conversation restarted whenever the URL
    differs from the one that was indexed last; otherwise the user would keep
    getting answers about the previous website.
    """
    needs_index = (
        "vector_store" not in st.session_state
        or st.session_state.get("indexed_url") != url
    )
    if needs_index:
        with st.spinner("🔍 Loading and indexing website..."):
            st.session_state.vector_store = get_vectorstore_from_url(url)
        st.session_state.indexed_url = url
        st.session_state.chat_history = [AIMessage(content=GREETING)]
    elif "chat_history" not in st.session_state:
        st.session_state.chat_history = [AIMessage(content=GREETING)]


def main():
    """Render the Streamlit page and handle one script run."""
    st.set_page_config(page_title="Chat with Websites", page_icon="🤖")
    st.title("🧠 Chat with Websites (Free Hugging Face Model)")

    with st.sidebar:
        st.header("🔧 Settings")
        website_url = st.text_input("🌐 Website URL")

    if not website_url:
        st.info("Please enter a website URL in the sidebar.")
        return

    if not HF_TOKEN:
        # Fail early with a clear message instead of erroring during a query.
        st.error("HUGGINGFACEHUB_API_TOKEN is not set; cannot query the Hugging Face Hub.")
        return

    try:
        _ensure_index_for(website_url)
    except Exception as exc:  # UI boundary: report instead of crashing the run
        st.error(f"Could not load or index the website: {exc}")
        return

    user_input = st.chat_input("Type your message...")
    if user_input:
        try:
            response = get_response(user_input)
        except Exception as exc:  # UI boundary: report instead of crashing the run
            st.error(f"Could not generate a response: {exc}")
        else:
            st.session_state.chat_history.append(HumanMessage(content=user_input))
            st.session_state.chat_history.append(AIMessage(content=response))

    # Display chat messages.
    for msg in st.session_state.chat_history:
        with st.chat_message("AI" if isinstance(msg, AIMessage) else "Human"):
            st.write(msg.content)


# Called unconditionally, as the original top-level script flow was.
main()