File size: 8,348 Bytes
de878c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15a4c90
 
68efd10
15a4c90
 
 
 
de878c1
 
15a4c90
 
 
 
68efd10
de878c1
68efd10
15a4c90
de878c1
15a4c90
 
de878c1
 
 
15a4c90
de878c1
 
 
 
 
 
 
15a4c90
 
 
de878c1
15a4c90
de878c1
 
68efd10
de878c1
 
 
15a4c90
de878c1
15a4c90
de878c1
 
 
15a4c90
de878c1
 
 
 
 
bc57c6d
 
 
 
 
de878c1
bc57c6d
 
 
 
 
de878c1
15a4c90
de878c1
 
 
15a4c90
de878c1
 
 
15a4c90
 
de878c1
15a4c90
 
de878c1
 
 
15a4c90
 
 
de878c1
15a4c90
 
de878c1
 
 
15a4c90
 
de878c1
 
15a4c90
de878c1
 
15a4c90
 
de878c1
15a4c90
de878c1
 
15a4c90
de878c1
 
 
 
15a4c90
 
68efd10
de878c1
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# # pip install streamlit langchain langchain-openai beautifulsoup4 python-dotenv chromadb
# import os
# import streamlit as st
# from langchain_core.messages import AIMessage, HumanMessage
# from langchain_community.document_loaders import WebBaseLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_community.vectorstores import Chroma
# from langchain_openai import OpenAIEmbeddings, ChatOpenAI
# #from dotenv import load_dotenv
# from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
# from langchain.chains import create_history_aware_retriever, create_retrieval_chain
# from langchain.chains.combine_documents import create_stuff_documents_chain
# from constants import openai_key

# os.environ["OPENAI_API_KEY"]=openai_key


# #load_dotenv()

# def get_vectorstore_from_url(url):
#     # get the text in document form
#     loader = WebBaseLoader(url)
#     document = loader.load()
    
#     # split the document into chunks
#     text_splitter = RecursiveCharacterTextSplitter()
#     document_chunks = text_splitter.split_documents(document)
    
#     # create a vectorstore from the chunks
#     vector_store = Chroma.from_documents(document_chunks, OpenAIEmbeddings())

#     return vector_store

# def get_context_retriever_chain(vector_store):
#     llm = ChatOpenAI()
    
#     retriever = vector_store.as_retriever()
    
#     prompt = ChatPromptTemplate.from_messages([
#       MessagesPlaceholder(variable_name="chat_history"),
#       ("user", "{input}"),
#       ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
#     ])
    
#     retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
    
#     return retriever_chain
    
# def get_conversational_rag_chain(retriever_chain): 
    
#     llm = ChatOpenAI()
    
#     prompt = ChatPromptTemplate.from_messages([
#       ("system", "Answer the user's questions based on the below context:\n\n{context}"),
#       MessagesPlaceholder(variable_name="chat_history"),
#       ("user", "{input}"),
#     ])
    
#     stuff_documents_chain = create_stuff_documents_chain(llm,prompt)
    
#     return create_retrieval_chain(retriever_chain, stuff_documents_chain)

# def get_response(user_input):
#     retriever_chain = get_context_retriever_chain(st.session_state.vector_store)
#     conversation_rag_chain = get_conversational_rag_chain(retriever_chain)
    
#     response = conversation_rag_chain.invoke({
#         "chat_history": st.session_state.chat_history,
#         "input": user_input
#     })
    
#     return response['answer']

# # app config
# st.set_page_config(page_title="Chat with websites", page_icon="🤖")
# st.title("Chat with websites")

# # sidebar
# with st.sidebar:
#     st.header("Settings")
#     website_url = st.text_input("Website URL")

# if website_url is None or website_url == "":
#     st.info("Please enter a website URL")

# else:
#     # session state
#     if "chat_history" not in st.session_state:
#         st.session_state.chat_history = [
#             AIMessage(content="Hello, I am a bot. How can I help you?"),
#         ]
#     if "vector_store" not in st.session_state:
#         st.session_state.vector_store = get_vectorstore_from_url(website_url)    

#     # user input
#     user_query = st.chat_input("Type your message here...")
#     if user_query is not None and user_query != "":
#         response = get_response(user_query)
#         st.session_state.chat_history.append(HumanMessage(content=user_query))
#         st.session_state.chat_history.append(AIMessage(content=response))
        
       

#     # conversation
#     for message in st.session_state.chat_history:
#         if isinstance(message, AIMessage):
#             with st.chat_message("AI"):
#                 st.write(message.content)
#         elif isinstance(message, HumanMessage):
#             with st.chat_message("Human"):
#                 st.write(message.content)

import os
import streamlit as st
from dotenv import load_dotenv
from langchain_core.messages import AIMessage, HumanMessage
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Load environment variables via python-dotenv (per the original author, this
# works both locally and on Hugging Face Spaces).
load_dotenv()
# Hugging Face API token, passed as `huggingfacehub_api_token` when building
# the HuggingFaceHub LLM. Falls back to an empty string when the variable is
# unset; surrounding whitespace is stripped.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "").strip()

# Function to get vectorstore from a website
def get_vectorstore_from_url(url):
    """Download a web page, chunk it, embed it, and index it in Chroma.

    Args:
        url: Address of the page to load with ``WebBaseLoader``.

    Returns:
        A ``Chroma`` vector store holding the embedded chunks of the page.
    """
    # Local import: only this function needs it.
    import uuid

    loader = WebBaseLoader(url)
    documents = loader.load()

    # Split into chunks (splitter defaults are used as-is).
    text_splitter = RecursiveCharacterTextSplitter()
    chunks = text_splitter.split_documents(documents)

    # Embeddings using a Hugging Face sentence-transformers model.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Give every index its own collection. Without an explicit name each call
    # targets Chroma's default collection, so chunks from previously indexed
    # sites could be returned for an unrelated site.
    # NOTE(review): relies on Chroma's in-process client sharing collections
    # by name -- confirm against the installed chromadb version.
    collection_name = f"site-{uuid.uuid4().hex}"
    vector_store = Chroma.from_documents(
        chunks,
        embeddings,
        collection_name=collection_name,
    )

    return vector_store

# History-aware retriever
def get_context_retriever_chain(vector_store):
    """Create a history-aware retriever on top of ``vector_store``.

    The LLM is prompted with the chat history and the latest user input and
    asked to produce a search query; that query is then run against the
    vector store's retriever.

    Args:
        vector_store: Object exposing ``as_retriever()``.

    Returns:
        The chain produced by ``create_history_aware_retriever``.
    """
    generation_settings = {"temperature": 0.5, "max_length": 512}
    query_llm = HuggingFaceHub(
        repo_id="google/flan-t5-base",
        huggingfacehub_api_token=HF_TOKEN,
        model_kwargs=generation_settings,
    )

    # Prior turns first, then the new question, then the rewrite instruction.
    query_messages = [
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation"),
    ]
    query_prompt = ChatPromptTemplate.from_messages(query_messages)

    return create_history_aware_retriever(
        query_llm,
        vector_store.as_retriever(),
        query_prompt,
    )

# Conversational chain
def get_conversational_rag_chain(retriever_chain):
    """Build the retrieval-augmented answering chain.

    Retrieved documents are stuffed into the ``{context}`` slot of the system
    prompt, followed by the chat history and the latest user input.

    Args:
        retriever_chain: Retriever (e.g. the history-aware retriever) that
            supplies the context documents.

    Returns:
        The chain produced by ``create_retrieval_chain``.
    """
    # Uses the module-level HF_TOKEN, like the retriever chain, rather than
    # re-reading the environment here.
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-small",
        huggingfacehub_api_token=HF_TOKEN,
        model_kwargs={"temperature": 0.5, "max_length": 512},
    )

    prompt = ChatPromptTemplate.from_messages([
        ("system", "Answer the user's questions based on the below context:\n\n{context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
    ])

    stuff_documents_chain = create_stuff_documents_chain(llm, prompt)

    return create_retrieval_chain(retriever_chain, stuff_documents_chain)

# Main response generator
def get_response(user_input):
    """Answer ``user_input`` using the session's vector store and history.

    Reads ``vector_store`` and ``chat_history`` from ``st.session_state``,
    builds the retriever and RAG chains, invokes the RAG chain, and returns
    the value stored under the ``'answer'`` key of its result.

    Args:
        user_input: The user's latest message.

    Returns:
        The ``'answer'`` entry of the chain's response.
    """
    history_aware_retriever = get_context_retriever_chain(
        st.session_state.vector_store
    )
    qa_chain = get_conversational_rag_chain(history_aware_retriever)

    chain_inputs = {
        "chat_history": st.session_state.chat_history,
        "input": user_input,
    }
    result = qa_chain.invoke(chain_inputs)
    return result['answer']

# Streamlit UI
st.set_page_config(page_title="Chat with Websites", page_icon="🤖")
st.title("🧠 Chat with Websites (Free Hugging Face Model)")

with st.sidebar:
    st.header("🔧 Settings")
    # Strip so a whitespace-only entry is treated the same as no URL.
    website_url = (st.text_input("🌐 Website URL") or "").strip()

if not website_url:
    st.info("Please enter a website URL in the sidebar.")
else:
    # (Re)build the index when nothing is indexed yet or when the user has
    # entered a different URL; otherwise the chat would keep answering from
    # the previously indexed site.
    needs_indexing = (
        "vector_store" not in st.session_state
        or "indexed_url" not in st.session_state
        or st.session_state.indexed_url != website_url
    )
    if needs_indexing:
        with st.spinner("🔍 Loading and indexing website..."):
            st.session_state.vector_store = get_vectorstore_from_url(website_url)
        # Recorded only after indexing succeeds, so a failed load is retried
        # on the next rerun.
        st.session_state.indexed_url = website_url
        # A new site starts a new conversation.
        st.session_state.chat_history = [AIMessage(content="Hello! Ask me anything about this website.")]

    if "chat_history" not in st.session_state:
        st.session_state.chat_history = [AIMessage(content="Hello! Ask me anything about this website.")]

    user_input = st.chat_input("Type your message...")
    if user_input:
        # Generate the answer before appending, so the history passed to the
        # chain does not yet contain the current question.
        response = get_response(user_input)
        st.session_state.chat_history.append(HumanMessage(content=user_input))
        st.session_state.chat_history.append(AIMessage(content=response))

    # Display chat messages
    for msg in st.session_state.chat_history:
        with st.chat_message("AI" if isinstance(msg, AIMessage) else "Human"):
            st.write(msg.content)