"""Streamlit app: build a FAISS index over a website's text and chat with it.

Flow: the user supplies an OpenAI API key and a URL; "Build Embeddings"
fetches the page, chunks it, embeds it with OpenAI embeddings, and saves a
FAISS index under the system temp dir. Once an index exists (freshly built
or loaded from disk), a Q&A section answers questions via a "stuff" QA chain
over the top similarity-search hits.
"""

import os
import shutil
import tempfile

import streamlit as st
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import ChatOpenAI

# Streamlit UI
st.title("🔍 Chat with Any Website")

# User inputs
openai_api_key = st.text_input("Enter OpenAI API Key", type="password")
website_url = st.text_input("Enter Website URL")

# Temporary directory to store the FAISS index; survives Streamlit reruns
# on the same machine. NOTE(review): the index is keyed only by this fixed
# path, not by URL — a previously indexed site is served until rebuilt.
temp_dir = tempfile.gettempdir()
faiss_db_path = os.path.join(temp_dir, "faiss_index_dir")

# Ensure FAISS directory exists
os.makedirs(faiss_db_path, exist_ok=True)

# OpenAIEmbeddings() needs the API key at construction time, so export it
# before attempting to load a previously saved index.
if openai_api_key:
    os.environ['OPENAI_API_KEY'] = openai_api_key

# Load the saved index if one exists. Bug fix: the original constructed
# OpenAIEmbeddings() unconditionally at startup, which raises (and crashes
# the app) whenever a saved index exists but no API key has been entered yet.
docsearch = None
if openai_api_key and os.path.exists(os.path.join(faiss_db_path, "index.faiss")):
    try:
        docsearch = FAISS.load_local(
            faiss_db_path,
            OpenAIEmbeddings(),
            allow_dangerous_deserialization=True,
        )
    except Exception as err:
        # Corrupted or stale index: offer a rebuild instead of crashing.
        st.warning(f"Could not load saved index ({err}); please rebuild embeddings.")

if st.button("Build Embeddings") and openai_api_key and website_url:
    st.info("Fetching website data...")
    os.environ['OPENAI_API_KEY'] = openai_api_key

    # Load website data
    loader = WebBaseLoader(website_url)
    raw_text = loader.load()

    # Chunking the fetched text
    text_splitter = CharacterTextSplitter(separator='\n', chunk_size=500, chunk_overlap=50)
    docs = text_splitter.split_documents(raw_text)

    # Creating embeddings
    embeddings = OpenAIEmbeddings()
    docsearch = FAISS.from_documents(docs, embeddings)

    # Save FAISS index, replacing any previous one wholesale so stale
    # per-file artifacts from an earlier build cannot linger.
    if os.path.exists(faiss_db_path):
        shutil.rmtree(faiss_db_path)
    os.makedirs(faiss_db_path)
    docsearch.save_local(faiss_db_path)

    st.success("Embeddings built and saved successfully!")

# Chat section
if docsearch:
    st.subheader("💬 Chat with Website")
    user_query = st.text_input("Enter your question")
    if st.button("Get Answer") and user_query:
        chain = load_qa_chain(ChatOpenAI(model="gpt-4o"), chain_type="stuff")
        docs = docsearch.similarity_search(user_query)
        response = chain.run(input_documents=docs, question=user_query)
        st.write("**Response:**", response)