"""Streamlit chatbot answering questions via a Pinecone-backed
ConversationalRetrievalChain over OpenAI embeddings."""

import os

import openai
import pinecone
import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone

# Keep only the most recent Q/A pairs to bound prompt size and memory.
MAX_HISTORY = 10


def _initialize_env():
    """Configure Pinecone and OpenAI credentials from Streamlit secrets."""
    pinecone.init(
        api_key=st.secrets["pinecone_api_key"],
        environment=st.secrets["pinecone_env"]
    )
    openai.api_key = st.secrets["openai_api_key"]
    # Some langchain code paths read the key from the environment variable
    # rather than the openai module attribute, so set both.
    os.environ['OPENAI_API_KEY'] = st.secrets["openai_api_key"]


def _initialize_indexes():
    """Return a Pinecone vector store wrapping the pre-built index.

    The index is assumed to already exist. One-time ingestion recipe
    (previously commented-out code, kept for reference):

        loader = PyPDFLoader("./etf-book.pdf")
        documents = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        docs = text_splitter.split_documents(documents)
        Pinecone.from_documents(
            docs, embeddings, index_name=st.secrets["pinecone_table_name"]
        )
    """
    embeddings = OpenAIEmbeddings()
    return Pinecone.from_existing_index(
        st.secrets["pinecone_table_name"],
        embeddings
    )


def _initialize_retriever(db_object):
    """Build a conversational QA chain over the top-2 most similar chunks."""
    retriever = db_object.as_retriever(
        search_type="similarity", search_kwargs={"k": 2}
    )
    return ConversationalRetrievalChain.from_llm(
        ChatOpenAI(temperature=0),
        retriever
    )


def answer(user_input):
    """Answer *user_input* using the retrieval chain and render the reply.

    BUG FIX: the original kept ``chat_history`` in a module-level list,
    but Streamlit re-runs the entire script on every interaction, so the
    list was reset to ``[]`` each time and the chain never saw any prior
    conversation. History is now persisted in ``st.session_state``.
    """
    if user_input == '':
        return
    history = st.session_state.setdefault("chat_history", [])
    result = qa({
        "question": user_input,
        "chat_history": history
    })
    history.append((user_input, result["answer"]))
    # Truncate so memory and prompt size stay bounded across the session.
    st.session_state["chat_history"] = history[-MAX_HISTORY:]
    st.write(
        "Bot: ",
        result["answer"]
    )


_initialize_env()
db = _initialize_indexes()
qa = _initialize_retriever(db)
question = st.text_input('Question')
answer(question)