"""Streamlit app that ingests an uploaded text file into a Chroma vector store.

The sidebar collects the provider (OpenAI or Cohere) and its API key; the
main page accepts a ``.txt`` upload, which is saved locally, split into
chunks, embedded, and persisted to a Chroma directory named after the file.
"""

import os

import streamlit as st
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader, DirectoryLoader
from langchain.embeddings import CohereEmbeddings
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import Cohere
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma


def ingest(file_path, embeddings):
    """Load a text file, chunk it, embed it, and persist a Chroma index.

    Args:
        file_path: Path of the text file to load. The index is persisted in
            a directory named after this path with its extension removed.
        embeddings: Embedding object handed to ``Chroma.from_documents``.
    """
    documents = TextLoader(file_path).load()

    # Split the loaded text into chunks before embedding.
    text_splitter = CharacterTextSplitter(chunk_size=1000)
    docs = text_splitter.split_documents(documents)

    # Strip the real extension rather than a fixed four characters, so names
    # such as "notes.md" or "data.text" are not mangled. A path without an
    # extension would otherwise collide with the source file itself, so it
    # gets a suffix instead.
    root, ext = os.path.splitext(file_path)
    persist_directory = root if ext else file_path + "_db"
    print('persist dict: ')
    print(persist_directory)

    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    # Persist the db to disk.
    vectordb.persist()


with st.sidebar:
    with st.form('Cohere/OpenAI'):
        mod = st.radio('Choose OpenAI/Cohere', ('OpenAI', 'Cohere'))
        api_key = st.text_input('Enter API key', type="password")
        # model = st.radio('Choose Company', ('ArtisanAppetite foods', 'BMW','Titan Watches'))
        submitted = st.form_submit_button("Submit")

# Bind both names up front so that a missing API key results in a clear
# message further down instead of a NameError.
llm = None
embeddings = None
if api_key:
    if mod == 'OpenAI':
        os.environ["OPENAI_API_KEY"] = api_key
        llm = OpenAI(temperature=0.7, verbose=True)
        embeddings = OpenAIEmbeddings()
    elif mod == 'Cohere':
        os.environ["COHERE_API_KEY"] = api_key
        llm = Cohere(temperature=0.7, verbose=True)
        embeddings = CohereEmbeddings()

uploaded_file = st.file_uploader("Upload a file to ingest", type=["txt"])
if uploaded_file is not None:
    if embeddings is None:
        st.warning("Enter an API key in the sidebar before ingesting a file.")
    else:
        file_contents = uploaded_file.read()
        # The upload name is client-supplied: keep only its final component
        # so it cannot point outside the working directory.
        file_path = os.path.basename(uploaded_file.name)
        # The upload lives only in memory; TextLoader reads from disk, so the
        # bytes have to be written out before ingesting.
        with open(file_path, "wb") as f:
            f.write(file_contents)
        print(file_path)
        ingest(file_path, embeddings)