# README / DataIngester.py
# jyotiguptahk's picture
# Upload 3 files
# 9627de8
import os
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader, DirectoryLoader
from langchain.embeddings import CohereEmbeddings
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.llms import Cohere
from langchain.chains import RetrievalQA
from langchain import PromptTemplate
import streamlit as st
def ingest(file_path, embeddings):
    """Embed a text file and persist it as a Chroma vector store on disk.

    The file is loaded with ``TextLoader``, split with
    ``CharacterTextSplitter(chunk_size=1000)``, embedded with *embeddings*
    and stored in a directory named after the file with its extension
    removed (``notes.txt`` -> ``notes``).

    Args:
        file_path: Path of the text file to ingest.
        embeddings: Embedding object handed to ``Chroma.from_documents``.
    """
    loader = TextLoader(file_path)
    documents = loader.load()

    # Split the loaded text into chunks before embedding.
    text_splitter = CharacterTextSplitter(chunk_size=1000)
    docs = text_splitter.split_documents(documents)

    # Strip the real extension instead of assuming it is exactly four
    # characters long (".txt"); for ".txt" files the result is unchanged.
    root, ext = os.path.splitext(file_path)
    # Without an extension the root equals the file path itself, which cannot
    # also be used as a directory, so fall back to a suffixed name.
    persist_directory = root if ext else file_path + "_db"
    print('persist dict: ')
    print(persist_directory)

    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    # Persist the db to disk.
    vectordb.persist()
    # Drop the local reference to the store once it has been persisted.
    vectordb = None
# Sidebar form: pick the provider and enter its API key.
with st.sidebar:
    with st.form('Cohere/OpenAI'):
        mod = st.radio('Choose OpenAI/Cohere', ('OpenAI', 'Cohere'))
        api_key = st.text_input('Enter API key', type="password")
        # model = st.radio('Choose Company', ('ArtisanAppetite foods', 'BMW','Titan Watches'))
        submitted = st.form_submit_button("Submit")

# Bind both names up front so the upload branch below can tell "no key
# entered yet" apart from a configured provider instead of hitting NameError.
llm = None
embeddings = None
if api_key:
    if mod == 'OpenAI':
        os.environ["OPENAI_API_KEY"] = api_key
        llm = OpenAI(temperature=0.7, verbose=True)
        embeddings = OpenAIEmbeddings()
    elif mod == 'Cohere':
        os.environ["COHERE_API_KEY"] = api_key
        llm = Cohere(temperature=0.7, verbose=True)
        embeddings = CohereEmbeddings()

uploaded_file = st.file_uploader("Upload a file to ingest", type=["txt"])
if uploaded_file is not None:
    if embeddings is None:
        st.warning("Enter an API key in the sidebar before ingesting a file.")
    else:
        file_contents = uploaded_file.getvalue()
        # Keep only the final path component: the name comes from the client
        # and must not be able to point outside the working directory.
        file_path = os.path.basename(uploaded_file.name)
        # ingest() reads the file from disk by path, so the uploaded bytes
        # have to be written out first; otherwise the upload is never used.
        with open(file_path, "wb") as f:
            f.write(file_contents)
        print(file_path)
        ingest(file_path, embeddings)