|
|
|
|
|
import os |
|
|
from langchain.text_splitter import CharacterTextSplitter |
|
|
from langchain.document_loaders import TextLoader, DirectoryLoader |
|
|
from langchain.embeddings import CohereEmbeddings |
|
|
from langchain.embeddings import OpenAIEmbeddings |
|
|
from langchain.vectorstores import Chroma |
|
|
from langchain.llms import OpenAI |
|
|
from langchain.llms import Cohere |
|
|
from langchain.chains import RetrievalQA |
|
|
from langchain import PromptTemplate |
|
|
|
|
|
import streamlit as st |
|
|
|
|
|
def ingest(file_path, embeddings):
    """Chunk a text file, embed the chunks and persist them as a Chroma index.

    The file is loaded with ``TextLoader``, split by ``CharacterTextSplitter``
    (``chunk_size=1000``) and stored through ``Chroma.from_documents``. The
    index is written to a directory named after ``file_path`` with its
    extension removed, e.g. ``notes.txt`` -> ``notes``.

    Args:
        file_path: Path of the text file to ingest.
        embeddings: Embedding object passed to ``Chroma.from_documents`` as
            its ``embedding`` argument.

    Returns:
        None. The function is called for its side effect of persisting the
        vector store.
    """
    documents = TextLoader(file_path).load()
    splitter = CharacterTextSplitter(chunk_size=1000)
    docs = splitter.split_documents(documents)

    # Strip the actual extension instead of blindly dropping the last four
    # characters, which mangled names such as "notes.md" or "data.json".
    root, extension = os.path.splitext(file_path)
    # Without an extension the root equals the file path itself; add a suffix
    # so the index directory cannot collide with the source file.
    persist_directory = root if extension else root + "_index"
    print('persist dict: ')
    print(persist_directory)

    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    vectordb.persist()
|
|
|
|
|
# Sidebar form: choose the LLM provider and enter the matching API key.
with st.sidebar, st.form('Cohere/OpenAI'):
    mod = st.radio(
        label='Choose OpenAI/Cohere',
        options=('OpenAI', 'Cohere'),
    )
    api_key = st.text_input(label='Enter API key', type="password")

    # Submit button belonging to the form above.
    submitted = st.form_submit_button(label="Submit")
|
|
|
|
|
# Per provider: (environment variable for the key, LLM class, embeddings class).
_PROVIDERS = {
    'OpenAI': ("OPENAI_API_KEY", OpenAI, OpenAIEmbeddings),
    'Cohere': ("COHERE_API_KEY", Cohere, CohereEmbeddings),
}

# Nothing is configured until a key has been entered; `llm` and `embeddings`
# stay undefined in that case.
if api_key and mod in _PROVIDERS:
    _key_env_var, _llm_factory, _embeddings_factory = _PROVIDERS[mod]
    # Export the key first so the objects built below can read it from the
    # environment.
    os.environ[_key_env_var] = api_key
    llm = _llm_factory(temperature=0.7, verbose=True)
    embeddings = _embeddings_factory()
|
|
|
|
|
# Let the user upload a plain-text file and index it with `ingest`.
uploaded_file = st.file_uploader("Upload a file to ingest", type=["txt"])

if uploaded_file is not None:
    # `embeddings` is only created once an API key has been entered; without
    # this guard the ingest call below fails with a NameError.
    if not api_key:
        st.warning("Enter an API key in the sidebar before uploading a file.")
        st.stop()

    # getvalue() returns the whole buffer regardless of the current read
    # position, so a script rerun cannot yield empty content the way a
    # second read() can.
    file_contents = uploaded_file.getvalue()
    # Keep only the base name so the file is saved inside the working
    # directory even if the reported name contains path components.
    file_path = os.path.basename(uploaded_file.name)

    # The upload exists only in memory, but `ingest` opens `file_path` from
    # disk, so save the bytes first. This overwrites any file of the same
    # name in the working directory.
    with open(file_path, "wb") as saved_file:
        saved_file.write(file_contents)

    print(file_path)
    ingest(file_path, embeddings)