File size: 2,051 Bytes
9627de8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57

import os
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader, DirectoryLoader
from langchain.embeddings import CohereEmbeddings
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.llms import Cohere
from langchain.chains import RetrievalQA
from langchain import PromptTemplate

import streamlit as st

def ingest(file_path, embeddings):
    """Embed a text file and persist the resulting Chroma vector store.

    The file is loaded with ``TextLoader``, split with
    ``CharacterTextSplitter(chunk_size=1000)``, embedded with *embeddings*
    and written to a Chroma store on disk.

    Args:
        file_path: Path of the text file to ingest.
        embeddings: Embedding object passed to ``Chroma.from_documents``
            as its ``embedding`` argument.

    The persist directory is *file_path* with its extension removed
    (``notes.txt`` -> ``notes``). When the path has no extension, ``_db`` is
    appended instead so the directory does not collide with the file.
    """
    loader = TextLoader(file_path)
    documents = loader.load()
    # Split the loaded text into chunks before embedding.
    text_splitter = CharacterTextSplitter(chunk_size=1000)
    docs = text_splitter.split_documents(documents)

    # Strip the real extension rather than blindly dropping the last four
    # characters, which mangles names such as "data.md" or "README".
    root, ext = os.path.splitext(file_path)
    persist_directory = root if ext else file_path + '_db'
    print('persist dict: ')
    print(persist_directory)

    vectordb = Chroma.from_documents(documents=docs,
                                     embedding=embeddings,
                                     persist_directory=persist_directory)
    # Persist the db to disk.
    vectordb.persist()

# Provider objects stay None until an API key has been supplied, so the
# upload handler below can detect an unconfigured provider instead of
# failing with NameError.
llm = None
embeddings = None

# Sidebar form: choose the provider and enter its API key.
with st.sidebar:
    with st.form('Cohere/OpenAI'):
        mod = st.radio('Choose OpenAI/Cohere', ('OpenAI', 'Cohere'))
        api_key = st.text_input('Enter API key', type="password")
        # model = st.radio('Choose Company', ('ArtisanAppetite foods', 'BMW','Titan Watches'))
        submitted = st.form_submit_button("Submit")

# Build the LLM and embedding objects for the selected provider; the key is
# handed over through the provider's environment variable.
if api_key:
    if mod == 'OpenAI':
        os.environ["OPENAI_API_KEY"] = api_key
        llm = OpenAI(temperature=0.7, verbose=True)
        embeddings = OpenAIEmbeddings()
    elif mod == 'Cohere':
        os.environ["COHERE_API_KEY"] = api_key
        llm = Cohere(temperature=0.7, verbose=True)
        embeddings = CohereEmbeddings()

uploaded_file = st.file_uploader("Upload a file to ingest", type=["txt"])

if uploaded_file is not None:
    if embeddings is None:
        st.warning("Enter an API key in the sidebar before ingesting a file.")
    else:
        # Keep only the final path component so the upload is always written
        # into the working directory, whatever name the client sent.
        file_path = os.path.basename(uploaded_file.name)
        # ingest() loads the document from disk, so the uploaded bytes have
        # to be saved first.
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getvalue())
        print(file_path)
        ingest(file_path, embeddings)