# NOTE(review): removed extraction artifacts that preceded the code (a
# "File size" banner plus git-blame hash and line-number gutters) — they
# were not Python and broke the file at import time.
import os
import textwrap
from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine
from llama_index import VectorStoreIndex, SimpleDirectoryReader, Document
from llama_index.vector_stores import DeepLakeVectorStore
from langchain.chat_models import ChatOpenAI
import chainlit as cl
import os
# Map the deployment secret "open_ai" onto the env var the OpenAI SDK reads.
# Guarded: os.environ values must be str, so blindly assigning the result of
# .get() raised TypeError at import time whenever "open_ai" was unset.
_openai_key = os.environ.get("open_ai")
if _openai_key:
    os.environ["OPENAI_API_KEY"] = _openai_key
from llama_index import SimpleDirectoryReader, Document, StorageContext, OpenAIEmbedding, ServiceContext, PromptHelper, VectorStoreIndex
from llama_index.vector_stores import PineconeVectorStore, QdrantVectorStore, SimpleVectorStore, DeepLakeVectorStore
from transformers import BertTokenizerFast
import openai
from llama_index.llms import OpenAI
from llama_index import ServiceContext
from llama_index.embeddings import OpenAIEmbedding
from llama_index.node_parser import SimpleNodeParser
from llama_index.text_splitter import TokenTextSplitter
from llama_index import StorageContext, load_index_from_storage
from llama_index import load_index_from_storage, load_indices_from_storage, load_graph_from_storage
#dataset_path ="hub://cxcxxaaaaaz/text_embedding" # if we comment this out and don't pass the path then GPTDeepLakeIndex will create dataset in memory
from llama_index.storage.storage_context import StorageContext
from llama_index import VectorStoreIndex, SimpleDirectoryReader, Document
from llama_index.vector_stores import ChromaVectorStore
# Create an index over the documents
#vector_store = DeepLakeVectorStore(dataset_path=dataset_path
import chromadb
# --- Vector store: a persistent on-disk Chroma collection backs the index ---
db2 = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db2.get_or_create_collection("vector")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
#vector_store = LanceDBVectorStore1(uri="./sample_data/")
#storage_context = StorageContext.from_defaults(vector_store=vector_store)

# --- Models ---
llm = OpenAI(model='gpt-3.5-turbo', temperature=0.1)
embed_model = OpenAIEmbedding()
#node_parser = SimpleNodeParser(text_splitter=TokenTextSplitter(chunk_size=2924, chunk_overlap=20))

# Controls how retrieved chunks are packed into the model's context window.
prompt_helper = PromptHelper(
    context_window=2000,
    num_output=256,
    chunk_overlap_ratio=0.1,
    chunk_size_limit=200,
)

import tiktoken
from llama_index.callbacks import CallbackManager, TokenCountingHandler
from llama_index import load_index_from_storage, load_indices_from_storage, load_graph_from_storage

# Count tokens with the tokenizer that matches the chat model above.
token_counter = TokenCountingHandler(tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
callback_manager = CallbackManager([token_counter])

# Fix: token_counter/callback_manager and prompt_helper were built but never
# attached to the ServiceContext, so token usage was never recorded and the
# PromptHelper settings were silently ignored. Wire both in here.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    prompt_helper=prompt_helper,
    callback_manager=callback_manager,
)

from llama_index import set_global_service_context

# Build an (initially empty) index on top of the persisted Chroma store.
# The vector store is supplied through storage_context; the previous extra
# `vectorstore=` kwarg is not a recognized parameter and has been dropped.
index = VectorStoreIndex.from_documents(
    [],
    storage_context=storage_context,
    service_context=service_context,
)
@cl.on_chat_start
async def factory():
    """Build a streaming query engine when a chat session starts.

    The engine is stored in the Chainlit user session so that every
    subsequent message handler for this session can reuse it.
    """
    engine = index.as_query_engine(
        streaming=True,
        service_context=service_context,
    )
    cl.user_session.set("query_engine", engine)
@cl.on_message
async def main(message: cl.Message):
    """Answer an incoming chat message by streaming tokens from the index.

    Runs the (blocking) query off the event loop via ``cl.make_async``,
    streams each generated token into the reply, then sends the final text.
    """
    engine = cl.user_session.get("query_engine")  # type: RetrieverQueryEngine
    result = await cl.make_async(engine.query)(message.content)

    reply = cl.Message(content="")
    for tok in result.response_gen:
        await reply.stream_token(token=tok)

    # Once the generator is drained, the full response text is available.
    if result.response_txt:
        reply.content = result.response_txt
    await reply.send()
|