import os
import textwrap

from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine
from llama_index import VectorStoreIndex, SimpleDirectoryReader, Document
from llama_index.vector_stores import DeepLakeVectorStore
from langchain.chat_models import ChatOpenAI

import chainlit as cl
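
# Read the OpenAI API key from the `open_ai` environment variable and expose it
# under the standard OPENAI_API_KEY name expected by the OpenAI client.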
os.environ["OPENAI_API_KEY"] = os.environ.get("open_ai")

from llama_index import StorageContext, ServiceContext, PromptHelper
from llama_index import load_index_from_storage, load_indices_from_storage, load_graph_from_storage
from llama_index.vector_stores import PineconeVectorStore, QdrantVectorStore, SimpleVectorStore, ChromaVectorStore
from llama_index.llms import OpenAI
from llama_index.embeddings import OpenAIEmbedding
from llama_index.node_parser import SimpleNodeParser
from llama_index.text_splitter import TokenTextSplitter
from transformers import BertTokenizerFast
import openai

import chromadb
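
# Persistent Chroma client backed by the local ./chroma_db directory; the
# "vector" collection is created on first run and reused afterwards.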
db2 = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db2.get_or_create_collection("vector")
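
# Wrap the Chroma collection in a LlamaIndex vector store and build a storage
# context around it so the index can persist its embeddings there.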
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
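
# LLM and embedding model used for answering queries and embedding documents.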
llm = OpenAI(model='gpt-3.5-turbo', temperature=0.1)
embed_model = OpenAIEmbedding()
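
# The prompt helper controls how much retrieved context is packed into the
# model's context window and how large each chunk may be.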
prompt_helper = PromptHelper(
    context_window=2000,
    num_output=256,
    chunk_overlap_ratio=0.1,
    chunk_size_limit=200
)
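
# Token counting: hook a TokenCountingHandler into a callback manager so
# prompt and completion usage for gpt-3.5-turbo can be monitored.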
import tiktoken
from llama_index.callbacks import CallbackManager, TokenCountingHandler

token_counter = TokenCountingHandler(tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
callback_manager = CallbackManager([token_counter])

service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    # Wire in the prompt helper and token-counting callback defined above so
    # they actually take effect during indexing and querying.
    prompt_helper=prompt_helper,
    callback_manager=callback_manager
)

# Apply the service context globally for all subsequent LlamaIndex calls.
from llama_index import set_global_service_context
set_global_service_context(service_context)

# Build an (initially empty) index on top of the Chroma-backed storage context;
# the vector store is supplied through the storage context.
index = VectorStoreIndex.from_documents([], storage_context=storage_context, service_context=service_context)
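

# Chainlit wiring: build one streaming query engine per chat session and keep
# it in the user session so every message reuses it.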
@cl.on_chat_start
async def factory():
    query_engine = index.as_query_engine(
        service_context=service_context,
        streaming=True,
    )
    cl.user_session.set("query_engine", query_engine)


@cl.on_message
async def main(message: cl.Message):
    query_engine = cl.user_session.get("query_engine")
    response = await cl.make_async(query_engine.query)(message.content)

    response_message = cl.Message(content="")

    # Stream tokens to the UI as they are generated.
    for token in response.response_gen:
        await response_message.stream_token(token=token)

    # Fall back to the full response text once streaming has finished.
    if response.response_txt:
        response_message.content = response.response_txt

    await response_message.send()