import logging
import sys

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import AzureOpenAI
from llama_index import (
    LangchainEmbedding,
    LLMPredictor,
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)

# logging.DEBUG gives the most verbose output; use logging.INFO for less.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))


def main() -> None:
    # Load every file under ./data into Document objects.
    documents = SimpleDirectoryReader("./data").load_data()

    # Alternative: parse the documents into nodes first and build the index
    # from those (requires `from llama_index.node_parser import SimpleNodeParser`):
    #   parser = SimpleNodeParser()
    #   nodes = parser.get_nodes_from_documents(documents)
    #   index = VectorStoreIndex(nodes)

    # Define the embedding model, wrapping LangChain's OpenAI embeddings
    # for use with llama_index.
    embedding = LangchainEmbedding(OpenAIEmbeddings(client=None, chunk_size=1))

    # Define the LLM: an Azure OpenAI deployment of text-davinci-003.
    llm_predictor = LLMPredictor(
        llm=AzureOpenAI(
            client=None,
            deployment_name="text-davinci-003",
            model="text-davinci-003",
        )
    )

    # Configure the service context so that indexing and querying share the
    # same LLM and embedding model.
    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor, embed_model=embedding
    )

    # Build the vector index from the documents and persist it to disk.
    index = VectorStoreIndex.from_documents(
        documents,
        service_context=service_context,
    )
    index.storage_context.persist(persist_dir="./dataset")

    # Reload the index from the persisted storage.
    storage_context = StorageContext.from_defaults(persist_dir="./dataset")
    index = load_index_from_storage(
        storage_context=storage_context, service_context=service_context
    )

    # Query with the embedding-based retriever.
    query_engine = index.as_query_engine(
        retriever_mode="embedding", verbose=True, service_context=service_context
    )
    response = query_engine.query(
        "Please recommend a coffee for me; I like caffeine."
    )
    print(response)


if __name__ == "__main__":
    main()
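
# ---------------------------------------------------------------------------
# NOTE (assumption): the script never passes credentials explicitly, so
# AzureOpenAI and OpenAIEmbeddings fall back to the standard OpenAI
# environment variables. A minimal setup, sketched with placeholder values,
# would look like this before running main():
#
#   export OPENAI_API_TYPE=azure
#   export OPENAI_API_BASE=https://<your-resource>.openai.azure.com/
#   export OPENAI_API_KEY=<your-key>
#   export OPENAI_API_VERSION=<api-version>
#
# The resource name, key, and API version are placeholders; the original
# script does not show how its credentials were configured.
# ---------------------------------------------------------------------------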