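# ESG Chatbot: question answering over ESG PDF reports using LlamaIndex with a
# local Mistral-7B model (via llama.cpp), a Qdrant vector store, and a Gradio UI.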
import qdrant_client
import gradio as gr

from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.llms.ollama import Ollama
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
DOC_PATH = './data/pdf_esg'
INDEX_PATH = './storage'
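# DOC_PATH holds the source PDFs to index; INDEX_PATH is where construct_index()
# persists the index metadata (the vectors themselves live in Qdrant).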
llm = LlamaCPP(
    # You can pass in the URL of a GGUF model to download it automatically
    # model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf',
    # optionally, set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # Mistral-7B-Instruct-v0.2 supports a much larger window, but 4096 tokens
    # keeps memory usage modest
    context_window=4096,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__(); n_gpu_layers=-1 offloads all layers to the
    # GPU (use 0 for CPU-only)
    model_kwargs={"n_gpu_layers": -1},
    # transform inputs into the Llama-2/Mistral instruct prompt format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
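# Note: with model_path=None, LlamaCPP downloads the GGUF file from model_url
# on the first run (roughly 4 GB for this Q4_K_M quantization) and caches it
# locally, so later runs start without re-downloading.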
# Settings.llm = Ollama(model="mistral")  # alternative: serve the model through Ollama
Settings.llm = llm
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
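# Settings applies these choices globally (it supersedes the older
# ServiceContext/set_global_service_context API), so the index construction
# and query engine below pick up this LLM and embedding model automatically.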
def construct_index(doc_path=DOC_PATH, index_store=INDEX_PATH, use_cache=False):
    client = qdrant_client.QdrantClient(path="./qdrant_data")
    vector_store = QdrantVectorStore(client=client, collection_name="esg")
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    if use_cache:
        # rebuild the storage context from disk and reload the saved index
        storage_context = StorageContext.from_defaults(
            vector_store=vector_store, persist_dir=index_store
        )
        index = load_index_from_storage(storage_context)
    else:
        reader = SimpleDirectoryReader(input_dir=doc_path)
        documents = reader.load_data()
        # embed the documents and store the vectors in the Qdrant collection
        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
        index.storage_context.persist(persist_dir=index_store)
    return index
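# One-off usage sketch (assumes the PDFs are already in ./data/pdf_esg):
#   construct_index(use_cache=False)  # parse, embed, and persist the corpus
#   construct_index(use_cache=True)   # later runs: reload the saved index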
def qabot(input_text, index_store=INDEX_PATH):
    # reconnect to the Qdrant collection and reload the persisted index
    client = qdrant_client.QdrantClient(path="./qdrant_data")
    vector_store = QdrantVectorStore(client=client, collection_name="esg")
    storage_context = StorageContext.from_defaults(
        vector_store=vector_store, persist_dir=index_store
    )
    index = load_index_from_storage(storage_context)
    query_engine = index.as_query_engine()
    response = query_engine.query(input_text)
    # release the local Qdrant storage lock so the next query can reopen it
    client.close()
    return response.response
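# The default query engine retrieves the top-2 most similar chunks before
# answering; a possible tweak (standard LlamaIndex parameter) to pull in more
# context:
#   query_engine = index.as_query_engine(similarity_top_k=5)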
if __name__ == "__main__":
    # construct_index(DOC_PATH, use_cache=False)
    # create_index_retriever_query_engine()
    iface = gr.Interface(
        fn=qabot,
        inputs=gr.Textbox(lines=7, label='Enter your query'),
        outputs="text",
        title="ESG Chatbot",
    )
    iface.launch(inline=False)
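# When hosting remotely (e.g. in a Docker container or a Hugging Face Space),
# you would typically bind to all interfaces instead:
#   iface.launch(server_name="0.0.0.0", server_port=7860)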