Spaces:
Paused
Paused
Commit
·
ecd09c2
1
Parent(s):
546ad48
init
Browse files- app.py +39 -62
- generate_response.py +85 -0
- img/logo-em.jpg +0 -0
- img/logo.png +0 -0
- requirements.txt +10 -1
- storage_mx/default__vector_store.json +0 -0
- storage_mx/docstore.json +0 -0
- storage_mx/graph_store.json +1 -0
- storage_mx/image__vector_store.json +1 -0
- storage_mx/index_store.json +1 -0
- vectorize.py +28 -0
app.py
CHANGED
|
@@ -1,64 +1,41 @@
|
|
| 1 |
-
import
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"""
|
| 5 |
-
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
|
| 6 |
-
"""
|
| 7 |
-
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
def respond(
|
| 11 |
-
message,
|
| 12 |
-
history: list[tuple[str, str]],
|
| 13 |
-
system_message,
|
| 14 |
-
max_tokens,
|
| 15 |
-
temperature,
|
| 16 |
-
top_p,
|
| 17 |
-
):
|
| 18 |
-
messages = [{"role": "system", "content": system_message}]
|
| 19 |
-
|
| 20 |
-
for val in history:
|
| 21 |
-
if val[0]:
|
| 22 |
-
messages.append({"role": "user", "content": val[0]})
|
| 23 |
-
if val[1]:
|
| 24 |
-
messages.append({"role": "assistant", "content": val[1]})
|
| 25 |
-
|
| 26 |
-
messages.append({"role": "user", "content": message})
|
| 27 |
-
|
| 28 |
-
response = ""
|
| 29 |
-
|
| 30 |
-
for message in client.chat_completion(
|
| 31 |
-
messages,
|
| 32 |
-
max_tokens=max_tokens,
|
| 33 |
-
stream=True,
|
| 34 |
-
temperature=temperature,
|
| 35 |
-
top_p=top_p,
|
| 36 |
-
):
|
| 37 |
-
token = message.choices[0].delta.content
|
| 38 |
-
|
| 39 |
-
response += token
|
| 40 |
-
yield response
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
"""
|
| 44 |
-
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
| 45 |
-
"""
|
| 46 |
-
demo = gr.ChatInterface(
|
| 47 |
-
respond,
|
| 48 |
-
additional_inputs=[
|
| 49 |
-
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
|
| 50 |
-
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
| 51 |
-
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
| 52 |
-
gr.Slider(
|
| 53 |
-
minimum=0.1,
|
| 54 |
-
maximum=1.0,
|
| 55 |
-
value=0.95,
|
| 56 |
-
step=0.05,
|
| 57 |
-
label="Top-p (nucleus sampling)",
|
| 58 |
-
),
|
| 59 |
-
],
|
| 60 |
-
)
|
| 61 |
|
|
|
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
+
import gradio as gr
|
| 5 |
|
| 6 |
+
from generate_response import set_llm, generate_chat_response
|
| 7 |
+
from vectorize import load_write_index_nodes
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger("agent_logger")
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Gradio's ChatInterface always invokes its handler as fn(message, history),
# so `history` has to stay in the signature even though it is not used here.
def chat(message, history):
    """Stream the assistant's reply to ``message`` for the Gradio chat UI.

    The index is obtained from ``load_write_index_nodes`` (persist directory
    ``./storage_mx``) on every call; the nodes it also returns are not used.
    Every value produced by ``generate_chat_response`` is yielded unchanged.

    Args:
        message: The user's latest message.
        history: Conversation history supplied by the caller; ignored.

    Yields:
        Whatever ``generate_chat_response`` yields for this message.
    """
    index, _unused_nodes = load_write_index_nodes(path='./storage_mx')
    yield from generate_chat_response(index, message)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def run():
    """Configure the LLM and launch the SleepBot Gradio chat interface.

    Reads the API key from the ``mx_api_key`` environment variable, configures
    the ``mistral-large-latest`` model through ``set_llm`` and then starts a
    queued ``gr.ChatInterface`` that uses ``chat`` as its handler.
    """
    api_key = os.getenv('mx_api_key')
    if not api_key:
        # Keep going (the previous behaviour), but make the misconfiguration
        # visible instead of silently handing None to the LLM client.
        logger.warning("Environment variable 'mx_api_key' is not set; LLM requests may fail.")
    set_llm(key=api_key, model="mistral-large-latest", temperature=0)

    logger.info("Launching Gradio ChatInterface...")
    examples = ["How do I put my newborn in a cot?", "Hoe slaap ik in een lawaaiige buurt?",
                "Yeni anne olarak uyku kalitemi nasıl artırabilirim?"]
    # The HTML is kept flush-left so the string content is unchanged apart from
    # the repaired closing tag after "thuisarts.nl" (was "</a?.").
    description = ('''
<div style="text-align: center;">
<span style="white-space: nowrap; display: inline-block;">
<img src="./img/logo.png" alt="TU Delft Logo" width="200" style="display: inline; vertical-align: middle;"/>
<img src="./img/logo-em.jpg" alt="Erasmus MC Logo" width="200" style="display: inline; vertical-align: middle;"/>
</span><br/>
Welcome to Our Smart Family Buddy. This is <span style="font-weight: bold;">SleepBot</span>. SleepBot can answer your questions about you,
and your family's, sleep health. For more family support see: <a href="https://cjgrijnmond.nl/">cjgrijnmond.nl</a>. For medical
questions see: <a href="https://www.thuisarts.nl/">thuisarts.nl</a>.
</div>
''')
    demo = gr.ChatInterface(fn=chat, type="messages", title="SleepBot", description=description, examples=examples,
                            show_progress="full", theme="soft")
    demo.queue().launch()
|
| 41 |
+
# demo.queue().launch(auth=('sleepbot', 'testing24'))
|
generate_response.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
|
| 3 |
+
from llama_index.core import Settings
|
| 4 |
+
from llama_index.core.base.embeddings.base import BaseEmbedding
|
| 5 |
+
from llama_index.core.base.llms.base import BaseLLM
|
| 6 |
+
from llama_index.core.chat_engine import CondensePlusContextChatEngine
|
| 7 |
+
from llama_index.core.memory import ChatMemoryBuffer
|
| 8 |
+
from llama_index.embeddings.mistralai import MistralAIEmbedding
|
| 9 |
+
from llama_index.embeddings.openai import OpenAIEmbedding
|
| 10 |
+
from llama_index.llms.mistralai import MistralAI
|
| 11 |
+
from llama_index.llms.openai import OpenAI
|
| 12 |
+
|
| 13 |
+
llm: BaseLLM
|
| 14 |
+
embed_model: BaseEmbedding
|
| 15 |
+
logger = logging.getLogger("agent_logger")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def set_llm(model, key, temperature):
    """Select the LLM and embedding model and register them globally.

    The provider is chosen by substring match on ``model``: names containing
    ``"gpt"`` use OpenAI, otherwise names containing ``"mistral"`` use
    MistralAI, and anything else falls back to OpenAI ``gpt-3.5-turbo`` with
    temperature 0 (``model`` and ``temperature`` are ignored in that case).

    The chosen objects are stored in the module globals ``llm`` and
    ``embed_model`` and mirrored onto ``Settings.llm`` / ``Settings.embed_model``.

    Args:
        model: Model name used both for provider selection and as the model id.
        key: API key passed to the LLM and embedding constructors.
        temperature: Sampling temperature for the gpt/mistral branches.
    """
    global llm, embed_model

    logger.info(f'Setting up LLM with {model} and associated embedding model...')

    wants_openai = "gpt" in model
    if not wants_openai and "mistral" in model:
        llm = MistralAI(api_key=key, model=model, temperature=temperature, safe_mode=True)
        embed_model = MistralAIEmbedding(api_key=key)
    else:
        if wants_openai:
            llm = OpenAI(api_key=key, temperature=temperature, model=model)
        else:
            # Default model
            llm = OpenAI(api_key=key, model="gpt-3.5-turbo", temperature=0)
        # Both OpenAI paths share the same embedding configuration.
        embed_model = OpenAIEmbedding(api_key=key)

    Settings.llm = llm
    Settings.embed_model = embed_model
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def get_llm():
    """Return the module-level ``llm`` most recently assigned by ``set_llm``.

    The module only annotates ``llm`` without giving it a value, so calling
    this before ``set_llm`` has run raises ``NameError``.
    """
    current_llm = llm
    return current_llm
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def generate_chat_response(index, message):
    """Stream a retrieval-augmented answer to ``message``.

    A new ``ChatMemoryBuffer`` and ``CondensePlusContextChatEngine`` are
    created on every call, so nothing is remembered between calls. The engine
    uses the module-level ``llm``, which must have been set by ``set_llm``.

    Args:
        index: Index object; only its ``as_retriever()`` method is used here.
        message: The user's message to answer.

    Yields:
        The response text accumulated so far, once per streamed chunk.
    """
    logger.info("Generating chat response with history and rag...")
    string_output = ""

    memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

    logger.info("Creating chat engine with index and memory...")
    chat_engine = CondensePlusContextChatEngine.from_defaults(
        index.as_retriever(),
        memory=memory,
        llm=llm,
        # Adjacent literals are concatenated verbatim, so every fragment that
        # ends mid-sentence must carry its own trailing space.
        context_prompt=(
            "You are a chatbot designed to help families improve their sleep health. Do not respond to any off-topic "
            "queries but direct users back to the topic of sleep health. Your responses should be in the same "
            "language as the user message. Responses should be at B1 fluency level.\n For information retrieval "
            "questions, use this Output Format:\n"
            "2 to 3 lines high level summary of the response as a whole. \n"
            "Inside an HTML Read More element: Further details and breakdown of the response step by step.\n"
            "Back outside of the Read More, the name of the documents from which the response was generated. "
            "Then 'Learn More About: [suggested follow up question 1], [suggested follow up question 2], "
            "or [suggested follow up question 3]'.\n"
            "Here are the relevant documents for the context:\n"
            "{context_str}"
            "\nInstruction: Address the user query using the previous chat history and the context above, "
            "or ask a follow up question to gain more relevant context then answer the initial query given the new "
            "information."
        ),
        verbose=True,
    )

    response = chat_engine.stream_chat(message)

    for node in response.source_nodes:
        # TODO: work out how to map a node id back to its document name.
        print(f"Fetched node {node} in search.")

    response_text = []
    for text in response.response_gen:
        response_text.append(text)
        # Yield the whole answer so far, not just the newest chunk.
        string_output = ''.join(response_text)
        yield string_output

    logger.info(f'Assistant response: {string_output}')
|
img/logo-em.jpg
ADDED
|
img/logo.png
ADDED
|
requirements.txt
CHANGED
|
@@ -1 +1,10 @@
|
|
| 1 |
-
huggingface_hub==0.25.2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
huggingface_hub==0.25.2
|
| 2 |
+
gradio==5.5.0
|
| 3 |
+
llama-index==0.11.21
|
| 4 |
+
llama-index-core==0.11.21
|
| 5 |
+
llama-index-llms-mistralai==0.2.7
|
| 6 |
+
llama-index-embeddings-mistralai
|
| 7 |
+
llama-index-embeddings-openai
|
| 8 |
+
llama-index-llms-openai==0.2.16
|
| 9 |
+
# needed for simpledirectoryreader to work
|
| 10 |
+
llama-index-readers-file
|
storage_mx/default__vector_store.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
storage_mx/docstore.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
storage_mx/graph_store.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"graph_dict": {}}
|
storage_mx/image__vector_store.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}}
|
storage_mx/index_store.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"index_store/data": {"7a40a07d-bec2-480f-b26a-562b4cb73513": {"__type__": "vector_store", "__data__": "{\"index_id\": \"7a40a07d-bec2-480f-b26a-562b4cb73513\", \"summary\": null, \"nodes_dict\": {\"e51048fa-fa37-4f3d-a9f3-2bda207ea035\": \"e51048fa-fa37-4f3d-a9f3-2bda207ea035\", \"3eb856d0-5d45-413d-a7d1-fe70a6fa141a\": \"3eb856d0-5d45-413d-a7d1-fe70a6fa141a\", \"0d9e8845-88ee-47dd-8bfb-57709abe2938\": \"0d9e8845-88ee-47dd-8bfb-57709abe2938\", \"852c3d73-e48f-4166-89ac-56b1132050aa\": \"852c3d73-e48f-4166-89ac-56b1132050aa\", \"ef805574-3b73-4fd7-8b88-1dba0c43526d\": \"ef805574-3b73-4fd7-8b88-1dba0c43526d\", \"79720481-2a88-4d17-9b0b-0c401aa77f46\": \"79720481-2a88-4d17-9b0b-0c401aa77f46\", \"154dffe5-ac36-49b2-be67-190f2a108ce5\": \"154dffe5-ac36-49b2-be67-190f2a108ce5\", \"16a28a66-3f85-4f87-8125-d871ffe0804d\": \"16a28a66-3f85-4f87-8125-d871ffe0804d\", \"4371e9ca-2364-473d-a9ef-53ce4dee1f6d\": \"4371e9ca-2364-473d-a9ef-53ce4dee1f6d\", \"fdddf762-0504-465b-81e2-c9f9e22ab269\": \"fdddf762-0504-465b-81e2-c9f9e22ab269\", \"604e17f9-270e-4bc6-a02a-33bc97927dca\": \"604e17f9-270e-4bc6-a02a-33bc97927dca\", \"c80694b8-d373-4d2e-9983-a34678da5bf9\": \"c80694b8-d373-4d2e-9983-a34678da5bf9\", \"da06ce09-ae66-4b8a-a3aa-c9f90ad6a375\": \"da06ce09-ae66-4b8a-a3aa-c9f90ad6a375\", \"810ece2f-d6aa-4487-8c28-ac59822109d0\": \"810ece2f-d6aa-4487-8c28-ac59822109d0\", \"236030f1-f10f-408a-b77d-f2c577d5fa41\": \"236030f1-f10f-408a-b77d-f2c577d5fa41\", \"eab8aaee-b7f4-4695-84a1-5bcb8f29692a\": \"eab8aaee-b7f4-4695-84a1-5bcb8f29692a\", \"6d459e0b-9ac4-4e20-8d1b-a25aace73d80\": \"6d459e0b-9ac4-4e20-8d1b-a25aace73d80\", \"ea7efd6b-22aa-46e5-b1a8-eea5e1bc162d\": \"ea7efd6b-22aa-46e5-b1a8-eea5e1bc162d\", \"00b42b85-9eb7-44b1-aa7a-33897991f345\": \"00b42b85-9eb7-44b1-aa7a-33897991f345\", \"d8538d28-587a-4dde-84ff-7d7a25cdc667\": \"d8538d28-587a-4dde-84ff-7d7a25cdc667\", \"c077fd44-6705-4ce7-ac7d-b26717b59dcb\": \"c077fd44-6705-4ce7-ac7d-b26717b59dcb\", 
\"2ab0af90-85e6-401c-94fa-642700026a2a\": \"2ab0af90-85e6-401c-94fa-642700026a2a\", \"8e9eb911-7169-4e2d-9adb-71332e9c9b6c\": \"8e9eb911-7169-4e2d-9adb-71332e9c9b6c\", \"0d76b58e-f879-452f-8707-fe3b95a2a972\": \"0d76b58e-f879-452f-8707-fe3b95a2a972\", \"73f2c335-cc85-4068-a365-40ab1ca8e050\": \"73f2c335-cc85-4068-a365-40ab1ca8e050\", \"7692a9fb-c9c4-4c88-9485-bf4de8718180\": \"7692a9fb-c9c4-4c88-9485-bf4de8718180\", \"3f78f755-14fb-45b1-af18-09c972f94620\": \"3f78f755-14fb-45b1-af18-09c972f94620\", \"d2a28d03-024f-4700-9f23-18d2209d3c82\": \"d2a28d03-024f-4700-9f23-18d2209d3c82\", \"c9561983-2890-4d35-a679-4c898dbbf16a\": \"c9561983-2890-4d35-a679-4c898dbbf16a\", \"eb99e5a3-7a04-4a45-836e-b718022229f2\": \"eb99e5a3-7a04-4a45-836e-b718022229f2\", \"d5241187-28e1-4c62-8de9-2896760859a3\": \"d5241187-28e1-4c62-8de9-2896760859a3\", \"83fa041f-be5e-4672-ae0b-5f78f2b9c16e\": \"83fa041f-be5e-4672-ae0b-5f78f2b9c16e\", \"664e3d51-3cb1-4ab3-b431-ca88b2463a75\": \"664e3d51-3cb1-4ab3-b431-ca88b2463a75\", \"4320fa5b-3db6-4880-a382-58b500b8ddd3\": \"4320fa5b-3db6-4880-a382-58b500b8ddd3\", \"d8152a86-346b-4615-8cf1-c108fe8a4ebc\": \"d8152a86-346b-4615-8cf1-c108fe8a4ebc\", \"771b4529-d228-4cda-8bcc-436ebe38ca82\": \"771b4529-d228-4cda-8bcc-436ebe38ca82\", \"7a7367ea-d8c8-4f60-9f9f-10863e6b6233\": \"7a7367ea-d8c8-4f60-9f9f-10863e6b6233\", \"3e530f02-5b98-4248-a327-792fad4e6707\": \"3e530f02-5b98-4248-a327-792fad4e6707\", \"bf6cf315-9a29-4136-bbb0-dde0be6e6b45\": \"bf6cf315-9a29-4136-bbb0-dde0be6e6b45\", \"3fdeb14f-78bd-4cdf-986b-3533e0f16006\": \"3fdeb14f-78bd-4cdf-986b-3533e0f16006\", \"c59a77e4-2ae3-464c-9c0c-9f3e6ed19545\": \"c59a77e4-2ae3-464c-9c0c-9f3e6ed19545\", \"3c029d87-398d-4ba9-b1ef-9bf7105adb7b\": \"3c029d87-398d-4ba9-b1ef-9bf7105adb7b\", \"280ec62a-b3f2-4458-b260-6b3384f629d1\": \"280ec62a-b3f2-4458-b260-6b3384f629d1\", \"6d9f8f96-4d7c-4912-8210-b9c4b68d2b97\": \"6d9f8f96-4d7c-4912-8210-b9c4b68d2b97\", \"d13e6ad0-e33c-4484-bdf6-ea5451e336ca\": 
\"d13e6ad0-e33c-4484-bdf6-ea5451e336ca\", \"1952ed09-7df6-42e8-88ec-cf7a64a05c67\": \"1952ed09-7df6-42e8-88ec-cf7a64a05c67\", \"56f14150-fdd7-4d4a-8299-1af60a0bb7c3\": \"56f14150-fdd7-4d4a-8299-1af60a0bb7c3\", \"483cc15a-69a5-46a2-8ab8-f18e1e5a4d75\": \"483cc15a-69a5-46a2-8ab8-f18e1e5a4d75\", \"63e8b380-5817-4f9c-b7b4-0b7a3a34962d\": \"63e8b380-5817-4f9c-b7b4-0b7a3a34962d\", \"e7fa8509-8bec-4b93-974b-e863021e1016\": \"e7fa8509-8bec-4b93-974b-e863021e1016\", \"5ae84309-887b-4558-b547-5a19730c0b88\": \"5ae84309-887b-4558-b547-5a19730c0b88\", \"7e120781-843f-436c-8a25-34e98e2d9a34\": \"7e120781-843f-436c-8a25-34e98e2d9a34\", \"53c6300d-8ddb-4484-8fd0-1c6a4d490ed8\": \"53c6300d-8ddb-4484-8fd0-1c6a4d490ed8\", \"cd01dd77-8932-4a27-94ef-89a504b034c0\": \"cd01dd77-8932-4a27-94ef-89a504b034c0\", \"6ed04c34-1580-4c2e-89e3-f9ba631434ef\": \"6ed04c34-1580-4c2e-89e3-f9ba631434ef\", \"20afe2b3-f8d3-4b53-9888-e2c97748145e\": \"20afe2b3-f8d3-4b53-9888-e2c97748145e\", \"889979a7-dc9d-4f78-acb3-e0544e656edb\": \"889979a7-dc9d-4f78-acb3-e0544e656edb\", \"2054f22f-5f4c-449d-a914-cf6aaebba2bb\": \"2054f22f-5f4c-449d-a914-cf6aaebba2bb\", \"d5aea81e-48b4-49d3-9c71-e13496b1f09a\": \"d5aea81e-48b4-49d3-9c71-e13496b1f09a\", \"da464428-1742-4607-84e9-fa12ffb64b9a\": \"da464428-1742-4607-84e9-fa12ffb64b9a\", \"61bbf52e-5f7d-4483-b59d-60fe5f47d26a\": \"61bbf52e-5f7d-4483-b59d-60fe5f47d26a\", \"73420898-f368-40aa-8b81-f758d4951648\": \"73420898-f368-40aa-8b81-f758d4951648\", \"3c1de954-c97a-4864-a0c8-85747d40201e\": \"3c1de954-c97a-4864-a0c8-85747d40201e\", \"3713e666-2619-4808-9ce4-c4ca2d4a9548\": \"3713e666-2619-4808-9ce4-c4ca2d4a9548\", \"1fbdaaea-236c-45a0-b3b1-2b412c1d9c5f\": \"1fbdaaea-236c-45a0-b3b1-2b412c1d9c5f\", \"cebc685c-00f9-4e60-b988-cfffbfb6d189\": \"cebc685c-00f9-4e60-b988-cfffbfb6d189\", \"c8987e86-9c10-4c5a-ac2b-0345320d5804\": \"c8987e86-9c10-4c5a-ac2b-0345320d5804\", \"3f8c574e-c957-4774-a784-bf317230d905\": \"3f8c574e-c957-4774-a784-bf317230d905\", 
\"6bc8b02e-82cd-4417-947f-6f351884d094\": \"6bc8b02e-82cd-4417-947f-6f351884d094\", \"61b65276-95ca-4be0-b168-b353a101eec0\": \"61b65276-95ca-4be0-b168-b353a101eec0\", \"08a3b3e9-07ab-40d7-9bba-3a47cd9ead24\": \"08a3b3e9-07ab-40d7-9bba-3a47cd9ead24\", \"e1baa1c3-eeed-4662-9f95-2427d3496f7d\": \"e1baa1c3-eeed-4662-9f95-2427d3496f7d\", \"a52ce642-3952-4320-8618-ccfb7f67dc01\": \"a52ce642-3952-4320-8618-ccfb7f67dc01\", \"4e262530-7286-4d5e-8fab-a564240d6498\": \"4e262530-7286-4d5e-8fab-a564240d6498\", \"4d490a13-6773-4e0a-8d2a-cf8da9862f6c\": \"4d490a13-6773-4e0a-8d2a-cf8da9862f6c\", \"97491151-98ab-4783-ab17-8e84cd470106\": \"97491151-98ab-4783-ab17-8e84cd470106\", \"b8113426-5a5c-4988-8cd1-cdd9d4f650a5\": \"b8113426-5a5c-4988-8cd1-cdd9d4f650a5\", \"ce52331e-c92c-41dd-99ad-98850974f81d\": \"ce52331e-c92c-41dd-99ad-98850974f81d\", \"14471e5c-9ffa-4899-8cf4-5be454c63588\": \"14471e5c-9ffa-4899-8cf4-5be454c63588\", \"80ed9a7d-f5fb-4e54-8d0c-85c4239de423\": \"80ed9a7d-f5fb-4e54-8d0c-85c4239de423\", \"28dfe340-634d-4e4e-ba23-b8c4b39059e1\": \"28dfe340-634d-4e4e-ba23-b8c4b39059e1\", \"23b9d1c7-ad1e-49ea-b59c-fc5eadbe64e0\": \"23b9d1c7-ad1e-49ea-b59c-fc5eadbe64e0\", \"bfec8b10-2d15-4028-8b12-1065c84d7f51\": \"bfec8b10-2d15-4028-8b12-1065c84d7f51\", \"0a64d801-1001-4876-bda2-231893c8b10f\": \"0a64d801-1001-4876-bda2-231893c8b10f\", \"c9357362-6bb2-4f47-9c5e-655f0eaf0776\": \"c9357362-6bb2-4f47-9c5e-655f0eaf0776\", \"71edcbd6-fb22-418d-9585-b0c380572976\": \"71edcbd6-fb22-418d-9585-b0c380572976\", \"24073662-e53d-4063-a58d-758637989a5d\": \"24073662-e53d-4063-a58d-758637989a5d\", \"1762770f-a85a-4371-8d64-c0aff3df922b\": \"1762770f-a85a-4371-8d64-c0aff3df922b\", \"430c114e-9c75-4d85-ac81-5ea55d8bf9f8\": \"430c114e-9c75-4d85-ac81-5ea55d8bf9f8\", \"7238a95f-f834-4c16-89ab-346c3ff138de\": \"7238a95f-f834-4c16-89ab-346c3ff138de\", \"b5fa0b35-f082-4468-b156-6144a3699e11\": \"b5fa0b35-f082-4468-b156-6144a3699e11\", \"9cb50698-f880-40d6-a71e-4107318f381c\": 
\"9cb50698-f880-40d6-a71e-4107318f381c\", \"b0ec9680-37a1-4008-acfc-3ad9f451c7f0\": \"b0ec9680-37a1-4008-acfc-3ad9f451c7f0\", \"a588a9f2-10d0-4e89-82eb-71ade5666253\": \"a588a9f2-10d0-4e89-82eb-71ade5666253\", \"9e73293d-9c9c-4d2a-b48a-04da4f468d4e\": \"9e73293d-9c9c-4d2a-b48a-04da4f468d4e\", \"4a0dc2fd-2aba-4cb5-9ced-0a5430e474bd\": \"4a0dc2fd-2aba-4cb5-9ced-0a5430e474bd\", \"5305b416-7e21-4442-9862-f57bea5aeb24\": \"5305b416-7e21-4442-9862-f57bea5aeb24\", \"e7f2649c-8983-4b93-a2fa-96e9db1f3421\": \"e7f2649c-8983-4b93-a2fa-96e9db1f3421\", \"fc677c94-2c7f-46a2-ba31-e15b52492c28\": \"fc677c94-2c7f-46a2-ba31-e15b52492c28\", \"35d7070f-a028-48a8-b18e-ccf24a597110\": \"35d7070f-a028-48a8-b18e-ccf24a597110\", \"31bd6e83-602b-4211-9124-1cd660c4823d\": \"31bd6e83-602b-4211-9124-1cd660c4823d\", \"706dbe50-4d29-48ba-91bf-7f6c23937c9a\": \"706dbe50-4d29-48ba-91bf-7f6c23937c9a\", \"084e0995-8557-4a41-927c-c365fd1f6926\": \"084e0995-8557-4a41-927c-c365fd1f6926\", \"481495a3-5da2-4c27-ae67-649ba965c5ae\": \"481495a3-5da2-4c27-ae67-649ba965c5ae\", \"f12049ad-88fa-42a3-85d4-951dbf3be20c\": \"f12049ad-88fa-42a3-85d4-951dbf3be20c\", \"87b0b66f-855a-45d0-8528-f88209c129f5\": \"87b0b66f-855a-45d0-8528-f88209c129f5\", \"739f8c61-90c0-4aae-a782-ab47b05a747d\": \"739f8c61-90c0-4aae-a782-ab47b05a747d\", \"939b9e11-0c04-4e8a-a739-036b69a50013\": \"939b9e11-0c04-4e8a-a739-036b69a50013\", \"d2b886c6-d77f-4977-ab86-28609409c8e7\": \"d2b886c6-d77f-4977-ab86-28609409c8e7\", \"8012f503-0599-4071-bc11-6bcd3fa6cd24\": \"8012f503-0599-4071-bc11-6bcd3fa6cd24\", \"7a51f43e-37da-4541-990f-662299359a13\": \"7a51f43e-37da-4541-990f-662299359a13\", \"086bff2b-df3a-4603-884d-1114d2c314ad\": \"086bff2b-df3a-4603-884d-1114d2c314ad\", \"72faf6e1-673e-49df-99aa-30e3bc91e754\": \"72faf6e1-673e-49df-99aa-30e3bc91e754\", \"b25ac305-cf9b-49dd-802a-b80932efc6fa\": \"b25ac305-cf9b-49dd-802a-b80932efc6fa\", \"d43da16a-65f8-446b-a7cf-1c96a696a297\": \"d43da16a-65f8-446b-a7cf-1c96a696a297\", 
\"4ab98fa6-da18-4993-bcd9-84502c7bcde8\": \"4ab98fa6-da18-4993-bcd9-84502c7bcde8\", \"565ea9df-0b71-4edc-9419-f8a22c947f84\": \"565ea9df-0b71-4edc-9419-f8a22c947f84\", \"0e0fc2f2-f4f8-42e6-a953-d8b0c7a5abfd\": \"0e0fc2f2-f4f8-42e6-a953-d8b0c7a5abfd\", \"0406a1f1-0406-4d05-922f-f1427f98c93b\": \"0406a1f1-0406-4d05-922f-f1427f98c93b\", \"11cb0bbe-5e61-42ac-b4eb-9e0bd63bb9bb\": \"11cb0bbe-5e61-42ac-b4eb-9e0bd63bb9bb\", \"5b085cb4-abee-453f-a574-da71af56978c\": \"5b085cb4-abee-453f-a574-da71af56978c\", \"f77e36b3-e4df-404e-a881-99cb0dcc88ae\": \"f77e36b3-e4df-404e-a881-99cb0dcc88ae\", \"e23c9b79-4282-453f-8af1-e9fd3038fd88\": \"e23c9b79-4282-453f-8af1-e9fd3038fd88\", \"0371c7ea-85f3-4b7f-a647-eade968e7ca3\": \"0371c7ea-85f3-4b7f-a647-eade968e7ca3\", \"2750152d-b234-43ad-b229-4ced5769e83d\": \"2750152d-b234-43ad-b229-4ced5769e83d\", \"f1f4bc50-21b0-45dc-ab0c-0961ffbdfcc5\": \"f1f4bc50-21b0-45dc-ab0c-0961ffbdfcc5\", \"c156498d-abb3-4aeb-bb67-4318d91a3e60\": \"c156498d-abb3-4aeb-bb67-4318d91a3e60\", \"67291e61-382d-4ce5-8250-e0fdfc9cbc28\": \"67291e61-382d-4ce5-8250-e0fdfc9cbc28\", \"6479ebef-1268-4c99-8d65-548e63340cc8\": \"6479ebef-1268-4c99-8d65-548e63340cc8\", \"6c38f83a-8170-4f50-a19d-a502ed2c9528\": \"6c38f83a-8170-4f50-a19d-a502ed2c9528\", \"ec7c4c87-d07d-4d5f-82db-8f8ed88e3986\": \"ec7c4c87-d07d-4d5f-82db-8f8ed88e3986\", \"c4ea99af-4eab-49b6-a721-a5e7f11dbca3\": \"c4ea99af-4eab-49b6-a721-a5e7f11dbca3\", \"65e41988-0ad3-4e8d-b339-e0a7bed53c62\": \"65e41988-0ad3-4e8d-b339-e0a7bed53c62\", \"4dfcafd4-0ea9-49ba-87eb-a5769ccef28b\": \"4dfcafd4-0ea9-49ba-87eb-a5769ccef28b\", \"19f13ac8-ce91-4077-b8d1-f5db1699002d\": \"19f13ac8-ce91-4077-b8d1-f5db1699002d\", \"2029610b-7396-4417-9d69-de0a7767df03\": \"2029610b-7396-4417-9d69-de0a7767df03\", \"db705b7d-2d53-4713-9953-47ce66126681\": \"db705b7d-2d53-4713-9953-47ce66126681\", \"b6f42f5b-b2ef-45f0-9a79-cd299095804d\": \"b6f42f5b-b2ef-45f0-9a79-cd299095804d\", \"8d683469-6bcd-4a7f-a955-88b5f94623f1\": 
\"8d683469-6bcd-4a7f-a955-88b5f94623f1\", \"5fcddab8-253a-4ef4-bb2d-db9e5ebb3dec\": \"5fcddab8-253a-4ef4-bb2d-db9e5ebb3dec\", \"2a6206ff-2e94-4793-9718-5c27e7c28998\": \"2a6206ff-2e94-4793-9718-5c27e7c28998\", \"68977ad7-dac1-4f52-8ed0-2674af8e7906\": \"68977ad7-dac1-4f52-8ed0-2674af8e7906\", \"aad94c9e-3914-4825-a7a6-97976e342c62\": \"aad94c9e-3914-4825-a7a6-97976e342c62\", \"d40bd1e8-fe87-433a-b9bb-4877022e6e7c\": \"d40bd1e8-fe87-433a-b9bb-4877022e6e7c\", \"a5bef94d-3129-42d2-9b86-45ff02a6a105\": \"a5bef94d-3129-42d2-9b86-45ff02a6a105\", \"97b22a16-25a5-4ab0-be42-ce1f79be34f3\": \"97b22a16-25a5-4ab0-be42-ce1f79be34f3\", \"958e9b3e-8fe7-48af-adef-d5dbcc8c2f5c\": \"958e9b3e-8fe7-48af-adef-d5dbcc8c2f5c\", \"deaf3d52-a4b7-4361-88da-f0d49abdc9ca\": \"deaf3d52-a4b7-4361-88da-f0d49abdc9ca\", \"97f2437e-bb30-4e87-9a8c-853ec840e795\": \"97f2437e-bb30-4e87-9a8c-853ec840e795\", \"5d686d00-0a1b-4cf0-babe-91a1b319aafd\": \"5d686d00-0a1b-4cf0-babe-91a1b319aafd\", \"f79e3f21-31cb-4321-b31f-fd2ae720c707\": \"f79e3f21-31cb-4321-b31f-fd2ae720c707\", \"e2104350-6cc0-4dff-836d-f2e557909c0e\": \"e2104350-6cc0-4dff-836d-f2e557909c0e\", \"8c00fc5a-1392-424d-970d-bdb3d08d82c6\": \"8c00fc5a-1392-424d-970d-bdb3d08d82c6\", \"7d44c574-d5c1-4050-978d-aa6ff265adf9\": \"7d44c574-d5c1-4050-978d-aa6ff265adf9\", \"3a4a5da2-20ce-4cb7-8863-02bfda49ff02\": \"3a4a5da2-20ce-4cb7-8863-02bfda49ff02\", \"ff22d68d-869f-4351-8d1e-4783bc980aaa\": \"ff22d68d-869f-4351-8d1e-4783bc980aaa\", \"797421cb-c1a2-46ac-b120-54a953e6a36c\": \"797421cb-c1a2-46ac-b120-54a953e6a36c\", \"34737e0f-e1c4-45d5-95a6-ebfa56786657\": \"34737e0f-e1c4-45d5-95a6-ebfa56786657\", \"55a103de-fddb-46bf-8d7d-f27b328bf619\": \"55a103de-fddb-46bf-8d7d-f27b328bf619\", \"eae8014a-3219-4d81-9914-eaf471ba9277\": \"eae8014a-3219-4d81-9914-eaf471ba9277\", \"d00f1043-9806-439b-8f01-e4dce603980b\": \"d00f1043-9806-439b-8f01-e4dce603980b\", \"1a9cea1d-3dfa-4e2a-ac5b-aa8743eaa630\": \"1a9cea1d-3dfa-4e2a-ac5b-aa8743eaa630\", 
\"542437e0-3b04-4b87-b0b2-ff05548b6847\": \"542437e0-3b04-4b87-b0b2-ff05548b6847\", \"1194b920-d8f9-4f67-b333-0f27fc5dea34\": \"1194b920-d8f9-4f67-b333-0f27fc5dea34\", \"40683c27-7a6f-4ef7-92b1-dabcac6aee8a\": \"40683c27-7a6f-4ef7-92b1-dabcac6aee8a\", \"58353613-c8f8-42cc-8a0a-de5ae5c76507\": \"58353613-c8f8-42cc-8a0a-de5ae5c76507\", \"08a1b5af-2d77-4f3f-a7c3-3979af55927b\": \"08a1b5af-2d77-4f3f-a7c3-3979af55927b\", \"30dac815-611a-4576-a799-51d5f40dacb1\": \"30dac815-611a-4576-a799-51d5f40dacb1\", \"927dfe1d-a24a-4c10-a8bd-4bb3d1417d20\": \"927dfe1d-a24a-4c10-a8bd-4bb3d1417d20\", \"31ee15ae-a44a-45e1-ac97-09117be6841e\": \"31ee15ae-a44a-45e1-ac97-09117be6841e\", \"3ae92dde-13ff-48fb-a70d-36aee6ba18bc\": \"3ae92dde-13ff-48fb-a70d-36aee6ba18bc\", \"b0c251c6-65a3-47c9-af8d-22763bc37e94\": \"b0c251c6-65a3-47c9-af8d-22763bc37e94\", \"9139fb8e-7163-4696-ac37-3bfe42580d39\": \"9139fb8e-7163-4696-ac37-3bfe42580d39\", \"df4cdc83-9997-4480-aed8-93f1c13b07aa\": \"df4cdc83-9997-4480-aed8-93f1c13b07aa\", \"46dfe8c4-945f-4f7a-8f4e-6e9da2a8da7b\": \"46dfe8c4-945f-4f7a-8f4e-6e9da2a8da7b\", \"b9b25f33-694f-4e8d-8f19-a26b9404e296\": \"b9b25f33-694f-4e8d-8f19-a26b9404e296\", \"62987fc3-87f7-48b3-8a94-a661ec36c887\": \"62987fc3-87f7-48b3-8a94-a661ec36c887\", \"346bf031-de41-49e4-b39b-cb015b9fd368\": \"346bf031-de41-49e4-b39b-cb015b9fd368\", \"c04f30ac-6717-4853-8aa1-5a40ac745692\": \"c04f30ac-6717-4853-8aa1-5a40ac745692\", \"39400e49-d3c7-49bd-bfdf-52f4ffd47264\": \"39400e49-d3c7-49bd-bfdf-52f4ffd47264\", \"a4790a9b-4ec1-4fae-a14c-24ccbc4ff9c2\": \"a4790a9b-4ec1-4fae-a14c-24ccbc4ff9c2\", \"87652293-907b-4873-bed4-364afe8b5594\": \"87652293-907b-4873-bed4-364afe8b5594\", \"d103260c-6663-4b58-ab81-3ace7020bc6a\": \"d103260c-6663-4b58-ab81-3ace7020bc6a\", \"7ab73376-a705-458a-9217-8683c5ff373e\": \"7ab73376-a705-458a-9217-8683c5ff373e\", \"0a859091-8ed4-4176-8247-67878293ea93\": \"0a859091-8ed4-4176-8247-67878293ea93\", \"1cc8d3c5-2a50-4f8f-84dd-111c20ce45dc\": 
\"1cc8d3c5-2a50-4f8f-84dd-111c20ce45dc\", \"b6c7b9d2-ec34-47ca-8f59-fbe178159ee4\": \"b6c7b9d2-ec34-47ca-8f59-fbe178159ee4\", \"af52b7ad-47ac-440b-82d9-608917d7eb02\": \"af52b7ad-47ac-440b-82d9-608917d7eb02\", \"09d07a1e-b506-466e-b2b3-93a95897f884\": \"09d07a1e-b506-466e-b2b3-93a95897f884\", \"11673e89-01a3-43d8-af56-f9eb97eb8587\": \"11673e89-01a3-43d8-af56-f9eb97eb8587\", \"fcc2d220-8ed7-4a41-a4fb-12f470f33d9b\": \"fcc2d220-8ed7-4a41-a4fb-12f470f33d9b\", \"8457a273-c7b4-499f-9df8-3f04130a6f3a\": \"8457a273-c7b4-499f-9df8-3f04130a6f3a\", \"65574567-3461-4bd7-b9c9-2ca0731a4b66\": \"65574567-3461-4bd7-b9c9-2ca0731a4b66\", \"fb207f95-bf43-4182-94c2-9c50ac365802\": \"fb207f95-bf43-4182-94c2-9c50ac365802\", \"e4068d71-72f6-4a6a-acc3-e19fa646d8f4\": \"e4068d71-72f6-4a6a-acc3-e19fa646d8f4\", \"13466d12-0d64-427e-96f8-05193eb8ff8c\": \"13466d12-0d64-427e-96f8-05193eb8ff8c\", \"d8815c94-7dee-4dca-a8d6-fd0eacf3ae88\": \"d8815c94-7dee-4dca-a8d6-fd0eacf3ae88\", \"c37b3d0f-e1ca-4078-b4a1-11084bbb59b2\": \"c37b3d0f-e1ca-4078-b4a1-11084bbb59b2\", \"84d6e8d1-a7b9-4f68-b1db-9f91ee88bbc7\": \"84d6e8d1-a7b9-4f68-b1db-9f91ee88bbc7\", \"d04ce259-3fb9-4b02-bd6d-f7639b7d2eed\": \"d04ce259-3fb9-4b02-bd6d-f7639b7d2eed\", \"fd822ea6-c122-4b52-b734-959d2d1c59b5\": \"fd822ea6-c122-4b52-b734-959d2d1c59b5\", \"b7d38d32-2560-4c23-b01f-43634b74fd70\": \"b7d38d32-2560-4c23-b01f-43634b74fd70\", \"03073157-7adf-483b-b788-818dc439b265\": \"03073157-7adf-483b-b788-818dc439b265\", \"9cdfcb5d-beb3-4f5a-bf68-26944e5d1a8d\": \"9cdfcb5d-beb3-4f5a-bf68-26944e5d1a8d\", \"76377af1-cc61-437c-845f-9a4fb1bd7a4a\": \"76377af1-cc61-437c-845f-9a4fb1bd7a4a\", \"c7e0aad2-b8a6-4258-9665-179376c7c8c4\": \"c7e0aad2-b8a6-4258-9665-179376c7c8c4\", \"0b506b1d-8d3f-47ed-ac9d-e65b23f91eb8\": \"0b506b1d-8d3f-47ed-ac9d-e65b23f91eb8\", \"8716c7ab-2f34-4bf1-85bc-1c661b8957cf\": \"8716c7ab-2f34-4bf1-85bc-1c661b8957cf\", \"2f39f707-fa96-423e-95e4-663b50c419f9\": \"2f39f707-fa96-423e-95e4-663b50c419f9\", 
\"a3c21ab6-7c51-4c6b-a5b0-0bbe1755a080\": \"a3c21ab6-7c51-4c6b-a5b0-0bbe1755a080\", \"b8bbdad9-e09f-4530-ba94-411c5faf09f8\": \"b8bbdad9-e09f-4530-ba94-411c5faf09f8\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
|
vectorize.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, load_index_from_storage
|
| 5 |
+
from llama_index.core.node_parser import SentenceSplitter
|
| 6 |
+
|
| 7 |
+
logger = logging.getLogger(__name__)
|
| 8 |
+
DOCUMENT_PATH = './documents'
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# remember to delete stored vectors when new documents are added to the data so the storage is recreated
|
| 12 |
+
def load_write_index_nodes(path):
    """Load the vector index persisted at ``path``, or build and persist it.

    If ``path`` exists, the index is loaded from that storage directory and no
    documents are read, so the returned node list is built from an empty
    document list. Otherwise the documents under ``DOCUMENT_PATH`` are read,
    indexed, persisted to ``path`` and split into nodes.

    Args:
        path: Directory used to persist / load the index.

    Returns:
        A ``(index, nodes)`` tuple, where ``nodes`` is the output of
        ``SentenceSplitter`` on the documents read during this call.
    """
    if os.path.exists(path):
        logger.info(f'Loading index from {path}...')
        stored_context = StorageContext.from_defaults(persist_dir=path)
        index = load_index_from_storage(stored_context)
        # Nothing was read from disk on this path, so there is nothing to split.
        documents = []
    else:
        reader = SimpleDirectoryReader(DOCUMENT_PATH, filename_as_id=True)
        documents = reader.load_data()
        logger.info(f'Indexing documents in {DOCUMENT_PATH}...')
        index = VectorStoreIndex.from_documents(documents)
        index.storage_context.persist(persist_dir=path)
        logger.info(f'{len(documents)} documents indexed.')

    nodes = SentenceSplitter().get_nodes_from_documents(documents)
    return index, nodes
|