arabellastrange committed on
Commit
ecd09c2
·
1 Parent(s): 546ad48
app.py CHANGED
@@ -1,64 +1,41 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
- ],
60
- )
61
 
 
62
 
63
- if __name__ == "__main__":
64
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
+ import gradio as gr
5
 
6
+ from generate_response import set_llm, generate_chat_response
7
+ from vectorize import load_write_index_nodes
8
+
9
# Application logger, looked up by the fixed name "agent_logger" rather than by module name.
logger = logging.getLogger("agent_logger")
10
+
11
+
12
def chat(message, history):
    """Stream SleepBot's answer to ``message`` for the Gradio chat UI.

    ``gr.ChatInterface`` calls its ``fn`` with two positional arguments, the
    new user message and the conversation so far, so ``history`` must stay in
    the signature even though it is not used here.

    The index is loaded from ``./storage_mx`` on the first call and then
    reused, instead of being re-read from disk for every message. Loading is
    kept lazy so it still happens after ``run()`` has called ``set_llm``.
    The process must be restarted to pick up a rebuilt ``./storage_mx``.

    Yields:
        Each value produced by ``generate_chat_response`` for this message.
    """
    index = getattr(chat, "_index", None)
    if index is None:
        # The node list returned alongside the index is not needed here.
        index, _nodes = load_write_index_nodes(path='./storage_mx')
        chat._index = index
    yield from generate_chat_response(index, message)
18
+
19
+
20
def run():
    """Configure the models and launch the SleepBot Gradio chat interface.

    Reads the API key from the ``mx_api_key`` environment variable, registers
    the "mistral-large-latest" model through ``set_llm`` and starts a queued
    ``gr.ChatInterface`` that answers with ``chat``.
    """
    api_key = os.getenv('mx_api_key')
    if not api_key:
        # Surface the misconfiguration early; the key is still passed through unchanged.
        logger.warning("Environment variable 'mx_api_key' is not set; model calls will likely fail.")
    set_llm(key=api_key, model="mistral-large-latest", temperature=0)

    logger.info("Launching Gradio ChatInterface...")
    examples = ["How do I put my newborn in a cot?", "Hoe slaap ik in een lawaaiige buurt?",
                "Yeni anne olarak uyku kalitemi nasıl artırabilirim?"]
    # NOTE(review): the relative <img> paths below may not be served by Gradio as-is;
    # confirm the logos render in the deployed app.
    description = ('''
<div style="text-align: center;">
<span style="white-space: nowrap; display: inline-block;">
<img src="./img/logo.png" alt="TU Delft Logo" width="200" style="display: inline; vertical-align: middle;"/>
<img src="./img/logo-em.jpg" alt="Erasmus MC Logo" width="200" style="display: inline; vertical-align: middle;"/>
</span><br/>
Welcome to Our Smart Family Buddy. This is <span style="font-weight: bold;">SleepBot</span>. SleepBot can answer your questions about you,
and your family's, sleep health. For more family support see: <a href="https://cjgrijnmond.nl/">cjgrijnmond.nl</a>. For medical
questions see: <a href="https://www.thuisarts.nl/">thuisarts.nl</a>.
</div>
''')
    demo = gr.ChatInterface(fn=chat, type="messages", title="SleepBot", description=description, examples=examples,
                            show_progress="full", theme="soft")
    # To password-protect the demo, pass auth=(user, password) to launch() with
    # values read from the environment; never hard-code credentials in source.
    demo.queue().launch()


# Nothing else in this file calls run(), so start the app when executed as a script.
if __name__ == "__main__":
    run()
generate_response.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ from llama_index.core import Settings
4
+ from llama_index.core.base.embeddings.base import BaseEmbedding
5
+ from llama_index.core.base.llms.base import BaseLLM
6
+ from llama_index.core.chat_engine import CondensePlusContextChatEngine
7
+ from llama_index.core.memory import ChatMemoryBuffer
8
+ from llama_index.embeddings.mistralai import MistralAIEmbedding
9
+ from llama_index.embeddings.openai import OpenAIEmbedding
10
+ from llama_index.llms.mistralai import MistralAI
11
+ from llama_index.llms.openai import OpenAI
12
+
13
# Chat model shared by this module. This is a bare annotation: the name only
# exists once set_llm() has assigned it.
llm: BaseLLM
# Embedding model paired with the chat model; also assigned by set_llm().
embed_model: BaseEmbedding
# Logger looked up by the fixed name "agent_logger".
logger = logging.getLogger("agent_logger")
16
+
17
+
18
def set_llm(model, key, temperature):
    """Pick the chat and embedding models for ``model`` and register them.

    The provider is chosen by substring match on ``model``: a name containing
    "gpt" uses OpenAI, otherwise a name containing "mistral" uses Mistral AI,
    and anything else falls back to OpenAI "gpt-3.5-turbo" at temperature 0
    (the ``temperature`` argument is not used on that fallback path).

    The selected objects are stored in the module globals ``llm`` and
    ``embed_model`` and mirrored onto ``Settings.llm`` and
    ``Settings.embed_model``.

    Args:
        model: Model name used both for provider selection and as the model id.
        key: API key handed to the provider's LLM and embedding clients.
        temperature: Sampling temperature for the "gpt" and "mistral" branches.
    """
    global llm, embed_model

    logger.info(f'Setting up LLM with {model} and associated embedding model...')

    wants_openai = "gpt" in model
    wants_mistral = (not wants_openai) and "mistral" in model

    if wants_mistral:
        llm = MistralAI(api_key=key, model=model, temperature=temperature, safe_mode=True)
        embed_model = MistralAIEmbedding(api_key=key)
    elif wants_openai:
        llm = OpenAI(api_key=key, temperature=temperature, model=model)
        embed_model = OpenAIEmbedding(api_key=key)
    else:
        # Unrecognised model name: use the default OpenAI model.
        llm = OpenAI(api_key=key, model="gpt-3.5-turbo", temperature=0)
        embed_model = OpenAIEmbedding(api_key=key)

    Settings.llm = llm
    Settings.embed_model = embed_model
37
+
38
+
39
def get_llm():
    """Return the module-level ``llm`` object that ``set_llm`` assigns."""
    current_llm = llm
    return current_llm
41
+
42
+
43
# Prompt template for the chat engine; "{context_str}" is the placeholder for
# the retrieved documents. Every fragment ends with explicit whitespace so the
# implicit string concatenation does not glue words together.
_CONTEXT_PROMPT = (
    "You are a chatbot designed to help families improve their sleep health. Do not respond to any off-topic "
    "queries but direct users back to the topic of sleep health. Your responses should be in the same "
    "language as the user message. Responses should be at B1 fluency level.\n For information retrieval "
    "questions, use this Output Format:\n"
    "2 to 3 lines high level summary of the response as a whole. \n"
    "Inside an HTML Read More element: Further details and breakdown of the response step by step.\n"
    "Back outside of the Read More, the name of the documents from which the response was generated. "
    "Then 'Learn More About: [suggested follow up question 1], [suggested follow up question 2], "
    "or [suggested follow up question 3]'.\n"
    "Here are the relevant documents for the context:\n"
    "{context_str}"
    "\nInstruction: Address the user query using the previous chat history and the context above, "
    "or ask a follow up question to gain more relevant context then answer the initial query given the new "
    "information."
)


def generate_chat_response(index, message):
    """Stream a retrieval-augmented answer to ``message``.

    A new ``ChatMemoryBuffer`` and ``CondensePlusContextChatEngine`` are
    created on every call, so no conversation state is carried over from one
    call to the next.

    Args:
        index: Index object whose ``as_retriever()`` supplies the context.
        message: The user message to answer.

    Yields:
        The response text accumulated so far, once per streamed chunk; the
        last value yielded is the complete answer.
    """
    logger.info("Generating chat response with history and rag...")

    memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

    logger.info("Creating chat engine with index and memory...")
    chat_engine = CondensePlusContextChatEngine.from_defaults(
        index.as_retriever(),
        memory=memory,
        llm=llm,
        context_prompt=_CONTEXT_PROMPT,
        verbose=True,
    )

    response = chat_engine.stream_chat(message)

    for node in response.source_nodes:
        # TODO: map the retrieved node back to its source document name.
        logger.info("Fetched node %s in search.", node)

    # Grow the answer incrementally instead of re-joining every chunk each time.
    string_output = ""
    for text in response.response_gen:
        string_output += text
        yield string_output

    logger.info('Assistant response: %s', string_output)
img/logo-em.jpg ADDED
img/logo.png ADDED
requirements.txt CHANGED
@@ -1 +1,10 @@
1
- huggingface_hub==0.25.2
 
 
 
 
 
 
 
 
 
 
1
+ huggingface_hub==0.25.2
2
+ gradio==5.5.0
3
+ llama-index==0.11.21
4
+ llama-index-core==0.11.21
5
+ llama-index-llms-mistralai==0.2.7
6
+ llama-index-embeddings-mistralai
7
+ llama-index-embeddings-openai
8
+ llama-index-llms-openai==0.2.16
9
+ # needed for simpledirectoryreader to work
10
+ llama-index-readers-file
storage_mx/default__vector_store.json ADDED
The diff for this file is too large to render. See raw diff
 
storage_mx/docstore.json ADDED
The diff for this file is too large to render. See raw diff
 
storage_mx/graph_store.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"graph_dict": {}}
storage_mx/image__vector_store.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}}
storage_mx/index_store.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"index_store/data": {"7a40a07d-bec2-480f-b26a-562b4cb73513": {"__type__": "vector_store", "__data__": "{\"index_id\": \"7a40a07d-bec2-480f-b26a-562b4cb73513\", \"summary\": null, \"nodes_dict\": {\"e51048fa-fa37-4f3d-a9f3-2bda207ea035\": \"e51048fa-fa37-4f3d-a9f3-2bda207ea035\", \"3eb856d0-5d45-413d-a7d1-fe70a6fa141a\": \"3eb856d0-5d45-413d-a7d1-fe70a6fa141a\", \"0d9e8845-88ee-47dd-8bfb-57709abe2938\": \"0d9e8845-88ee-47dd-8bfb-57709abe2938\", \"852c3d73-e48f-4166-89ac-56b1132050aa\": \"852c3d73-e48f-4166-89ac-56b1132050aa\", \"ef805574-3b73-4fd7-8b88-1dba0c43526d\": \"ef805574-3b73-4fd7-8b88-1dba0c43526d\", \"79720481-2a88-4d17-9b0b-0c401aa77f46\": \"79720481-2a88-4d17-9b0b-0c401aa77f46\", \"154dffe5-ac36-49b2-be67-190f2a108ce5\": \"154dffe5-ac36-49b2-be67-190f2a108ce5\", \"16a28a66-3f85-4f87-8125-d871ffe0804d\": \"16a28a66-3f85-4f87-8125-d871ffe0804d\", \"4371e9ca-2364-473d-a9ef-53ce4dee1f6d\": \"4371e9ca-2364-473d-a9ef-53ce4dee1f6d\", \"fdddf762-0504-465b-81e2-c9f9e22ab269\": \"fdddf762-0504-465b-81e2-c9f9e22ab269\", \"604e17f9-270e-4bc6-a02a-33bc97927dca\": \"604e17f9-270e-4bc6-a02a-33bc97927dca\", \"c80694b8-d373-4d2e-9983-a34678da5bf9\": \"c80694b8-d373-4d2e-9983-a34678da5bf9\", \"da06ce09-ae66-4b8a-a3aa-c9f90ad6a375\": \"da06ce09-ae66-4b8a-a3aa-c9f90ad6a375\", \"810ece2f-d6aa-4487-8c28-ac59822109d0\": \"810ece2f-d6aa-4487-8c28-ac59822109d0\", \"236030f1-f10f-408a-b77d-f2c577d5fa41\": \"236030f1-f10f-408a-b77d-f2c577d5fa41\", \"eab8aaee-b7f4-4695-84a1-5bcb8f29692a\": \"eab8aaee-b7f4-4695-84a1-5bcb8f29692a\", \"6d459e0b-9ac4-4e20-8d1b-a25aace73d80\": \"6d459e0b-9ac4-4e20-8d1b-a25aace73d80\", \"ea7efd6b-22aa-46e5-b1a8-eea5e1bc162d\": \"ea7efd6b-22aa-46e5-b1a8-eea5e1bc162d\", \"00b42b85-9eb7-44b1-aa7a-33897991f345\": \"00b42b85-9eb7-44b1-aa7a-33897991f345\", \"d8538d28-587a-4dde-84ff-7d7a25cdc667\": \"d8538d28-587a-4dde-84ff-7d7a25cdc667\", \"c077fd44-6705-4ce7-ac7d-b26717b59dcb\": \"c077fd44-6705-4ce7-ac7d-b26717b59dcb\", 
\"2ab0af90-85e6-401c-94fa-642700026a2a\": \"2ab0af90-85e6-401c-94fa-642700026a2a\", \"8e9eb911-7169-4e2d-9adb-71332e9c9b6c\": \"8e9eb911-7169-4e2d-9adb-71332e9c9b6c\", \"0d76b58e-f879-452f-8707-fe3b95a2a972\": \"0d76b58e-f879-452f-8707-fe3b95a2a972\", \"73f2c335-cc85-4068-a365-40ab1ca8e050\": \"73f2c335-cc85-4068-a365-40ab1ca8e050\", \"7692a9fb-c9c4-4c88-9485-bf4de8718180\": \"7692a9fb-c9c4-4c88-9485-bf4de8718180\", \"3f78f755-14fb-45b1-af18-09c972f94620\": \"3f78f755-14fb-45b1-af18-09c972f94620\", \"d2a28d03-024f-4700-9f23-18d2209d3c82\": \"d2a28d03-024f-4700-9f23-18d2209d3c82\", \"c9561983-2890-4d35-a679-4c898dbbf16a\": \"c9561983-2890-4d35-a679-4c898dbbf16a\", \"eb99e5a3-7a04-4a45-836e-b718022229f2\": \"eb99e5a3-7a04-4a45-836e-b718022229f2\", \"d5241187-28e1-4c62-8de9-2896760859a3\": \"d5241187-28e1-4c62-8de9-2896760859a3\", \"83fa041f-be5e-4672-ae0b-5f78f2b9c16e\": \"83fa041f-be5e-4672-ae0b-5f78f2b9c16e\", \"664e3d51-3cb1-4ab3-b431-ca88b2463a75\": \"664e3d51-3cb1-4ab3-b431-ca88b2463a75\", \"4320fa5b-3db6-4880-a382-58b500b8ddd3\": \"4320fa5b-3db6-4880-a382-58b500b8ddd3\", \"d8152a86-346b-4615-8cf1-c108fe8a4ebc\": \"d8152a86-346b-4615-8cf1-c108fe8a4ebc\", \"771b4529-d228-4cda-8bcc-436ebe38ca82\": \"771b4529-d228-4cda-8bcc-436ebe38ca82\", \"7a7367ea-d8c8-4f60-9f9f-10863e6b6233\": \"7a7367ea-d8c8-4f60-9f9f-10863e6b6233\", \"3e530f02-5b98-4248-a327-792fad4e6707\": \"3e530f02-5b98-4248-a327-792fad4e6707\", \"bf6cf315-9a29-4136-bbb0-dde0be6e6b45\": \"bf6cf315-9a29-4136-bbb0-dde0be6e6b45\", \"3fdeb14f-78bd-4cdf-986b-3533e0f16006\": \"3fdeb14f-78bd-4cdf-986b-3533e0f16006\", \"c59a77e4-2ae3-464c-9c0c-9f3e6ed19545\": \"c59a77e4-2ae3-464c-9c0c-9f3e6ed19545\", \"3c029d87-398d-4ba9-b1ef-9bf7105adb7b\": \"3c029d87-398d-4ba9-b1ef-9bf7105adb7b\", \"280ec62a-b3f2-4458-b260-6b3384f629d1\": \"280ec62a-b3f2-4458-b260-6b3384f629d1\", \"6d9f8f96-4d7c-4912-8210-b9c4b68d2b97\": \"6d9f8f96-4d7c-4912-8210-b9c4b68d2b97\", \"d13e6ad0-e33c-4484-bdf6-ea5451e336ca\": 
\"d13e6ad0-e33c-4484-bdf6-ea5451e336ca\", \"1952ed09-7df6-42e8-88ec-cf7a64a05c67\": \"1952ed09-7df6-42e8-88ec-cf7a64a05c67\", \"56f14150-fdd7-4d4a-8299-1af60a0bb7c3\": \"56f14150-fdd7-4d4a-8299-1af60a0bb7c3\", \"483cc15a-69a5-46a2-8ab8-f18e1e5a4d75\": \"483cc15a-69a5-46a2-8ab8-f18e1e5a4d75\", \"63e8b380-5817-4f9c-b7b4-0b7a3a34962d\": \"63e8b380-5817-4f9c-b7b4-0b7a3a34962d\", \"e7fa8509-8bec-4b93-974b-e863021e1016\": \"e7fa8509-8bec-4b93-974b-e863021e1016\", \"5ae84309-887b-4558-b547-5a19730c0b88\": \"5ae84309-887b-4558-b547-5a19730c0b88\", \"7e120781-843f-436c-8a25-34e98e2d9a34\": \"7e120781-843f-436c-8a25-34e98e2d9a34\", \"53c6300d-8ddb-4484-8fd0-1c6a4d490ed8\": \"53c6300d-8ddb-4484-8fd0-1c6a4d490ed8\", \"cd01dd77-8932-4a27-94ef-89a504b034c0\": \"cd01dd77-8932-4a27-94ef-89a504b034c0\", \"6ed04c34-1580-4c2e-89e3-f9ba631434ef\": \"6ed04c34-1580-4c2e-89e3-f9ba631434ef\", \"20afe2b3-f8d3-4b53-9888-e2c97748145e\": \"20afe2b3-f8d3-4b53-9888-e2c97748145e\", \"889979a7-dc9d-4f78-acb3-e0544e656edb\": \"889979a7-dc9d-4f78-acb3-e0544e656edb\", \"2054f22f-5f4c-449d-a914-cf6aaebba2bb\": \"2054f22f-5f4c-449d-a914-cf6aaebba2bb\", \"d5aea81e-48b4-49d3-9c71-e13496b1f09a\": \"d5aea81e-48b4-49d3-9c71-e13496b1f09a\", \"da464428-1742-4607-84e9-fa12ffb64b9a\": \"da464428-1742-4607-84e9-fa12ffb64b9a\", \"61bbf52e-5f7d-4483-b59d-60fe5f47d26a\": \"61bbf52e-5f7d-4483-b59d-60fe5f47d26a\", \"73420898-f368-40aa-8b81-f758d4951648\": \"73420898-f368-40aa-8b81-f758d4951648\", \"3c1de954-c97a-4864-a0c8-85747d40201e\": \"3c1de954-c97a-4864-a0c8-85747d40201e\", \"3713e666-2619-4808-9ce4-c4ca2d4a9548\": \"3713e666-2619-4808-9ce4-c4ca2d4a9548\", \"1fbdaaea-236c-45a0-b3b1-2b412c1d9c5f\": \"1fbdaaea-236c-45a0-b3b1-2b412c1d9c5f\", \"cebc685c-00f9-4e60-b988-cfffbfb6d189\": \"cebc685c-00f9-4e60-b988-cfffbfb6d189\", \"c8987e86-9c10-4c5a-ac2b-0345320d5804\": \"c8987e86-9c10-4c5a-ac2b-0345320d5804\", \"3f8c574e-c957-4774-a784-bf317230d905\": \"3f8c574e-c957-4774-a784-bf317230d905\", 
\"6bc8b02e-82cd-4417-947f-6f351884d094\": \"6bc8b02e-82cd-4417-947f-6f351884d094\", \"61b65276-95ca-4be0-b168-b353a101eec0\": \"61b65276-95ca-4be0-b168-b353a101eec0\", \"08a3b3e9-07ab-40d7-9bba-3a47cd9ead24\": \"08a3b3e9-07ab-40d7-9bba-3a47cd9ead24\", \"e1baa1c3-eeed-4662-9f95-2427d3496f7d\": \"e1baa1c3-eeed-4662-9f95-2427d3496f7d\", \"a52ce642-3952-4320-8618-ccfb7f67dc01\": \"a52ce642-3952-4320-8618-ccfb7f67dc01\", \"4e262530-7286-4d5e-8fab-a564240d6498\": \"4e262530-7286-4d5e-8fab-a564240d6498\", \"4d490a13-6773-4e0a-8d2a-cf8da9862f6c\": \"4d490a13-6773-4e0a-8d2a-cf8da9862f6c\", \"97491151-98ab-4783-ab17-8e84cd470106\": \"97491151-98ab-4783-ab17-8e84cd470106\", \"b8113426-5a5c-4988-8cd1-cdd9d4f650a5\": \"b8113426-5a5c-4988-8cd1-cdd9d4f650a5\", \"ce52331e-c92c-41dd-99ad-98850974f81d\": \"ce52331e-c92c-41dd-99ad-98850974f81d\", \"14471e5c-9ffa-4899-8cf4-5be454c63588\": \"14471e5c-9ffa-4899-8cf4-5be454c63588\", \"80ed9a7d-f5fb-4e54-8d0c-85c4239de423\": \"80ed9a7d-f5fb-4e54-8d0c-85c4239de423\", \"28dfe340-634d-4e4e-ba23-b8c4b39059e1\": \"28dfe340-634d-4e4e-ba23-b8c4b39059e1\", \"23b9d1c7-ad1e-49ea-b59c-fc5eadbe64e0\": \"23b9d1c7-ad1e-49ea-b59c-fc5eadbe64e0\", \"bfec8b10-2d15-4028-8b12-1065c84d7f51\": \"bfec8b10-2d15-4028-8b12-1065c84d7f51\", \"0a64d801-1001-4876-bda2-231893c8b10f\": \"0a64d801-1001-4876-bda2-231893c8b10f\", \"c9357362-6bb2-4f47-9c5e-655f0eaf0776\": \"c9357362-6bb2-4f47-9c5e-655f0eaf0776\", \"71edcbd6-fb22-418d-9585-b0c380572976\": \"71edcbd6-fb22-418d-9585-b0c380572976\", \"24073662-e53d-4063-a58d-758637989a5d\": \"24073662-e53d-4063-a58d-758637989a5d\", \"1762770f-a85a-4371-8d64-c0aff3df922b\": \"1762770f-a85a-4371-8d64-c0aff3df922b\", \"430c114e-9c75-4d85-ac81-5ea55d8bf9f8\": \"430c114e-9c75-4d85-ac81-5ea55d8bf9f8\", \"7238a95f-f834-4c16-89ab-346c3ff138de\": \"7238a95f-f834-4c16-89ab-346c3ff138de\", \"b5fa0b35-f082-4468-b156-6144a3699e11\": \"b5fa0b35-f082-4468-b156-6144a3699e11\", \"9cb50698-f880-40d6-a71e-4107318f381c\": 
\"9cb50698-f880-40d6-a71e-4107318f381c\", \"b0ec9680-37a1-4008-acfc-3ad9f451c7f0\": \"b0ec9680-37a1-4008-acfc-3ad9f451c7f0\", \"a588a9f2-10d0-4e89-82eb-71ade5666253\": \"a588a9f2-10d0-4e89-82eb-71ade5666253\", \"9e73293d-9c9c-4d2a-b48a-04da4f468d4e\": \"9e73293d-9c9c-4d2a-b48a-04da4f468d4e\", \"4a0dc2fd-2aba-4cb5-9ced-0a5430e474bd\": \"4a0dc2fd-2aba-4cb5-9ced-0a5430e474bd\", \"5305b416-7e21-4442-9862-f57bea5aeb24\": \"5305b416-7e21-4442-9862-f57bea5aeb24\", \"e7f2649c-8983-4b93-a2fa-96e9db1f3421\": \"e7f2649c-8983-4b93-a2fa-96e9db1f3421\", \"fc677c94-2c7f-46a2-ba31-e15b52492c28\": \"fc677c94-2c7f-46a2-ba31-e15b52492c28\", \"35d7070f-a028-48a8-b18e-ccf24a597110\": \"35d7070f-a028-48a8-b18e-ccf24a597110\", \"31bd6e83-602b-4211-9124-1cd660c4823d\": \"31bd6e83-602b-4211-9124-1cd660c4823d\", \"706dbe50-4d29-48ba-91bf-7f6c23937c9a\": \"706dbe50-4d29-48ba-91bf-7f6c23937c9a\", \"084e0995-8557-4a41-927c-c365fd1f6926\": \"084e0995-8557-4a41-927c-c365fd1f6926\", \"481495a3-5da2-4c27-ae67-649ba965c5ae\": \"481495a3-5da2-4c27-ae67-649ba965c5ae\", \"f12049ad-88fa-42a3-85d4-951dbf3be20c\": \"f12049ad-88fa-42a3-85d4-951dbf3be20c\", \"87b0b66f-855a-45d0-8528-f88209c129f5\": \"87b0b66f-855a-45d0-8528-f88209c129f5\", \"739f8c61-90c0-4aae-a782-ab47b05a747d\": \"739f8c61-90c0-4aae-a782-ab47b05a747d\", \"939b9e11-0c04-4e8a-a739-036b69a50013\": \"939b9e11-0c04-4e8a-a739-036b69a50013\", \"d2b886c6-d77f-4977-ab86-28609409c8e7\": \"d2b886c6-d77f-4977-ab86-28609409c8e7\", \"8012f503-0599-4071-bc11-6bcd3fa6cd24\": \"8012f503-0599-4071-bc11-6bcd3fa6cd24\", \"7a51f43e-37da-4541-990f-662299359a13\": \"7a51f43e-37da-4541-990f-662299359a13\", \"086bff2b-df3a-4603-884d-1114d2c314ad\": \"086bff2b-df3a-4603-884d-1114d2c314ad\", \"72faf6e1-673e-49df-99aa-30e3bc91e754\": \"72faf6e1-673e-49df-99aa-30e3bc91e754\", \"b25ac305-cf9b-49dd-802a-b80932efc6fa\": \"b25ac305-cf9b-49dd-802a-b80932efc6fa\", \"d43da16a-65f8-446b-a7cf-1c96a696a297\": \"d43da16a-65f8-446b-a7cf-1c96a696a297\", 
\"4ab98fa6-da18-4993-bcd9-84502c7bcde8\": \"4ab98fa6-da18-4993-bcd9-84502c7bcde8\", \"565ea9df-0b71-4edc-9419-f8a22c947f84\": \"565ea9df-0b71-4edc-9419-f8a22c947f84\", \"0e0fc2f2-f4f8-42e6-a953-d8b0c7a5abfd\": \"0e0fc2f2-f4f8-42e6-a953-d8b0c7a5abfd\", \"0406a1f1-0406-4d05-922f-f1427f98c93b\": \"0406a1f1-0406-4d05-922f-f1427f98c93b\", \"11cb0bbe-5e61-42ac-b4eb-9e0bd63bb9bb\": \"11cb0bbe-5e61-42ac-b4eb-9e0bd63bb9bb\", \"5b085cb4-abee-453f-a574-da71af56978c\": \"5b085cb4-abee-453f-a574-da71af56978c\", \"f77e36b3-e4df-404e-a881-99cb0dcc88ae\": \"f77e36b3-e4df-404e-a881-99cb0dcc88ae\", \"e23c9b79-4282-453f-8af1-e9fd3038fd88\": \"e23c9b79-4282-453f-8af1-e9fd3038fd88\", \"0371c7ea-85f3-4b7f-a647-eade968e7ca3\": \"0371c7ea-85f3-4b7f-a647-eade968e7ca3\", \"2750152d-b234-43ad-b229-4ced5769e83d\": \"2750152d-b234-43ad-b229-4ced5769e83d\", \"f1f4bc50-21b0-45dc-ab0c-0961ffbdfcc5\": \"f1f4bc50-21b0-45dc-ab0c-0961ffbdfcc5\", \"c156498d-abb3-4aeb-bb67-4318d91a3e60\": \"c156498d-abb3-4aeb-bb67-4318d91a3e60\", \"67291e61-382d-4ce5-8250-e0fdfc9cbc28\": \"67291e61-382d-4ce5-8250-e0fdfc9cbc28\", \"6479ebef-1268-4c99-8d65-548e63340cc8\": \"6479ebef-1268-4c99-8d65-548e63340cc8\", \"6c38f83a-8170-4f50-a19d-a502ed2c9528\": \"6c38f83a-8170-4f50-a19d-a502ed2c9528\", \"ec7c4c87-d07d-4d5f-82db-8f8ed88e3986\": \"ec7c4c87-d07d-4d5f-82db-8f8ed88e3986\", \"c4ea99af-4eab-49b6-a721-a5e7f11dbca3\": \"c4ea99af-4eab-49b6-a721-a5e7f11dbca3\", \"65e41988-0ad3-4e8d-b339-e0a7bed53c62\": \"65e41988-0ad3-4e8d-b339-e0a7bed53c62\", \"4dfcafd4-0ea9-49ba-87eb-a5769ccef28b\": \"4dfcafd4-0ea9-49ba-87eb-a5769ccef28b\", \"19f13ac8-ce91-4077-b8d1-f5db1699002d\": \"19f13ac8-ce91-4077-b8d1-f5db1699002d\", \"2029610b-7396-4417-9d69-de0a7767df03\": \"2029610b-7396-4417-9d69-de0a7767df03\", \"db705b7d-2d53-4713-9953-47ce66126681\": \"db705b7d-2d53-4713-9953-47ce66126681\", \"b6f42f5b-b2ef-45f0-9a79-cd299095804d\": \"b6f42f5b-b2ef-45f0-9a79-cd299095804d\", \"8d683469-6bcd-4a7f-a955-88b5f94623f1\": 
\"8d683469-6bcd-4a7f-a955-88b5f94623f1\", \"5fcddab8-253a-4ef4-bb2d-db9e5ebb3dec\": \"5fcddab8-253a-4ef4-bb2d-db9e5ebb3dec\", \"2a6206ff-2e94-4793-9718-5c27e7c28998\": \"2a6206ff-2e94-4793-9718-5c27e7c28998\", \"68977ad7-dac1-4f52-8ed0-2674af8e7906\": \"68977ad7-dac1-4f52-8ed0-2674af8e7906\", \"aad94c9e-3914-4825-a7a6-97976e342c62\": \"aad94c9e-3914-4825-a7a6-97976e342c62\", \"d40bd1e8-fe87-433a-b9bb-4877022e6e7c\": \"d40bd1e8-fe87-433a-b9bb-4877022e6e7c\", \"a5bef94d-3129-42d2-9b86-45ff02a6a105\": \"a5bef94d-3129-42d2-9b86-45ff02a6a105\", \"97b22a16-25a5-4ab0-be42-ce1f79be34f3\": \"97b22a16-25a5-4ab0-be42-ce1f79be34f3\", \"958e9b3e-8fe7-48af-adef-d5dbcc8c2f5c\": \"958e9b3e-8fe7-48af-adef-d5dbcc8c2f5c\", \"deaf3d52-a4b7-4361-88da-f0d49abdc9ca\": \"deaf3d52-a4b7-4361-88da-f0d49abdc9ca\", \"97f2437e-bb30-4e87-9a8c-853ec840e795\": \"97f2437e-bb30-4e87-9a8c-853ec840e795\", \"5d686d00-0a1b-4cf0-babe-91a1b319aafd\": \"5d686d00-0a1b-4cf0-babe-91a1b319aafd\", \"f79e3f21-31cb-4321-b31f-fd2ae720c707\": \"f79e3f21-31cb-4321-b31f-fd2ae720c707\", \"e2104350-6cc0-4dff-836d-f2e557909c0e\": \"e2104350-6cc0-4dff-836d-f2e557909c0e\", \"8c00fc5a-1392-424d-970d-bdb3d08d82c6\": \"8c00fc5a-1392-424d-970d-bdb3d08d82c6\", \"7d44c574-d5c1-4050-978d-aa6ff265adf9\": \"7d44c574-d5c1-4050-978d-aa6ff265adf9\", \"3a4a5da2-20ce-4cb7-8863-02bfda49ff02\": \"3a4a5da2-20ce-4cb7-8863-02bfda49ff02\", \"ff22d68d-869f-4351-8d1e-4783bc980aaa\": \"ff22d68d-869f-4351-8d1e-4783bc980aaa\", \"797421cb-c1a2-46ac-b120-54a953e6a36c\": \"797421cb-c1a2-46ac-b120-54a953e6a36c\", \"34737e0f-e1c4-45d5-95a6-ebfa56786657\": \"34737e0f-e1c4-45d5-95a6-ebfa56786657\", \"55a103de-fddb-46bf-8d7d-f27b328bf619\": \"55a103de-fddb-46bf-8d7d-f27b328bf619\", \"eae8014a-3219-4d81-9914-eaf471ba9277\": \"eae8014a-3219-4d81-9914-eaf471ba9277\", \"d00f1043-9806-439b-8f01-e4dce603980b\": \"d00f1043-9806-439b-8f01-e4dce603980b\", \"1a9cea1d-3dfa-4e2a-ac5b-aa8743eaa630\": \"1a9cea1d-3dfa-4e2a-ac5b-aa8743eaa630\", 
\"542437e0-3b04-4b87-b0b2-ff05548b6847\": \"542437e0-3b04-4b87-b0b2-ff05548b6847\", \"1194b920-d8f9-4f67-b333-0f27fc5dea34\": \"1194b920-d8f9-4f67-b333-0f27fc5dea34\", \"40683c27-7a6f-4ef7-92b1-dabcac6aee8a\": \"40683c27-7a6f-4ef7-92b1-dabcac6aee8a\", \"58353613-c8f8-42cc-8a0a-de5ae5c76507\": \"58353613-c8f8-42cc-8a0a-de5ae5c76507\", \"08a1b5af-2d77-4f3f-a7c3-3979af55927b\": \"08a1b5af-2d77-4f3f-a7c3-3979af55927b\", \"30dac815-611a-4576-a799-51d5f40dacb1\": \"30dac815-611a-4576-a799-51d5f40dacb1\", \"927dfe1d-a24a-4c10-a8bd-4bb3d1417d20\": \"927dfe1d-a24a-4c10-a8bd-4bb3d1417d20\", \"31ee15ae-a44a-45e1-ac97-09117be6841e\": \"31ee15ae-a44a-45e1-ac97-09117be6841e\", \"3ae92dde-13ff-48fb-a70d-36aee6ba18bc\": \"3ae92dde-13ff-48fb-a70d-36aee6ba18bc\", \"b0c251c6-65a3-47c9-af8d-22763bc37e94\": \"b0c251c6-65a3-47c9-af8d-22763bc37e94\", \"9139fb8e-7163-4696-ac37-3bfe42580d39\": \"9139fb8e-7163-4696-ac37-3bfe42580d39\", \"df4cdc83-9997-4480-aed8-93f1c13b07aa\": \"df4cdc83-9997-4480-aed8-93f1c13b07aa\", \"46dfe8c4-945f-4f7a-8f4e-6e9da2a8da7b\": \"46dfe8c4-945f-4f7a-8f4e-6e9da2a8da7b\", \"b9b25f33-694f-4e8d-8f19-a26b9404e296\": \"b9b25f33-694f-4e8d-8f19-a26b9404e296\", \"62987fc3-87f7-48b3-8a94-a661ec36c887\": \"62987fc3-87f7-48b3-8a94-a661ec36c887\", \"346bf031-de41-49e4-b39b-cb015b9fd368\": \"346bf031-de41-49e4-b39b-cb015b9fd368\", \"c04f30ac-6717-4853-8aa1-5a40ac745692\": \"c04f30ac-6717-4853-8aa1-5a40ac745692\", \"39400e49-d3c7-49bd-bfdf-52f4ffd47264\": \"39400e49-d3c7-49bd-bfdf-52f4ffd47264\", \"a4790a9b-4ec1-4fae-a14c-24ccbc4ff9c2\": \"a4790a9b-4ec1-4fae-a14c-24ccbc4ff9c2\", \"87652293-907b-4873-bed4-364afe8b5594\": \"87652293-907b-4873-bed4-364afe8b5594\", \"d103260c-6663-4b58-ab81-3ace7020bc6a\": \"d103260c-6663-4b58-ab81-3ace7020bc6a\", \"7ab73376-a705-458a-9217-8683c5ff373e\": \"7ab73376-a705-458a-9217-8683c5ff373e\", \"0a859091-8ed4-4176-8247-67878293ea93\": \"0a859091-8ed4-4176-8247-67878293ea93\", \"1cc8d3c5-2a50-4f8f-84dd-111c20ce45dc\": 
\"1cc8d3c5-2a50-4f8f-84dd-111c20ce45dc\", \"b6c7b9d2-ec34-47ca-8f59-fbe178159ee4\": \"b6c7b9d2-ec34-47ca-8f59-fbe178159ee4\", \"af52b7ad-47ac-440b-82d9-608917d7eb02\": \"af52b7ad-47ac-440b-82d9-608917d7eb02\", \"09d07a1e-b506-466e-b2b3-93a95897f884\": \"09d07a1e-b506-466e-b2b3-93a95897f884\", \"11673e89-01a3-43d8-af56-f9eb97eb8587\": \"11673e89-01a3-43d8-af56-f9eb97eb8587\", \"fcc2d220-8ed7-4a41-a4fb-12f470f33d9b\": \"fcc2d220-8ed7-4a41-a4fb-12f470f33d9b\", \"8457a273-c7b4-499f-9df8-3f04130a6f3a\": \"8457a273-c7b4-499f-9df8-3f04130a6f3a\", \"65574567-3461-4bd7-b9c9-2ca0731a4b66\": \"65574567-3461-4bd7-b9c9-2ca0731a4b66\", \"fb207f95-bf43-4182-94c2-9c50ac365802\": \"fb207f95-bf43-4182-94c2-9c50ac365802\", \"e4068d71-72f6-4a6a-acc3-e19fa646d8f4\": \"e4068d71-72f6-4a6a-acc3-e19fa646d8f4\", \"13466d12-0d64-427e-96f8-05193eb8ff8c\": \"13466d12-0d64-427e-96f8-05193eb8ff8c\", \"d8815c94-7dee-4dca-a8d6-fd0eacf3ae88\": \"d8815c94-7dee-4dca-a8d6-fd0eacf3ae88\", \"c37b3d0f-e1ca-4078-b4a1-11084bbb59b2\": \"c37b3d0f-e1ca-4078-b4a1-11084bbb59b2\", \"84d6e8d1-a7b9-4f68-b1db-9f91ee88bbc7\": \"84d6e8d1-a7b9-4f68-b1db-9f91ee88bbc7\", \"d04ce259-3fb9-4b02-bd6d-f7639b7d2eed\": \"d04ce259-3fb9-4b02-bd6d-f7639b7d2eed\", \"fd822ea6-c122-4b52-b734-959d2d1c59b5\": \"fd822ea6-c122-4b52-b734-959d2d1c59b5\", \"b7d38d32-2560-4c23-b01f-43634b74fd70\": \"b7d38d32-2560-4c23-b01f-43634b74fd70\", \"03073157-7adf-483b-b788-818dc439b265\": \"03073157-7adf-483b-b788-818dc439b265\", \"9cdfcb5d-beb3-4f5a-bf68-26944e5d1a8d\": \"9cdfcb5d-beb3-4f5a-bf68-26944e5d1a8d\", \"76377af1-cc61-437c-845f-9a4fb1bd7a4a\": \"76377af1-cc61-437c-845f-9a4fb1bd7a4a\", \"c7e0aad2-b8a6-4258-9665-179376c7c8c4\": \"c7e0aad2-b8a6-4258-9665-179376c7c8c4\", \"0b506b1d-8d3f-47ed-ac9d-e65b23f91eb8\": \"0b506b1d-8d3f-47ed-ac9d-e65b23f91eb8\", \"8716c7ab-2f34-4bf1-85bc-1c661b8957cf\": \"8716c7ab-2f34-4bf1-85bc-1c661b8957cf\", \"2f39f707-fa96-423e-95e4-663b50c419f9\": \"2f39f707-fa96-423e-95e4-663b50c419f9\", 
\"a3c21ab6-7c51-4c6b-a5b0-0bbe1755a080\": \"a3c21ab6-7c51-4c6b-a5b0-0bbe1755a080\", \"b8bbdad9-e09f-4530-ba94-411c5faf09f8\": \"b8bbdad9-e09f-4530-ba94-411c5faf09f8\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
vectorize.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+
4
+ from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, load_index_from_storage
5
+ from llama_index.core.node_parser import SentenceSplitter
6
+
7
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)
# Directory read by SimpleDirectoryReader when the index has to be built.
DOCUMENT_PATH = './documents'
9
+
10
+
11
def load_write_index_nodes(path):
    """Return the vector index persisted at ``path``, building it if missing.

    If ``path`` does not exist, the files under ``DOCUMENT_PATH`` are read,
    indexed and persisted to ``path``. Otherwise the index is loaded from
    ``path``. Delete the stored directory whenever documents are added or
    changed so that the index is rebuilt on the next call.

    Args:
        path: Directory holding (or to receive) the persisted index.

    Returns:
        A ``(index, nodes)`` tuple. ``nodes`` is read from the index's own
        document store, so it is populated both for a freshly built index and
        for one loaded from storage.
    """
    if not os.path.exists(path):
        documents = SimpleDirectoryReader(DOCUMENT_PATH, filename_as_id=True).load_data()
        logger.info(f'Indexing documents in {DOCUMENT_PATH}...')
        index = VectorStoreIndex.from_documents(documents)
        index.storage_context.persist(persist_dir=path)
        logger.info(f'{len(documents)} documents indexed.')
    else:
        logger.info(f'Loading index from {path}...')
        storage_context = StorageContext.from_defaults(persist_dir=path)
        index = load_index_from_storage(storage_context)

    # Re-splitting the raw documents returned [] whenever the index came from
    # storage (nothing was read in that branch); ask the index for its nodes.
    nodes = list(index.docstore.docs.values())
    return index, nodes