lagerbaer committed on
Commit
99e964c
·
1 Parent(s): d409029

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. modal_gradio_test.py +29 -9
  2. modal_script.py +131 -133
  3. simple_script.py +4 -95
  4. update_vector_db.py +97 -0
modal_gradio_test.py CHANGED
@@ -3,8 +3,12 @@ from modal import Stub, Image, asgi_app
3
  from fastapi import FastAPI
4
 
5
 
6
- image = Image.debian_slim("3.11").pip_install(
7
- "gradio",
 
 
 
 
8
  )
9
 
10
  stub = Stub("secsplorer", image=image)
@@ -12,15 +16,31 @@ stub = Stub("secsplorer", image=image)
12
  web_app = FastAPI()
13
 
14
 
15
- @stub.function()
16
  @asgi_app()
17
  def fastapi_app():
18
  import gradio as gr
19
  from gradio.routes import mount_gradio_app
20
 
21
- def chat_function(message, history):
22
- yield "Foo!"
23
-
24
- interface = gr.ChatInterface(chat_function)
25
-
26
- return mount_gradio_app(app=web_app, blocks=interface, path="/")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from fastapi import FastAPI
4
 
5
 
6
+ image = (
7
+ Image.debian_slim()
8
+ .run_commands(["pip install --upgrade pip"])
9
+ .pip_install(
10
+ "gradio==3.50.2",
11
+ )
12
  )
13
 
14
  stub = Stub("secsplorer", image=image)
 
16
  web_app = FastAPI()
17
 
18
 
19
+ @stub.function(concurrency_limit=1)
20
  @asgi_app()
21
  def fastapi_app():
22
  import gradio as gr
23
  from gradio.routes import mount_gradio_app
24
 
25
+ import gradio as gr
26
+ import random
27
+ import time
28
+
29
+ with gr.Blocks() as demo:
30
+ chatbot = gr.Chatbot()
31
+ msg = gr.Textbox()
32
+ clear = gr.ClearButton([msg, chatbot])
33
+
34
+ def respond(message, chat_history):
35
+ print("Calling respond...")
36
+ bot_message = random.choice(
37
+ ["How are you?", "I love you", "I'm very hungry"]
38
+ )
39
+ chat_history.append((message, bot_message))
40
+ time.sleep(2)
41
+ print("Returning result...")
42
+ return "", chat_history
43
+
44
+ msg.submit(respond, [msg, chatbot], [msg, chatbot])
45
+
46
+ return mount_gradio_app(app=web_app, blocks=demo, path="/")
modal_script.py CHANGED
@@ -6,7 +6,7 @@ from typing import List, Dict
6
 
7
  image = Image.debian_slim("3.11").pip_install(
8
  "cohere",
9
- "gradio",
10
  "pinecone-client",
11
  )
12
 
@@ -28,144 +28,142 @@ def fastapi_app():
28
  import gradio as gr
29
  from gradio.routes import mount_gradio_app
30
 
31
- # print("Connecting to cohere client")
32
- # co = cohere.Client(os.environ["COHERE_API_KEY"])
33
- # print("Done")
34
- # # pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment="gcp-starter")
35
- # # index = pinecone.Index(index_name="td-sec-embeddings")
36
- # index = None
37
-
38
- # def retrieve(
39
- # index: pinecone.Index, query: str, co: cohere.Client
40
- # ) -> List[Dict[str, str]]:
41
- # """
42
- # Retrieves documents based on the given query.
43
-
44
- # Parameters:
45
- # query (str): The query to retrieve documents for.
46
-
47
- # Returns:
48
- # List[Dict[str, str]]: A list of dictionaries representing the retrieved documents, with 'title', 'snippet', and 'url' keys.
49
- # """
50
- # docs_retrieved = []
51
-
52
- # print(f"Calling retrieve for '{query}'")
53
- # print("Embedding the query")
54
- # query_emb = co.embed(
55
- # texts=[query], model="embed-english-v3.0", input_type="search_query"
56
- # ).embeddings
57
-
58
- # print("Querying pinecone")
59
- # res = index.query(query_emb, top_k=10, include_metadata=True)
60
- # print("Preparing to rerank")
61
- # docs_to_rerank = [match["metadata"] for match in res["matches"]]
62
-
63
- # rerank_results = co.rerank(
64
- # query=query,
65
- # documents=docs_to_rerank,
66
- # top_n=3,
67
- # model="rerank-english-v2.0",
68
- # )
69
-
70
- # docs_retrieved = []
71
- # for hit in rerank_results:
72
- # docs_retrieved.append(docs_to_rerank[hit.index])
73
-
74
- # print("Returning retrieved docs")
75
- # return docs_retrieved
76
-
77
- # class Chatbot:
78
- # def __init__(self, co: cohere.Client, index: pinecone.Index):
79
- # self.index = index
80
- # self.conversation_id = str(uuid.uuid4())
81
- # self.co = co
82
-
83
- # def generate_response(self, message: str):
84
- # """
85
- # Generates a response to the user's message.
86
-
87
- # Parameters:
88
- # message (str): The user's message.
89
-
90
- # Yields:
91
- # Event: A response event generated by the chatbot.
92
-
93
- # Returns:
94
- # List[Dict[str, str]]: A list of dictionaries representing the retrieved documents.
95
-
96
- # """
97
-
98
- # # Generate search queries (if any)
99
- # response = self.co.chat(message=message, search_queries_only=True)
100
-
101
- # # If there are search queries, retrieve documents and respond
102
- # if response.search_queries:
103
- # print("Retrieving information")
104
-
105
- # documents = self.retrieve_docs(response)
106
-
107
- # response = self.co.chat(
108
- # message=message,
109
- # documents=documents,
110
- # conversation_id=self.conversation_id,
111
- # stream=True,
112
- # )
113
- # for event in response:
114
- # yield event
115
-
116
- # # If there is no search query, directly respond
117
- # else:
118
- # response = self.co.chat(
119
- # message=message, conversation_id=self.conversation_id, stream=True
120
- # )
121
- # for event in response:
122
- # yield event
123
-
124
- # def retrieve_docs(self, response) -> List[Dict[str, str]]:
125
- # """
126
- # Retrieves documents based on the search queries in the response.
127
-
128
- # Parameters:
129
- # response: The response object containing search queries.
130
-
131
- # Returns:
132
- # List[Dict[str, str]]: A list of dictionaries representing the retrieved documents.
133
-
134
- # """
135
- # # Get the query(s)
136
 
137
- # queries = []
138
- # for search_query in response.search_queries:
139
- # queries.append(search_query["text"])
 
 
140
 
141
- # # Retrieve documents for each query
142
- # retrieved_docs = []
143
- # for query in queries:
144
- # retrieved_docs.extend(retrieve(self.index, query, self.co))
145
 
146
- # return retrieved_docs
 
 
 
 
 
147
 
148
- # chatbot = Chatbot(co, index)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  def chat_function(message, history):
151
- return "Foo!"
152
- # flag = False
153
- # reply = ""
154
- # for event in chatbot.generate_response(message):
155
- # if event.event_type == "text-generation":
156
- # reply += str(event.text)
157
- # yield reply
158
-
159
- # # Citations
160
- # if event.event_type == "citation-generation":
161
- # if not flag:
162
- # reply += "\n\nCITATIONS:\n\n"
163
- # yield reply
164
- # flag = True
165
- # reply += str(event.citations) + "\n"
166
- # yield reply
167
-
168
- interface = gr.ChatInterface(chat_function)
169
 
170
  print("All ready!")
171
  return mount_gradio_app(app=web_app, blocks=interface, path="/")
 
6
 
7
  image = Image.debian_slim("3.11").pip_install(
8
  "cohere",
9
+ "gradio==3.50.2",
10
  "pinecone-client",
11
  )
12
 
 
28
  import gradio as gr
29
  from gradio.routes import mount_gradio_app
30
 
31
+ print("Connecting to cohere client")
32
+ co = cohere.Client(os.environ["COHERE_API_KEY"])
33
+ print("Done")
34
+ pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment="us-west1-gcp")
35
+ index = pinecone.Index(index_name="td-sec-embeddings")
36
+
37
+ def retrieve(
38
+ index: pinecone.Index, query: str, co: cohere.Client
39
+ ) -> List[Dict[str, str]]:
40
+ """
41
+ Retrieves documents based on the given query.
42
+
43
+ Parameters:
44
+ query (str): The query to retrieve documents for.
45
+
46
+ Returns:
47
+ List[Dict[str, str]]: A list of dictionaries representing the retrieved documents, with 'title', 'snippet', and 'url' keys.
48
+ """
49
+ docs_retrieved = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ print(f"Calling retrieve for '{query}'")
52
+ print("Embedding the query")
53
+ query_emb = co.embed(
54
+ texts=[query], model="embed-english-v3.0", input_type="search_query"
55
+ ).embeddings
56
 
57
+ print("Querying pinecone")
58
+ res = index.query(query_emb, top_k=10, include_metadata=True)
59
+ print("Preparing to rerank")
60
+ docs_to_rerank = [match["metadata"] for match in res["matches"]]
61
 
62
+ rerank_results = co.rerank(
63
+ query=query,
64
+ documents=docs_to_rerank,
65
+ top_n=3,
66
+ model="rerank-english-v2.0",
67
+ )
68
 
69
+ docs_retrieved = []
70
+ for hit in rerank_results:
71
+ docs_retrieved.append(docs_to_rerank[hit.index])
72
+
73
+ print("Returning retrieved docs")
74
+ return docs_retrieved
75
+
76
+ class Chatbot:
77
+ def __init__(self, co: cohere.Client, index: pinecone.Index):
78
+ self.index = index
79
+ self.conversation_id = str(uuid.uuid4())
80
+ self.co = co
81
+
82
+ def generate_response(self, message: str):
83
+ """
84
+ Generates a response to the user's message.
85
+
86
+ Parameters:
87
+ message (str): The user's message.
88
+
89
+ Yields:
90
+ Event: A response event generated by the chatbot.
91
+
92
+ Returns:
93
+ List[Dict[str, str]]: A list of dictionaries representing the retrieved documents.
94
+
95
+ """
96
+
97
+ # Generate search queries (if any)
98
+ response = self.co.chat(message=message, search_queries_only=True)
99
+
100
+ # If there are search queries, retrieve documents and respond
101
+ if response.search_queries:
102
+ print("Retrieving information")
103
+
104
+ documents = self.retrieve_docs(response)
105
+
106
+ response = self.co.chat(
107
+ message=message,
108
+ documents=documents,
109
+ conversation_id=self.conversation_id,
110
+ stream=True,
111
+ )
112
+ for event in response:
113
+ yield event
114
+
115
+ # If there is no search query, directly respond
116
+ else:
117
+ response = self.co.chat(
118
+ message=message, conversation_id=self.conversation_id, stream=True
119
+ )
120
+ for event in response:
121
+ yield event
122
+
123
+ def retrieve_docs(self, response) -> List[Dict[str, str]]:
124
+ """
125
+ Retrieves documents based on the search queries in the response.
126
+
127
+ Parameters:
128
+ response: The response object containing search queries.
129
+
130
+ Returns:
131
+ List[Dict[str, str]]: A list of dictionaries representing the retrieved documents.
132
+
133
+ """
134
+ # Get the query(s)
135
+
136
+ queries = []
137
+ for search_query in response.search_queries:
138
+ queries.append(search_query["text"])
139
+
140
+ # Retrieve documents for each query
141
+ retrieved_docs = []
142
+ for query in queries:
143
+ retrieved_docs.extend(retrieve(self.index, query, self.co))
144
+
145
+ return retrieved_docs
146
+
147
+ chatbot = Chatbot(co, index)
148
 
149
  def chat_function(message, history):
150
+ flag = False
151
+ reply = ""
152
+ for event in chatbot.generate_response(message):
153
+ if event.event_type == "text-generation":
154
+ reply += str(event.text)
155
+ yield reply
156
+
157
+ # Citations
158
+ if event.event_type == "citation-generation":
159
+ if not flag:
160
+ reply += "\n\nCITATIONS:\n\n"
161
+ yield reply
162
+ flag = True
163
+ reply += str(event.citations) + "\n"
164
+ yield reply
165
+
166
+ interface = gr.ChatInterface(chat_function).queue()
 
167
 
168
  print("All ready!")
169
  return mount_gradio_app(app=web_app, blocks=interface, path="/")
simple_script.py CHANGED
@@ -4,9 +4,6 @@ import pinecone
4
  import uuid
5
 
6
  from typing import List, Dict
7
-
8
- # from unstructured.chunking.title import chunk_by_title
9
- # from unstructured.partition.pdf import partition_pdf
10
  from dotenv import load_dotenv
11
 
12
 
@@ -14,87 +11,10 @@ load_dotenv()
14
 
15
  co = cohere.Client(os.environ["COHERE_API_KEY"])
16
 
17
- pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment="gcp-starter")
18
 
19
  index = pinecone.Index("td-sec-embeddings")
20
 
21
- from typing import List, Dict
22
-
23
- # from unstructured.partition.pdf import partition_pdf
24
- # from unstructured.chunking.title import chunk_by_title
25
-
26
- import cohere
27
-
28
- sources = [
29
- {
30
- "title": "2023",
31
- "url": "https://www.td.com/content/dam/tdcom/canada/about-td/pdf/quarterly-results/2023/2023-annual-report-e.pdf",
32
- "filename": "/Users/clemensadolphs/git-personal/secsplorer/2023-annual-report-e.pdf",
33
- },
34
- # {
35
- # "title": "2022",
36
- # "url": "https://www.td.com/document/PDF/ar2022/ar2022-Complete-Report.pdf",
37
- # "filename": "/Users/clemensadolphs/git-personal/secsplorer/2023-annual-report-e.pdf",
38
- # },
39
- ]
40
-
41
-
42
- def load() -> List[Dict[str, str]]:
43
- """
44
- Loads the documents from the sources and chunks the HTML content.
45
- """
46
- print("Loading documents...")
47
- docs = []
48
- for source in sources:
49
- elements = partition_pdf(filename=source["filename"])
50
- chunks = chunk_by_title(elements)
51
- for chunk in chunks:
52
- docs.append(
53
- {
54
- "title": source["title"],
55
- "text": str(chunk),
56
- "url": source["url"],
57
- }
58
- )
59
- return docs
60
-
61
-
62
- def embed(docs: List[Dict[str, str]]) -> List[List[float]]:
63
- """
64
- Embeds the documents using the Cohere API.
65
- """
66
- print("Embedding documents...")
67
-
68
- batch_size = 90
69
- docs_len = len(docs)
70
- docs_embs = []
71
-
72
- for i in range(0, docs_len, batch_size):
73
- batch = docs[i : min(i + batch_size, docs_len)]
74
- texts = [item["text"] for item in batch]
75
- docs_embs_batch = co.embed(
76
- texts=texts, model="embed-english-v3.0", input_type="search_document"
77
- ).embeddings
78
- docs_embs.extend(docs_embs_batch)
79
- return docs_embs
80
-
81
-
82
- def update_index(
83
- index: pinecone.Index, docs: List[Dict[str, str]], docs_embs: List[List[float]]
84
- ) -> None:
85
- """
86
- Indexes the documents for efficient retrieval.
87
- """
88
- batch_size = 100
89
-
90
- ids = [str(i) for i in range(len(docs))]
91
-
92
- to_upsert = list(zip(ids, docs_embs, docs))
93
-
94
- for i in range(0, len(docs), batch_size):
95
- i_end = min(i + batch_size, len(docs))
96
- index.upsert(vectors=to_upsert[i:i_end])
97
-
98
 
99
  def retrieve(index: pinecone.Index, query: str) -> List[Dict[str, str]]:
100
  """
@@ -108,21 +28,18 @@ def retrieve(index: pinecone.Index, query: str) -> List[Dict[str, str]]:
108
  """
109
  docs_retrieved = []
110
 
111
- print(f"Calling retrieve for '{query}'")
112
- print("Embedding the query")
113
  query_emb = co.embed(
114
  texts=[query], model="embed-english-v3.0", input_type="search_query"
115
  ).embeddings
116
 
117
- print("Querying pinecone")
118
  res = index.query(query_emb, top_k=100, include_metadata=True)
119
- print("Preparing to rerank")
120
  docs_to_rerank = [match["metadata"] for match in res["matches"]]
121
 
122
  rerank_results = co.rerank(
123
  query=query,
124
  documents=docs_to_rerank,
125
- top_n=10,
126
  model="rerank-english-v2.0",
127
  )
128
 
@@ -130,15 +47,9 @@ def retrieve(index: pinecone.Index, query: str) -> List[Dict[str, str]]:
130
  for hit in rerank_results:
131
  docs_retrieved.append(docs_to_rerank[hit.index])
132
 
133
- print("Returning retrieved docs")
134
  return docs_retrieved
135
 
136
 
137
- # docs = load()
138
- # docs_embeds = embed(docs)
139
- # update_index(index, docs=docs, docs_embs=docs_embeds)
140
-
141
-
142
  class Chatbot:
143
  def __init__(self, co: cohere.Client, index: pinecone.Index):
144
  self.index = index
@@ -168,7 +79,7 @@ class Chatbot:
168
  print("Retrieving information...")
169
 
170
  documents = self.retrieve_docs(response)
171
- print(f"Generating response with documents {documents}")
172
  response = self.co.chat(
173
  message=message,
174
  documents=documents,
@@ -198,11 +109,9 @@ class Chatbot:
198
 
199
  """
200
  # Get the query(s)
201
- print("Calling retrieve_docs")
202
  queries = []
203
  for search_query in response.search_queries:
204
  queries.append(search_query["text"])
205
- print(queries)
206
 
207
  # Retrieve documents for each query
208
  retrieved_docs = []
 
4
  import uuid
5
 
6
  from typing import List, Dict
 
 
 
7
  from dotenv import load_dotenv
8
 
9
 
 
11
 
12
  co = cohere.Client(os.environ["COHERE_API_KEY"])
13
 
14
+ pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment="us-west1-gcp")
15
 
16
  index = pinecone.Index("td-sec-embeddings")
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def retrieve(index: pinecone.Index, query: str) -> List[Dict[str, str]]:
20
  """
 
28
  """
29
  docs_retrieved = []
30
 
 
 
31
  query_emb = co.embed(
32
  texts=[query], model="embed-english-v3.0", input_type="search_query"
33
  ).embeddings
34
 
 
35
  res = index.query(query_emb, top_k=100, include_metadata=True)
36
+
37
  docs_to_rerank = [match["metadata"] for match in res["matches"]]
38
 
39
  rerank_results = co.rerank(
40
  query=query,
41
  documents=docs_to_rerank,
42
+ top_n=3,
43
  model="rerank-english-v2.0",
44
  )
45
 
 
47
  for hit in rerank_results:
48
  docs_retrieved.append(docs_to_rerank[hit.index])
49
 
 
50
  return docs_retrieved
51
 
52
 
 
 
 
 
 
53
  class Chatbot:
54
  def __init__(self, co: cohere.Client, index: pinecone.Index):
55
  self.index = index
 
79
  print("Retrieving information...")
80
 
81
  documents = self.retrieve_docs(response)
82
+
83
  response = self.co.chat(
84
  message=message,
85
  documents=documents,
 
109
 
110
  """
111
  # Get the query(s)
 
112
  queries = []
113
  for search_query in response.search_queries:
114
  queries.append(search_query["text"])
 
115
 
116
  # Retrieve documents for each query
117
  retrieved_docs = []
update_vector_db.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cohere
2
+ import os
3
+ import pinecone
4
+
5
+ from typing import List, Dict
6
+
7
+ from unstructured.chunking.title import chunk_by_title
8
+ from unstructured.partition.pdf import partition_pdf
9
+
10
+ from dotenv import load_dotenv
11
+
12
+ load_dotenv()
13
+
14
+ co = cohere.Client(os.environ["COHERE_API_KEY"])
15
+
16
+ pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment="us-west1-gcp")
17
+
18
+ index = pinecone.Index("td-sec-embeddings")
19
+
20
+ from typing import List, Dict
21
+
22
+ sources = [
23
+ {
24
+ "title": "2023",
25
+ "url": "https://www.td.com/content/dam/tdcom/canada/about-td/pdf/quarterly-results/2023/2023-annual-report-e.pdf",
26
+ "filename": "/Users/clemensadolphs/git-personal/secsplorer/2023-annual-report-e.pdf",
27
+ },
28
+ {
29
+ "title": "2022",
30
+ "url": "https://www.td.com/document/PDF/ar2022/ar2022-Complete-Report.pdf",
31
+ "filename": "/Users/clemensadolphs/git-personal/secsplorer/2023-annual-report-e.pdf",
32
+ },
33
+ ]
34
+
35
+
36
+ def load() -> List[Dict[str, str]]:
37
+ """
38
+ Loads the documents from the sources and chunks the PDF content.
39
+ """
40
+ print("Loading documents...")
41
+ docs = []
42
+ for source in sources:
43
+ elements = partition_pdf(filename=source["filename"])
44
+ chunks = chunk_by_title(elements)
45
+ for chunk in chunks:
46
+ docs.append(
47
+ {
48
+ "title": source["title"],
49
+ "text": str(chunk),
50
+ "url": source["url"],
51
+ }
52
+ )
53
+ return docs
54
+
55
+
56
+ def embed(docs: List[Dict[str, str]]) -> List[List[float]]:
57
+ """
58
+ Embeds the documents using the Cohere API.
59
+ """
60
+ print("Embedding documents...")
61
+
62
+ batch_size = 90
63
+ docs_len = len(docs)
64
+ docs_embs = []
65
+
66
+ for i in range(0, docs_len, batch_size):
67
+ batch = docs[i : min(i + batch_size, docs_len)]
68
+ texts = [item["text"] for item in batch]
69
+ docs_embs_batch = co.embed(
70
+ texts=texts, model="embed-english-v3.0", input_type="search_document"
71
+ ).embeddings
72
+ docs_embs.extend(docs_embs_batch)
73
+ return docs_embs
74
+
75
+
76
+ def update_index(
77
+ index: pinecone.Index, docs: List[Dict[str, str]], docs_embs: List[List[float]]
78
+ ) -> None:
79
+ """
80
+ Indexes the documents for efficient retrieval.
81
+ """
82
+ print("Indexing documents in Pinecone")
83
+ batch_size = 100
84
+
85
+ ids = [str(i) for i in range(len(docs))]
86
+
87
+ to_upsert = list(zip(ids, docs_embs, docs))
88
+
89
+ for i in range(0, len(docs), batch_size):
90
+ i_end = min(i + batch_size, len(docs))
91
+ index.upsert(vectors=to_upsert[i:i_end])
92
+
93
+
94
+ if __name__ == "__main__":
95
+ docs = load()
96
+ docs_embeds = embed(docs)
97
+ update_index(index, docs=docs, docs_embs=docs_embeds)