YoniFriedman committed on
Commit
02d8135
·
verified ·
1 Parent(s): eca8b37

Update to metadata approach

Browse files
Files changed (1) hide show
  1. app.py +39 -394
app.py CHANGED
@@ -16,409 +16,54 @@ os.environ["OPENAI_API_KEY"]
16
  llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
17
  service_context = ServiceContext.from_defaults(llm=llm)
18
 
19
- # Load query engines
20
- claw_guidance_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/claw_guidance"))
21
- claw_guidance_summary_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/claw_guidance_summary"))
22
- claw_guidance_vector_query_engine = claw_guidance_index.as_query_engine(similarity_top_k=2)
23
- claw_guidance_summary_query_engine = claw_guidance_summary_index.as_query_engine()
24
-
25
- itpc_guidance_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/itpc_guidance"))
26
- itpc_guidance_summary_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/itpc_guidance_summary"))
27
- itpc_guidance_vector_query_engine = itpc_guidance_index.as_query_engine(similarity_top_k=2)
28
- itpc_guidance_summary_query_engine = itpc_guidance_summary_index.as_query_engine()
29
-
30
- unaids_guidance_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/unaids_guidance"))
31
- unaids_guidance_summary_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/unaids_guidance_summary"))
32
- unaids_guidance_vector_query_engine = unaids_guidance_index.as_query_engine(similarity_top_k=2)
33
- unaids_guidance_summary_query_engine = unaids_guidance_summary_index.as_query_engine()
34
-
35
- pepfar_guidance_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/pepfar_guidance"))
36
- pepfar_guidance_summary_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/pepfar_guidance_summary"))
37
- pepfar_guidance_vector_query_engine = pepfar_guidance_index.as_query_engine(similarity_top_k=2)
38
- pepfar_guidance_summary_query_engine = pepfar_guidance_summary_index.as_query_engine()
39
-
40
- ennaso_guidance_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/ennaso_guidance"))
41
- ennaso_guidance_summary_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/ennaso_guidance_summary"))
42
- ennaso_guidance_vector_query_engine = ennaso_guidance_index.as_query_engine(similarity_top_k=2)
43
- ennaso_guidance_summary_query_engine = ennaso_guidance_summary_index.as_query_engine()
44
-
45
- globalfund_guidance_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/globalfund_guidance"))
46
- globalfund_guidance_summary_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/globalfund_guidance_summary"))
47
- globalfund_guidance_vector_query_engine = globalfund_guidance_index.as_query_engine(similarity_top_k=2)
48
- globalfund_guidance_summary_query_engine = globalfund_guidance_summary_index.as_query_engine()
49
-
50
- from llama_index.agent import OpenAIAgent
51
-
52
- agents = {}
53
-
54
- # define tools
55
- query_engine_tools = [
56
- QueryEngineTool(
57
- query_engine=claw_guidance_vector_query_engine,
58
- metadata=ToolMetadata(
59
- name="claw_guidance_vector_tool",
60
- description=(
61
- "Useful for questions related to specific questions about best practices and guidance"
62
- " about community led monitoring, also known as CLM."
63
- ),
64
- ),
65
- ),
66
- QueryEngineTool(
67
- query_engine=claw_guidance_summary_query_engine,
68
- metadata=ToolMetadata(
69
- name="claw_guidance_summary_tool",
70
- description=(
71
- "Useful for requests that require a wholistic summary related to guidance on CLM,"
72
- " or community led monitoring. For specific questions about CLM, please use the vector tool."
73
- ),
74
- ),
75
- ),
76
- ]
77
-
78
- # build agent
79
- function_llm = OpenAI(model="gpt-4", temperature = 0)
80
- agent = OpenAIAgent.from_tools(
81
- query_engine_tools,
82
- llm=function_llm,
83
- verbose=True,
84
- )
85
-
86
- agents["claw_guidance"] = agent
87
-
88
- # define tools
89
- query_engine_tools = [
90
- QueryEngineTool(
91
- query_engine=itpc_guidance_vector_query_engine,
92
- metadata=ToolMetadata(
93
- name="itpc_guidance_vector_tool",
94
- description=(
95
- "Useful for questions related to specific questions about best practices and guidance"
96
- " about community led monitoring, also known as CLM."
97
- ),
98
- ),
99
- ),
100
- QueryEngineTool(
101
- query_engine=claw_guidance_summary_query_engine,
102
- metadata=ToolMetadata(
103
- name="itpc_guidance_summary_tool",
104
- description=(
105
- "Useful for requests that require a wholistic summary related to guidance on CLM,"
106
- " or community led monitoring. For specific questions about CLM, please use the vector tool."
107
- ),
108
- ),
109
- ),
110
- ]
111
-
112
- # build agent
113
- function_llm = OpenAI(model="gpt-4", temperature = 0)
114
- agent = OpenAIAgent.from_tools(
115
- query_engine_tools,
116
- llm=function_llm,
117
- verbose=True,
118
- # system_prompt=f"""\
119
- # You are a specialized agent designed to answer queries about guidance on community led monitoring from ITPC, also
120
- # known as the International Treatment Preparedness Coalition.
121
- # You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
122
- # """,
123
- )
124
-
125
- agents["itpc_guidance"] = agent
126
-
127
- # define tools
128
- query_engine_tools = [
129
- QueryEngineTool(
130
- query_engine=unaids_guidance_vector_query_engine,
131
- metadata=ToolMetadata(
132
- name="unaids_guidance_vector_tool",
133
- description=(
134
- "Useful for questions related to specific questions about best practices and guidance"
135
- " about community led monitoring, also known as CLM."
136
- ),
137
- ),
138
- ),
139
- QueryEngineTool(
140
- query_engine=unaids_guidance_summary_query_engine,
141
- metadata=ToolMetadata(
142
- name="unaids_guidance_summary_tool",
143
- description=(
144
- "Useful for requests that require a wholistic summary related to guidance on CLM,"
145
- " or community led monitoring. For specific questions about CLM, please use the vector tool."
146
- ),
147
- ),
148
- ),
149
- ]
150
-
151
- # build agent
152
- function_llm = OpenAI(model="gpt-4", temperature = 0)
153
- agent = OpenAIAgent.from_tools(
154
- query_engine_tools,
155
- llm=function_llm,
156
- verbose=True,
157
- # system_prompt=f"""\
158
- # You are a specialized agent designed to answer queries about guidance on community led monitoring from UNAIDS.
159
- # You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
160
- # """,
161
- )
162
-
163
- agents["unaids_guidance"] = agent
164
-
165
- # define tools
166
- query_engine_tools = [
167
- QueryEngineTool(
168
- query_engine=pepfar_guidance_vector_query_engine,
169
- metadata=ToolMetadata(
170
- name="pepfar_guidance_vector_tool",
171
- description=(
172
- "Useful for questions related to specific questions about best practices and guidance"
173
- " about community led monitoring, also known as CLM."
174
- ),
175
- ),
176
- ),
177
- QueryEngineTool(
178
- query_engine=pepfar_guidance_summary_query_engine,
179
- metadata=ToolMetadata(
180
- name="pepfar_guidance_summary_tool",
181
- description=(
182
- "Useful for requests that require a wholistic summary related to guidance on CLM,"
183
- " or community led monitoring. For specific questions about CLM, please use the vector tool."
184
- ),
185
- ),
186
- ),
187
- ]
188
-
189
- # build agent
190
- function_llm = OpenAI(model="gpt-4", temperature = 0)
191
- agent = OpenAIAgent.from_tools(
192
- query_engine_tools,
193
- llm=function_llm,
194
- verbose=True,
195
- # system_prompt=f"""\
196
- # You are a specialized agent designed to answer queries about guidance on community led monitoring from PEPFAR.
197
- # You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
198
- # """,
199
- )
200
-
201
- agents["pepfar_guidance"] = agent
202
-
203
- # define tools
204
- query_engine_tools = [
205
- QueryEngineTool(
206
- query_engine=ennaso_guidance_vector_query_engine,
207
- metadata=ToolMetadata(
208
- name="ennaso_guidance_vector_tool",
209
- description=(
210
- "Useful for questions related to specific questions about best practices and guidance"
211
- " about community led monitoring, also known as CLM."
212
- ),
213
- ),
214
- ),
215
- QueryEngineTool(
216
- query_engine=ennaso_guidance_summary_query_engine,
217
- metadata=ToolMetadata(
218
- name="ennaso_guidance_summary_tool",
219
- description=(
220
- "Useful for requests that require a wholistic summary related to guidance on CLM,"
221
- " or community led monitoring. For specific questions about CLM, please use the vector tool."
222
- ),
223
- ),
224
- ),
225
- ]
226
-
227
- # build agent
228
- function_llm = OpenAI(model="gpt-4", temperature = 0)
229
- agent = OpenAIAgent.from_tools(
230
- query_engine_tools,
231
- llm=function_llm,
232
- verbose=True,
233
- # system_prompt=f"""\
234
- # You are a specialized agent designed to answer queries about guidance on community led monitoring from ENNASO,
235
- # the Eastern African National Networks of AIDS and Health Service Organizations.
236
- # You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
237
- # """,
238
- )
239
-
240
- agents["ennaso_guidance"] = agent
241
-
242
- # define tools
243
- query_engine_tools = [
244
- QueryEngineTool(
245
- query_engine=globalfund_guidance_vector_query_engine,
246
- metadata=ToolMetadata(
247
- name="globalfund_guidance_vector_tool",
248
- description=(
249
- "Useful for questions related to specific questions about best practices and guidance"
250
- " about community led monitoring, also known as CLM."
251
- ),
252
- ),
253
- ),
254
- QueryEngineTool(
255
- query_engine=globalfund_guidance_summary_query_engine,
256
- metadata=ToolMetadata(
257
- name="globalfund_guidance_summary_tool",
258
- description=(
259
- "Useful for requests that require a wholistic summary related to guidance on CLM,"
260
- " or community led monitoring. For specific questions about CLM, please use the vector tool."
261
- ),
262
- ),
263
- ),
264
- ]
265
-
266
- # build agent
267
- function_llm = OpenAI(model="gpt-4", temperature = 0)
268
- agent = OpenAIAgent.from_tools(
269
- query_engine_tools,
270
- llm=function_llm,
271
- verbose=True,
272
- # system_prompt=f"""\
273
- # You are a specialized agent designed to answer queries about guidance on community led monitoring from the Global
274
- # Fund to Fight AIDS, Tuberculosis, and Malaria.
275
- # You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
276
- # """,
277
- )
278
-
279
- agents["globalfund_guidance"] = agent
280
-
281
- all_tools = []
282
-
283
- claw_summary = (
284
- "This content contains guidance on community led monitoring from CLAW, also known as"
285
- " Community Led Monitoring Working Group. Use this tool if you want to answer any questions about CLM"
286
- " particularly when asked about CLAW guidance. If using this tool, mention that the response comes from"
287
- " CLAW guidance."
288
- )
289
-
290
- doc_tool = QueryEngineTool(
291
- query_engine=agents["claw_guidance"],
292
- metadata=ToolMetadata(
293
- name="claw_tool",
294
- description=claw_summary,
295
- ),
296
- )
297
- all_tools.append(doc_tool)
298
-
299
- itpc_summary = (
300
- "This content contains guidance on community led monitoring from ITPC, also known as"
301
- " International Treatment Preparedness Coalition. Use this tool if you want to answer any questions about CLM"
302
- " particularly when asked about ITPC guidance. If using this tool, mention that the response comes from"
303
- " ITPC guidance."
304
- )
305
-
306
- doc_tool = QueryEngineTool(
307
- query_engine=agents["itpc_guidance"],
308
- metadata=ToolMetadata(
309
- name="itpc_tool",
310
- description=itpc_summary,
311
- ),
312
- )
313
- all_tools.append(doc_tool)
314
-
315
- unaids_summary = (
316
- "This content contains guidance on community led monitoring from UNAIDS."
317
- " Use this tool if you want to answer any questions about CLM"
318
- " particularly when asked about UNAIDS guidance. If using this tool, mention that the response comes from"
319
- " UNAIDS guidance."
320
- )
321
-
322
- doc_tool = QueryEngineTool(
323
- query_engine=agents["unaids_guidance"],
324
- metadata=ToolMetadata(
325
- name="unaids_tool",
326
- description=unaids_summary,
327
- ),
328
- )
329
- all_tools.append(doc_tool)
330
-
331
- pepfar_summary = (
332
- "This content contains guidance on community led monitoring from PEPFAR, also known as the President"
333
- " Emergency Plan for AIDS Relief. Use this tool if you want to answer any questions about CLM"
334
- " particularly when asked about PEPFAR guidance. If using this tool, mention that the response comes from"
335
- " PEPFAR guidance."
336
- )
337
-
338
- doc_tool = QueryEngineTool(
339
- query_engine=agents["pepfar_guidance"],
340
- metadata=ToolMetadata(
341
- name="pepfar_tool",
342
- description=pepfar_summary,
343
- ),
344
- )
345
- all_tools.append(doc_tool)
346
-
347
- ennaso_summary = (
348
- "This content contains guidance on community led monitoring from ENNASO, also known as the Eastern African"
349
- " National Networks for HIV and Health Service Organizations. Use this tool if you want to answer any questions about CLM"
350
- " particularly when asked about ENNASO guidance. If using this tool, mention that the response comes from"
351
- " ENNASO guidance."
352
- )
353
-
354
- doc_tool = QueryEngineTool(
355
- query_engine=agents["ennaso_guidance"],
356
- metadata=ToolMetadata(
357
- name="ennaso_tool",
358
- description=ennaso_summary,
359
- ),
360
- )
361
- all_tools.append(doc_tool)
362
-
363
- globalfund_summary = (
364
- "This content contains guidance on community led monitoring from the Global Fund to Fight HIV, Tuberculosis, and Malaria."
365
- " Use this tool if you want to answer any questions about CLM"
366
- " particularly when asked about Global Fund guidance. If using this tool, mention that the response comes from"
367
- " Global Fund guidance."
368
- )
369
- doc_tool = QueryEngineTool(
370
- query_engine=agents["globalfund_guidance"],
371
- metadata=ToolMetadata(
372
- name="globalfund_tool",
373
- description=globalfund_summary,
374
- ),
375
- )
376
- all_tools.append(doc_tool)
377
-
378
- # define an "object" index and retriever over these tools
379
- from llama_index import VectorStoreIndex
380
- from llama_index.objects import ObjectIndex, SimpleToolNodeMapping
381
-
382
- tool_mapping = SimpleToolNodeMapping.from_objects(all_tools)
383
- obj_index = ObjectIndex.from_objects(
384
- all_tools,
385
- tool_mapping,
386
- VectorStoreIndex,
387
- )
388
-
389
- from llama_index.agent import FnRetrieverOpenAIAgent
390
-
391
- top_agent = FnRetrieverOpenAIAgent.from_retriever(
392
- obj_index.as_retriever(similarity_top_k=2),
393
- system_prompt=""" \
394
- You are an agent designed to answer queries about about community led monitoring.
395
- Please always use the tools provided to answer a question.
396
- Do not rely on prior knowledge.\
397
-
398
- """,
399
- verbose=True,
400
- )
401
 
402
  import gradio as gr
403
 
404
  def clm(question: str, conversation_history: list[str]):
405
 
406
  context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
407
- response = top_agent.query("the user previously asked and received the following: " +
408
  context +
409
- " Here is the new question: " +
410
  question)
411
 
412
  conversation_history.append({"user": question, "chatbot": response.response})
413
 
414
- # num_queries += 1
415
- return response, conversation_history
416
-
417
- demo = gr.Interface(
418
- title = "CLM Chatbot Demo",
419
- fn=clm,
420
- inputs=["text", gr.State(value=[])],
421
- outputs=["text", gr.State()],
422
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
 
424
- demo.launch()
 
 
16
  llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
17
  service_context = ServiceContext.from_defaults(llm=llm)
18
 
19
+ PERSIST_DIR = "clm_guidance_metadata"
20
+ storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
21
+ index = load_index_from_storage(storage_context)
22
+ query_engine = index.as_query_engine(similarity_top_k=3, llm=OpenAI(model="gpt-3.5-turbo"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  import gradio as gr
25
 
26
  def clm(question: str, conversation_history: list[str]):
27
 
28
  context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
29
+ response = query_engine.query("the user previously asked and received the following: " +
30
  context +
 
31
  question)
32
 
33
  conversation_history.append({"user": question, "chatbot": response.response})
34
 
35
+ source1 = ("File Name: " +
36
+ response.source_nodes[0].metadata["file_name"] +
37
+ "\nPage Number: " +
38
+ response.source_nodes[0].metadata["page_label"] +
39
+ "\n Source Text: " +
40
+ response.source_nodes[0].text)
41
+
42
+ source2 = ("File Name: " +
43
+ response.source_nodes[1].metadata["file_name"] +
44
+ "\nPage Number: " +
45
+ response.source_nodes[1].metadata["page_label"] +
46
+ "\n Source Text: " +
47
+ response.source_nodes[1].text)
48
+
49
+ source3 = ("File Name: " +
50
+ response.source_nodes[2].metadata["file_name"] +
51
+ "\nPage Number: " +
52
+ response.source_nodes[2].metadata["page_label"] +
53
+ "\n Source Text: " +
54
+ response.source_nodes[2].text)
55
+
56
+ return response, source1, source2, source3, conversation_history
57
+
58
+ inputs = [gr.Textbox(lines=10, label="Question"),
59
+ gr.State(value=[])]
60
+ outputs = [
61
+ gr.Textbox(label="Chatbot Response", type="text"),
62
+ gr.Textbox(label="Source 1", max_lines = 10, autoscroll = False, type="text"),
63
+ gr.Textbox(label="Source 2", max_lines = 10, autoscroll = False, type="text"),
64
+ gr.Textbox(label="Source 3", max_lines = 10, autoscroll = False, type="text"),
65
+ gr.State()
66
+ ]
67
 
68
+ gr.Interface(fn=clm, inputs=inputs, outputs=outputs, title="CLM Chatbot",
69
+ description="Enter a question and see the processed outputs in collapsible boxes.").launch()