Spaces:

YoniFriedman
/

CLMGuidance

Sleeping

App Files Community

YoniFriedman committed on Feb 26, 2024

Commit

02d8135

verified ·

1 Parent(s): eca8b37

Update to metadata approach

Browse files

Files changed (1) hide show

app.py +39 -394

app.py CHANGED Viewed

@@ -16,409 +16,54 @@ os.environ["OPENAI_API_KEY"]
 llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
 service_context = ServiceContext.from_defaults(llm=llm)
-# Load query engines
-claw_guidance_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/claw_guidance"))
-claw_guidance_summary_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/claw_guidance_summary"))
-claw_guidance_vector_query_engine = claw_guidance_index.as_query_engine(similarity_top_k=2)
-claw_guidance_summary_query_engine = claw_guidance_summary_index.as_query_engine()
-itpc_guidance_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/itpc_guidance"))
-itpc_guidance_summary_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/itpc_guidance_summary"))
-itpc_guidance_vector_query_engine = itpc_guidance_index.as_query_engine(similarity_top_k=2)
-itpc_guidance_summary_query_engine = itpc_guidance_summary_index.as_query_engine()
-unaids_guidance_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/unaids_guidance"))
-unaids_guidance_summary_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/unaids_guidance_summary"))
-unaids_guidance_vector_query_engine = unaids_guidance_index.as_query_engine(similarity_top_k=2)
-unaids_guidance_summary_query_engine = unaids_guidance_summary_index.as_query_engine()
-pepfar_guidance_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/pepfar_guidance"))
-pepfar_guidance_summary_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/pepfar_guidance_summary"))
-pepfar_guidance_vector_query_engine = pepfar_guidance_index.as_query_engine(similarity_top_k=2)
-pepfar_guidance_summary_query_engine = pepfar_guidance_summary_index.as_query_engine()
-ennaso_guidance_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/ennaso_guidance"))
-ennaso_guidance_summary_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/ennaso_guidance_summary"))
-ennaso_guidance_vector_query_engine = ennaso_guidance_index.as_query_engine(similarity_top_k=2)
-ennaso_guidance_summary_query_engine = ennaso_guidance_summary_index.as_query_engine()
-globalfund_guidance_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/globalfund_guidance"))
-globalfund_guidance_summary_index = load_index_from_storage(StorageContext.from_defaults(persist_dir = "./storage/globalfund_guidance_summary"))
-globalfund_guidance_vector_query_engine = globalfund_guidance_index.as_query_engine(similarity_top_k=2)
-globalfund_guidance_summary_query_engine = globalfund_guidance_summary_index.as_query_engine()
-from llama_index.agent import OpenAIAgent
-agents = {}
-# define tools
-query_engine_tools = [
-    QueryEngineTool(
-        query_engine=claw_guidance_vector_query_engine,
-        metadata=ToolMetadata(
-            name="claw_guidance_vector_tool",
-            description=(
-                "Useful for questions related to specific questions about best practices and guidance"
-                " about community led monitoring, also known as CLM."
-            ),
-        ),
-    ),
-    QueryEngineTool(
-        query_engine=claw_guidance_summary_query_engine,
-        metadata=ToolMetadata(
-            name="claw_guidance_summary_tool",
-            description=(
-                "Useful for requests that require a wholistic summary related to guidance on CLM,"
-                " or community led monitoring. For specific questions about CLM, please use the vector tool."
-            ),
-        ),
-    ),
-]
-# build agent
-function_llm = OpenAI(model="gpt-4", temperature = 0)
-agent = OpenAIAgent.from_tools(
-    query_engine_tools,
-    llm=function_llm,
-    verbose=True,
-)
-agents["claw_guidance"] = agent
-# define tools
-query_engine_tools = [
-    QueryEngineTool(
-        query_engine=itpc_guidance_vector_query_engine,
-        metadata=ToolMetadata(
-            name="itpc_guidance_vector_tool",
-            description=(
-                "Useful for questions related to specific questions about best practices and guidance"
-                " about community led monitoring, also known as CLM."
-            ),
-        ),
-    ),
-    QueryEngineTool(
-        query_engine=claw_guidance_summary_query_engine,
-        metadata=ToolMetadata(
-            name="itpc_guidance_summary_tool",
-            description=(
-                "Useful for requests that require a wholistic summary related to guidance on CLM,"
-                " or community led monitoring. For specific questions about CLM, please use the vector tool."
-            ),
-        ),
-    ),
-]
-# build agent
-function_llm = OpenAI(model="gpt-4", temperature = 0)
-agent = OpenAIAgent.from_tools(
-    query_engine_tools,
-    llm=function_llm,
-    verbose=True,
-#         system_prompt=f"""\
-# You are a specialized agent designed to answer queries about guidance on community led monitoring from ITPC, also
-# known as the International Treatment Preparedness Coalition.
-# You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
-# """,
-)
-agents["itpc_guidance"] = agent
-# define tools
-query_engine_tools = [
-    QueryEngineTool(
-        query_engine=unaids_guidance_vector_query_engine,
-        metadata=ToolMetadata(
-            name="unaids_guidance_vector_tool",
-            description=(
-                "Useful for questions related to specific questions about best practices and guidance"
-                " about community led monitoring, also known as CLM."
-            ),
-        ),
-    ),
-    QueryEngineTool(
-        query_engine=unaids_guidance_summary_query_engine,
-        metadata=ToolMetadata(
-            name="unaids_guidance_summary_tool",
-            description=(
-                "Useful for requests that require a wholistic summary related to guidance on CLM,"
-                " or community led monitoring. For specific questions about CLM, please use the vector tool."
-            ),
-        ),
-    ),
-]
-# build agent
-function_llm = OpenAI(model="gpt-4", temperature = 0)
-agent = OpenAIAgent.from_tools(
-    query_engine_tools,
-    llm=function_llm,
-    verbose=True,
-#         system_prompt=f"""\
-# You are a specialized agent designed to answer queries about guidance on community led monitoring from UNAIDS.
-# You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
-# """,
-)
-agents["unaids_guidance"] = agent
-# define tools
-query_engine_tools = [
-    QueryEngineTool(
-        query_engine=pepfar_guidance_vector_query_engine,
-        metadata=ToolMetadata(
-            name="pepfar_guidance_vector_tool",
-            description=(
-                "Useful for questions related to specific questions about best practices and guidance"
-                " about community led monitoring, also known as CLM."
-            ),
-        ),
-    ),
-    QueryEngineTool(
-        query_engine=pepfar_guidance_summary_query_engine,
-        metadata=ToolMetadata(
-            name="pepfar_guidance_summary_tool",
-            description=(
-                "Useful for requests that require a wholistic summary related to guidance on CLM,"
-                " or community led monitoring. For specific questions about CLM, please use the vector tool."
-            ),
-        ),
-    ),
-]
-# build agent
-function_llm = OpenAI(model="gpt-4", temperature = 0)
-agent = OpenAIAgent.from_tools(
-    query_engine_tools,
-    llm=function_llm,
-    verbose=True,
-#         system_prompt=f"""\
-# You are a specialized agent designed to answer queries about guidance on community led monitoring from PEPFAR.
-# You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
-# """,
-)
-agents["pepfar_guidance"] = agent
-# define tools
-query_engine_tools = [
-    QueryEngineTool(
-        query_engine=ennaso_guidance_vector_query_engine,
-        metadata=ToolMetadata(
-            name="ennaso_guidance_vector_tool",
-            description=(
-                "Useful for questions related to specific questions about best practices and guidance"
-                " about community led monitoring, also known as CLM."
-            ),
-        ),
-    ),
-    QueryEngineTool(
-        query_engine=ennaso_guidance_summary_query_engine,
-        metadata=ToolMetadata(
-            name="ennaso_guidance_summary_tool",
-            description=(
-                "Useful for requests that require a wholistic summary related to guidance on CLM,"
-                " or community led monitoring. For specific questions about CLM, please use the vector tool."
-            ),
-        ),
-    ),
-]
-# build agent
-function_llm = OpenAI(model="gpt-4", temperature = 0)
-agent = OpenAIAgent.from_tools(
-    query_engine_tools,
-    llm=function_llm,
-    verbose=True,
-#         system_prompt=f"""\
-# You are a specialized agent designed to answer queries about guidance on community led monitoring from ENNASO,
-# the Eastern African National Networks of AIDS and Health Service Organizations.
-# You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
-# """,
-)
-agents["ennaso_guidance"] = agent
-# define tools
-query_engine_tools = [
-    QueryEngineTool(
-        query_engine=globalfund_guidance_vector_query_engine,
-        metadata=ToolMetadata(
-            name="globalfund_guidance_vector_tool",
-            description=(
-                "Useful for questions related to specific questions about best practices and guidance"
-                " about community led monitoring, also known as CLM."
-            ),
-        ),
-    ),
-    QueryEngineTool(
-        query_engine=globalfund_guidance_summary_query_engine,
-        metadata=ToolMetadata(
-            name="globalfund_guidance_summary_tool",
-            description=(
-                "Useful for requests that require a wholistic summary related to guidance on CLM,"
-                " or community led monitoring. For specific questions about CLM, please use the vector tool."
-            ),
-        ),
-    ),
-]
-# build agent
-function_llm = OpenAI(model="gpt-4", temperature = 0)
-agent = OpenAIAgent.from_tools(
-    query_engine_tools,
-    llm=function_llm,
-    verbose=True,
-#         system_prompt=f"""\
-# You are a specialized agent designed to answer queries about guidance on community led monitoring from the Global
-# Fund to Fight AIDS, Tuberculosis, and Malaria.
-# You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
-# """,
-)
-agents["globalfund_guidance"] = agent
-all_tools = []
-claw_summary = (
-    "This content contains guidance on community led monitoring from CLAW, also known as"
-    " Community Led Monitoring Working Group. Use this tool if you want to answer any questions about CLM"
-    " particularly when asked about CLAW guidance. If using this tool, mention that the response comes from"
-    " CLAW guidance."
-)
-doc_tool = QueryEngineTool(
-    query_engine=agents["claw_guidance"],
-    metadata=ToolMetadata(
-        name="claw_tool",
-        description=claw_summary,
-    ),
-)
-all_tools.append(doc_tool)
-itpc_summary = (
-    "This content contains guidance on community led monitoring from ITPC, also known as"
-    " International Treatment Preparedness Coalition. Use this tool if you want to answer any questions about CLM"
-    " particularly when asked about ITPC guidance. If using this tool, mention that the response comes from"
-    " ITPC guidance."
-)
-doc_tool = QueryEngineTool(
-    query_engine=agents["itpc_guidance"],
-    metadata=ToolMetadata(
-        name="itpc_tool",
-        description=itpc_summary,
-    ),
-)
-all_tools.append(doc_tool)
-unaids_summary = (
-    "This content contains guidance on community led monitoring from UNAIDS."
-    " Use this tool if you want to answer any questions about CLM"
-    " particularly when asked about UNAIDS guidance. If using this tool, mention that the response comes from"
-    " UNAIDS guidance."
-)
-doc_tool = QueryEngineTool(
-    query_engine=agents["unaids_guidance"],
-    metadata=ToolMetadata(
-        name="unaids_tool",
-        description=unaids_summary,
-    ),
-)
-all_tools.append(doc_tool)
-pepfar_summary = (
-    "This content contains guidance on community led monitoring from PEPFAR, also known as the President"
-    " Emergency Plan for AIDS Relief. Use this tool if you want to answer any questions about CLM"
-    " particularly when asked about PEPFAR guidance. If using this tool, mention that the response comes from"
-    " PEPFAR guidance."
-)
-doc_tool = QueryEngineTool(
-    query_engine=agents["pepfar_guidance"],
-    metadata=ToolMetadata(
-        name="pepfar_tool",
-        description=pepfar_summary,
-    ),
-)
-all_tools.append(doc_tool)
-ennaso_summary = (
-    "This content contains guidance on community led monitoring from ENNASO, also known as the Eastern African"
-    " National Networks for HIV and Health Service Organizations. Use this tool if you want to answer any questions about CLM"
-    " particularly when asked about ENNASO guidance. If using this tool, mention that the response comes from"
-    " ENNASO guidance."
-)
-doc_tool = QueryEngineTool(
-    query_engine=agents["ennaso_guidance"],
-    metadata=ToolMetadata(
-        name="ennaso_tool",
-        description=ennaso_summary,
-    ),
-)
-all_tools.append(doc_tool)
-globalfund_summary = (
-    "This content contains guidance on community led monitoring from the Global Fund to Fight HIV, Tuberculosis, and Malaria."
-    " Use this tool if you want to answer any questions about CLM"
-    " particularly when asked about Global Fund guidance. If using this tool, mention that the response comes from"
-    " Global Fund guidance."
-)
-doc_tool = QueryEngineTool(
-    query_engine=agents["globalfund_guidance"],
-    metadata=ToolMetadata(
-        name="globalfund_tool",
-        description=globalfund_summary,
-    ),
-)
-all_tools.append(doc_tool)
-# define an "object" index and retriever over these tools
-from llama_index import VectorStoreIndex
-from llama_index.objects import ObjectIndex, SimpleToolNodeMapping
-tool_mapping = SimpleToolNodeMapping.from_objects(all_tools)
-obj_index = ObjectIndex.from_objects(
-    all_tools,
-    tool_mapping,
-    VectorStoreIndex,
-)
-from llama_index.agent import FnRetrieverOpenAIAgent
-top_agent = FnRetrieverOpenAIAgent.from_retriever(
-    obj_index.as_retriever(similarity_top_k=2),
-    system_prompt=""" \
-You are an agent designed to answer queries about about community led monitoring.
-Please always use the tools provided to answer a question.
-Do not rely on prior knowledge.\
-""",
-    verbose=True,
-)
 import gradio as gr
 def clm(question: str, conversation_history: list[str]):
     context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
-    response = top_agent.query("the user previously asked and received the following: " +
                                   context +
-                                  " Here is the new question: " +
                                   question)
     conversation_history.append({"user": question, "chatbot": response.response})
-    # num_queries += 1
-    return response, conversation_history
-demo = gr.Interface(
-    title = "CLM Chatbot Demo",
-    fn=clm,
-    inputs=["text", gr.State(value=[])],
-    outputs=["text", gr.State()],
-)
-demo.launch()

 llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
 service_context = ServiceContext.from_defaults(llm=llm)
+PERSIST_DIR = "clm_guidance_metadata"
+storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+index = load_index_from_storage(storage_context)
+query_engine = index.as_query_engine(similarity_top_k=3, llm=OpenAI(model="gpt-3.5-turbo"))
 import gradio as gr
 def clm(question: str, conversation_history: list[str]):
     context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])
+    response = query_engine.query("the user previously asked and received the following: " +
                                   context +
                                   question)
     conversation_history.append({"user": question, "chatbot": response.response})
+    source1 = ("File Name: " +
+      response.source_nodes[0].metadata["file_name"] +
+      "\nPage Number: " +
+      response.source_nodes[0].metadata["page_label"] +
+      "\n Source Text: " +
+     response.source_nodes[0].text)
+    source2 = ("File Name: " +
+      response.source_nodes[1].metadata["file_name"] +
+      "\nPage Number: " +
+      response.source_nodes[1].metadata["page_label"] +
+      "\n Source Text: " +
+     response.source_nodes[1].text)
+    source3 = ("File Name: " +
+      response.source_nodes[2].metadata["file_name"] +
+      "\nPage Number: " +
+      response.source_nodes[2].metadata["page_label"] +
+      "\n Source Text: " +
+     response.source_nodes[2].text)
+    return response, source1, source2, source3, conversation_history
+inputs = [gr.Textbox(lines=10, label="Question"),
+          gr.State(value=[])]
+outputs = [
+    gr.Textbox(label="Chatbot Response", type="text"),
+    gr.Textbox(label="Source 1", max_lines = 10, autoscroll = False, type="text"),
+    gr.Textbox(label="Source 2", max_lines = 10, autoscroll = False, type="text"),
+    gr.Textbox(label="Source 3", max_lines = 10, autoscroll = False, type="text"),
+    gr.State()
+]
+gr.Interface(fn=clm, inputs=inputs, outputs=outputs, title="CLM Chatbot",
+             description="Enter a question and see the processed outputs in collapsible boxes.").launch()