AliA1997 committed on
Commit
5dde853
·
1 Parent(s): 84a782b

Integrated multi-agent workflow from llama index.

Browse files
Files changed (8) hide show
  1. .gitignore +3 -0
  2. app.py +133 -53
  3. code_agent.py +49 -0
  4. requirements.txt +20 -0
  5. scientific_paper_agent.py +46 -0
  6. search_agent.py +7 -0
  7. tools.py +59 -0
  8. web_agent.py +40 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .env
2
+ /chat
3
+ /code
app.py CHANGED
@@ -1,5 +1,103 @@
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  def respond(
@@ -9,62 +107,44 @@ def respond(
9
  max_tokens,
10
  temperature,
11
  top_p,
12
- hf_token: gr.OAuthToken,
13
  ):
14
- """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
16
- """
17
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
-
19
- messages = [{"role": "system", "content": system_message}]
20
-
21
- messages.extend(history)
22
-
23
- messages.append({"role": "user", "content": message})
24
-
25
- response = ""
26
-
27
- for message in client.chat_completion(
28
- messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
- temperature=temperature,
32
- top_p=top_p,
33
- ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- chatbot = gr.ChatInterface(
47
- respond,
48
- type="messages",
49
- additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
- ),
60
- ],
61
- )
62
 
63
  with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
- chatbot.render()
 
 
 
 
 
 
 
 
 
67
 
 
 
 
 
 
 
68
 
69
  if __name__ == "__main__":
70
  demo.launch()
 
 
1
+ import os
2
  import gradio as gr
3
+ from llama_index.core.tools import FunctionTool
4
+ from llama_index.core.agent.workflow import AgentWorkflow, ReActAgent
5
+ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
6
+ from code_agent import initialize_code_agent
7
+ from scientific_paper_agent import load_scientific_paper_dataset, ScientificPaperRetriever
8
+ from search_agent import init_search_tool
9
+ from tools import math_tool_func, init_image_to_text
10
+ from web_agent import initialize_web_agent
11
+
12
# Interaction mode most recently selected in the UI. `respond` rebinds this
# through its own `global currentMode` statement; it is initialised here so
# the name exists at module level (a bare module-level `global` statement
# does not create it).
currentMode = "conversation"

# None when HF_TOKEN is not set in the environment.
hf_token = os.environ.get('HF_TOKEN')

# LLM shared by the ReActAgents defined below and by the agent returned from
# initialize_web_agent.
llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-7B-Instruct",
    token=hf_token
)

# --- Tools ---
image_to_text_tool = FunctionTool.from_defaults(
    fn=init_image_to_text,
    name="image_to_text_tool",
    description="Generate captions from an image URL using BLIP. Returns both conditional and unconditional captions."
)
search_tool = init_search_tool()
math_tool = FunctionTool.from_defaults(
    fn=math_tool_func,
    name="math_tool",
    description="Solving math problems using the Qwen2.5-Math-1.5B model."
)

scientific_paper_dataset = load_scientific_paper_dataset()
scientific_paper_tool = FunctionTool.from_defaults(
    fn=ScientificPaperRetriever(scientific_paper_dataset).run,
    name="scientific_paper_info_retriever",
    description="Retrieves detailed information about scientific papers."
)

# Define Agents
code_agent = initialize_code_agent()
image_to_text_agent = ReActAgent(
    name="image_to_text",
    description="Generate text captions from images",
    tools=[image_to_text_tool],
    system_prompt=(
        "You are an assistant specialized in image understanding. "
        "When given an image URL, use the image_to_text_tool to generate captions. "
        "Provide both conditional and unconditional descriptions in clear, concise language. "
        "Do not invent details beyond what the tool provides."
    ),
    llm=llm
)
math_agent = ReActAgent(
    name="math_solver",
    description="Solve math problems using a dedicated math model",
    tools=[math_tool],
    system_prompt=(
        "You are an assistant specialized in solving math problems. "
        # The prompt must name the tool exactly as registered above ("math_tool").
        "When given a math query, use the math_tool to compute the answer. "
        "Explain the solution clearly and step by step when possible, "
        "but keep the final answer concise and accurate."
    ),
    llm=llm
)
search_web_agent = ReActAgent(
    name="search_web",
    description="Searches the web for answers",
    tools=[search_tool],
    system_prompt=(
        "You are a helpful assistant. Use DuckDuckGoSearch to look up information. "
        "Always summarize the first useful result and return it directly. "
        "Do not keep searching repeatedly."
    ),
    llm=llm
)
scientific_paper_agent = ReActAgent(
    name="scientific_paper_agent",
    description="Search scientific papers for the agent",
    tools=[scientific_paper_tool],
    system_prompt="You are a helpful assistant that can answer scientific questions based on scientific papers.",
    llm=llm
)
query_engine_agent = initialize_web_agent(llm)


# DEFINE THE WORKFLOW
# root_agent must match the `name` of one of the agents listed here;
# "query_engine" is the name given to the agent built by initialize_web_agent.
multi_agent_workflow = AgentWorkflow(
    agents=[
        query_engine_agent,
        search_web_agent,
        math_agent,
        image_to_text_agent,
        scientific_paper_agent,
        code_agent
    ],
    root_agent="query_engine",
    initial_state={ "num_of_calls": 0 },
    state_prompt="Current state: {state}. User Message: {msg}"
)
100
+
101
 
102
 
103
  def respond(
 
107
  max_tokens,
108
  temperature,
109
  top_p,
110
+ mode
111
  ):
112
+ global currentMode
113
+ print("Current Mode: " + mode)
114
+
115
+ if mode == "Math Mode":
116
+ currentMode = "math"
117
+ elif mode == "Conversation Mode":
118
+ currentMode = "conversation"
119
+ elif mode == "Image Mode":
120
+ currentMode = "image"
121
+ else:
122
+ currentMode = "conversation"
123
+
124
+ yield multi_agent_workflow.run(message, max_tokens=max_tokens, temperature=temperature, top_p=top_p)
125
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  with gr.Blocks() as demo:
128
+ # Dropdown placed above the chat input
129
+ mode_dropdown = gr.Dropdown(
130
+ choices=["Math Mode", "Conversation Mode", "Image Mode"],
131
+ value="Conversation Mode",
132
+ label="Interaction Mode"
133
+ )
134
+
135
+ # ChatInterface without additional_inputs
136
+ chatbot = gr.ChatInterface(
137
+ fn=respond,
138
+ type="messages"
139
+ )
140
 
141
+ # Link dropdown value to respond function
142
+ mode_dropdown.change(
143
+ lambda m: m,
144
+ inputs=mode_dropdown,
145
+ outputs=[]
146
+ )
147
 
148
  if __name__ == "__main__":
149
  demo.launch()
150
+
code_agent.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import chromadb
3
+ from llama_index.core import VectorStoreIndex
4
+ from llama_index.core.tools import QueryEngineTool
5
+ from llama_index.vector_stores.chroma import ChromaVectorStore
6
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
7
+ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
8
+ # from llama_index.llms.litellm import LiteLLM
9
+ from llama_index.core.agent.workflow import ReActAgent
10
+
11
def initialize_code_agent():
    """Build the ReAct agent that answers from the persisted "code" collection.

    Opens (or creates) the Chroma collection ``code`` under ``./code_db``,
    wraps it in a vector index queried with the ``BAAI/bge-small-en-v1.5``
    embedding model, and exposes that index to the agent as a single
    query-engine tool.

    Reads ``HF_TOKEN`` and ``DEEPSEEK_TOKEN`` from the environment; either
    may be ``None`` when unset.

    Returns:
        ReActAgent: agent named ``code_engine`` with one tool,
        ``my_code_query_engine``.
    """
    hf_token = os.environ.get('HF_TOKEN')
    deepseek_token = os.environ.get('DEEPSEEK_TOKEN')

    code_db = chromadb.PersistentClient(path="./code_db")

    code_chroma_collection = code_db.get_or_create_collection('code')
    code_vector_store = ChromaVectorStore(chroma_collection=code_chroma_collection)

    embedding_model = HuggingFaceEmbedding(
        model_name="BAAI/bge-small-en-v1.5",
        device="cpu",
        token=hf_token,
    )
    index = VectorStoreIndex.from_vector_store(code_vector_store, embed_model=embedding_model)

    # NOTE(review): both api_key and token are supplied here — confirm the
    # inference client accepts the two together and which one it uses.
    code_llm = HuggingFaceInferenceAPI(
        model_name="deepseek-ai/deepseek-coder-1.3b-instruct",
        api_key=deepseek_token,
        token=hf_token,
    )
    code_query_engine = index.as_query_engine(
        llm=code_llm,
        similarity_top_k=3
    )
    code_query_engine_tool = QueryEngineTool.from_defaults(
        query_engine=code_query_engine,
        name="my_code_query_engine",
        description="Code Query engine for the agent",
        return_direct=False
    )
    # The description and prompt describe this agent's actual role (code
    # lookup) so it is distinguishable from the other agents in a workflow;
    # previously they were copied from a calculator/query-engine agent.
    return ReActAgent(
        name="code_engine",
        description="Answers programming questions using the indexed code collection",
        tools=[code_query_engine_tool],
        system_prompt=(
            "You are a coding assistant. Use your code query engine tool to look up "
            "relevant code before answering programming questions."
        ),
        llm=code_llm
    )
49
+
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate
2
+ datasets
3
+ smolagents
4
+ llama-index
5
+ huggingface_hub
6
+ llama-index-llms-huggingface-api
7
+ langchain_core
8
+ langchain_community
9
+ llama-index-embeddings-huggingface
10
+ llama-index-tools-duckduckgo
11
+ rank_bm25
12
+ chromadb
13
+ llama-index-vector-stores-chroma
14
+ torch
15
+ torchvision
16
+ torchaudio
17
+ pillow
18
+ transformers
19
+ llama-index-llms-litellm
20
+ llama-index-utils-workflow
scientific_paper_agent.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datasets
2
+ from langchain_core.documents import Document
3
+ from langchain_community.retrievers import BM25Retriever
4
+
5
def load_scientific_paper_dataset():
    """Load the scientific-paper summaries and wrap each row in a Document.

    Reads the ``train`` split of
    ``gsasikiran/Summarize-Scientific-Papers-Processed``. Each row becomes one
    Document whose ``page_content`` is a newline-separated list of
    ``"<label>: <value>"`` lines and whose metadata holds the paper title.

    Returns:
        list[Document]: one Document per dataset row, in dataset order.
    """
    # (label, dataset column) pairs, in the order they appear in page_content.
    labelled_columns = (
        ("Title", "title"),
        ("Authors", "authors"),
        ("What is it", "article_classification"),
        ("Claims", "claims"),
        ("Contradictions", "contradictions_and_limitations"),
        ("Ethical Considerations", "ethical_considerations"),
        ("Summary", "executive_summary"),
        ("Subfield", "field_subfield"),
        ("Theorical Implications", "interpretation_and_theoretical_implications"),
        ("Method to Retrieve Info", "methodological_details"),
        ("People used to get data", "procedures_and_architectures"),
        ("Context of Research", "research_context"),
        ("Research Hypothesis", "research_question_and_hypothesis"),
        ("Three Takeways", "three_takeaways"),
        ("Type of Paper", "type_of_paper"),
    )

    rows = datasets.load_dataset("gsasikiran/Summarize-Scientific-Papers-Processed", split="train")

    documents = []
    for row in rows:
        body = "\n".join(f"{label}: {row[column]}" for label, column in labelled_columns)
        documents.append(Document(page_content=body, metadata={"title": row["title"]}))
    return documents
32
+
33
+
34
+ # --- Scientific Paper Retriever ---
35
class ScientificPaperRetriever:
    """Keyword (BM25) lookup over a list of scientific-paper Documents."""

    def __init__(self, docs):
        """Index ``docs`` with a LangChain BM25Retriever.

        Args:
            docs: LangChain Document objects to search over.
        """
        # Build BM25 retriever from documents
        self.retriever = BM25Retriever.from_documents(docs)

    def run(self, query: str) -> str:
        """Return the text of the best-matching papers for ``query``.

        Args:
            query: free-text search string.

        Returns:
            The ``page_content`` of up to three matches joined by blank
            lines, or a fixed "not found" message when nothing matches.
        """
        # LangChain retrievers are queried with .invoke(); .retrieve() and
        # the .text attribute belong to LlamaIndex objects, not LangChain ones.
        results = self.retriever.invoke(query)
        if not results:
            return "No matching scientific paper found."
        return "\n\n".join(doc.page_content for doc in results[:3])
46
+
search_agent.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from llama_index.core.tools import FunctionTool
2
+ from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
3
+
4
def init_search_tool():
    """Return a FunctionTool wrapping DuckDuckGo's full-search method.

    A fresh DuckDuckGoSearchToolSpec is created on every call and its
    ``duckduckgo_full_search`` bound method is handed to
    ``FunctionTool.from_defaults`` with no further arguments.
    """
    return FunctionTool.from_defaults(DuckDuckGoSearchToolSpec().duckduckgo_full_search)
tools.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import os
3
+ import torch
4
+ from PIL import Image
5
+ from transformers import BlipProcessor, BlipForConditionalGeneration
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer
7
+ from llama_index.core.tools import FunctionTool
8
+
9
# None when HF_TOKEN is not set in the environment.
hf_token = os.environ.get("HF_TOKEN")

# Load processor and model once (outside the function for efficiency)
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

# Math model and tokenizer, also loaded once at import time.
# `token` replaces the deprecated `use_auth_token` keyword of from_pretrained.
math_model_id = "Qwen/Qwen2.5-Math-1.5B"
math_tokenizer = AutoTokenizer.from_pretrained(math_model_id, token=hf_token)
math_model = AutoModelForCausalLM.from_pretrained(
    math_model_id,
    dtype=torch.float16,
    device_map="auto",
    token=hf_token
)
23
+
24
+
25
def math_tool_func(problem: str) -> str:
    """Run ``problem`` through the module-level math model and return its text.

    Args:
        problem: the math question, passed to the tokenizer as-is.

    Returns:
        The first generated sequence decoded with special tokens removed
        (at most 128 new tokens are generated).
        NOTE(review): the whole sequence is decoded, which for a causal LM
        presumably includes the prompt — confirm this is intended.
    """
    encoded = math_tokenizer(problem, return_tensors="pt").to(math_model.device)
    generated = math_model.generate(**encoded, max_new_tokens=128)
    return math_tokenizer.decode(generated[0], skip_special_tokens=True)
30
+
31
+
32
+
33
def init_image_to_text(img_url: str) -> dict:
    """
    Convert an image URL into text captions using BLIP.

    Args:
        img_url (str): URL of the image to caption.

    Returns:
        dict: Contains both conditional and unconditional captions, under
        the keys "conditional_caption" and "unconditional_caption".

    Raises:
        requests.RequestException: if the download times out or the server
        answers with an HTTP error status.
    """
    # Bound the wait, surface HTTP errors instead of handing an error page to
    # PIL, and release the connection once the image has been decoded.
    # convert() forces PIL to read the pixel data while the stream is open.
    with requests.get(img_url, stream=True, timeout=30) as response:
        response.raise_for_status()
        raw_image = Image.open(response.raw).convert("RGB")

    # Conditional captioning: the model continues the given text prefix.
    conditional_prompt = "a photography of"
    inputs_cond = processor(raw_image, conditional_prompt, return_tensors="pt")
    out_cond = model.generate(**inputs_cond)
    conditional_caption = processor.decode(out_cond[0], skip_special_tokens=True)

    # Unconditional captioning: image only, no text prefix.
    inputs_uncond = processor(raw_image, return_tensors="pt")
    out_uncond = model.generate(**inputs_uncond)
    unconditional_caption = processor.decode(out_uncond[0], skip_special_tokens=True)

    return {
        "conditional_caption": conditional_caption,
        "unconditional_caption": unconditional_caption,
    }
web_agent.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import chromadb
3
+ from llama_index.core import VectorStoreIndex
4
+ from llama_index.vector_stores.chroma import ChromaVectorStore
5
+ from llama_index.core.tools import QueryEngineTool
6
+
7
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
8
+ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
9
+ from llama_index.core.agent.workflow import ReActAgent
10
+
11
def initialize_web_agent(llm: HuggingFaceInferenceAPI):
    """Build the ReAct agent that answers from the persisted "chat" collection.

    Opens (or creates) the Chroma collection ``chat`` under ``./chat_db``,
    wraps it in a vector index queried with the ``BAAI/bge-small-en-v1.5``
    embedding model on CPU, and exposes that index to the agent as a single
    query-engine tool.

    Args:
        llm: LLM used both by the query engine and by the agent itself.

    Returns:
        ReActAgent: agent named ``query_engine`` with one tool,
        ``my_query_engine``.
    """
    db = chromadb.PersistentClient(path="./chat_db")
    chroma_collection = db.get_or_create_collection("chat")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    embedding_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5", device="cpu")
    index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embedding_model)

    query_engine = index.as_query_engine(
        llm=llm,
        similarity_top_k=3
    )

    query_engine_tool = QueryEngineTool.from_defaults(
        query_engine=query_engine,
        name="my_query_engine",
        description="Query engine for the agent",
        return_direct=False
    )

    # The prompt describes what this agent can actually do (retrieve from its
    # query engine); the previous text described a calculator.
    return ReActAgent(
        name="query_engine",
        description="Query engine for the agent",
        tools=[query_engine_tool],
        system_prompt=(
            "You are a helpful assistant. Use your query engine tool to retrieve "
            "relevant context before answering."
        ),
        llm=llm
    )
39
+
40
+