AliA1997 committed on
Commit
a6dbfdf
·
1 Parent(s): dd75c3c

Completed Final Assignment for Huggingface Agents Course

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ __pycache__
app.py CHANGED
@@ -6,25 +6,56 @@ import pandas as pd
6
  from init_agent import build_workflow
7
  from langchain_core.messages import HumanMessage
8
 
 
9
  # (Keep Constants as is)
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
-
13
  # --- Basic Agent Definition ---
14
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
15
  class BasicAgent:
16
  """A langgraph agent."""
17
  workflow: Optional[Any]
 
18
  def __init__(self):
19
  print("BasicAgent initialized.")
20
  self.workflow = build_workflow()
21
 
22
  def __call__(self, question: str) -> str:
23
  print(f"Agent received question (first 50 chars): {question[:50]}...")
 
 
24
  messages = [HumanMessage(content=question)]
25
- result = self.workflow.invoke({"messages": messages})
26
- answer = result['messages'][-1].content
27
- return answer # kein [14:] mehr nötig!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def run_and_submit_all( profile: gr.OAuthProfile | None):
30
  """
 
6
  from init_agent import build_workflow
7
  from langchain_core.messages import HumanMessage
8
 
9
+
10
  # (Keep Constants as is)
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
13
  # --- Basic Agent Definition ---
 
14
  class BasicAgent:
15
  """A langgraph agent."""
16
  workflow: Optional[Any]
17
+
18
  def __init__(self):
19
  print("BasicAgent initialized.")
20
  self.workflow = build_workflow()
21
 
22
  def __call__(self, question: str) -> str:
23
  print(f"Agent received question (first 50 chars): {question[:50]}...")
24
+
25
+ # Always wrap the question as a HumanMessage
26
  messages = [HumanMessage(content=question)]
27
+
28
+ # Run the workflow
29
+ result = self.workflow.invoke({
30
+ "ai_agent": None,
31
+ "messages": messages,
32
+ "classification": "not coding"
33
+ })
34
+
35
+ # --- FIX: safely extract the final answer ---
36
+ final_messages = result.get("messages", [])
37
+
38
+ if not final_messages:
39
+ return "No answer produced."
40
+
41
+ last_msg = final_messages[-1]
42
+
43
+ # LangChain messages always have .content, but sometimes it's a list or None
44
+ content = getattr(last_msg, "content", None)
45
+
46
+ # If content is a list (Gemini, some HF models), flatten it
47
+ if isinstance(content, list):
48
+ content = " ".join(
49
+ part.get("text", "") if isinstance(part, dict) else str(part)
50
+ for part in content
51
+ )
52
+
53
+ # Fallback if still empty
54
+ if not content:
55
+ content = str(last_msg)
56
+
57
+ return content
58
+
59
 
60
  def run_and_submit_all( profile: gr.OAuthProfile | None):
61
  """
init_agent.py CHANGED
@@ -3,37 +3,33 @@ from transformers import pipeline
3
  from typing import Annotated, TypedDict, Optional, Any
4
 
5
  from langgraph.graph import StateGraph, START, END
 
 
6
  from langgraph.graph.message import add_messages
7
-
8
  from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
9
- from langchain_core.messages import AnyMessage
10
- from langchain_community.tools import DuckDuckGoSearchRun
11
  from langchain_core.tools import Tool
 
 
 
12
 
13
  hf_token = os.environ.get("HF_TOKEN")
14
 
15
-
16
- # -----------------------------
17
- # CLASSIFIER
18
- # -----------------------------
19
- def init_classifier():
20
- return pipeline(
21
- "zero-shot-classification",
22
- model="cross-encoder/nli-distilroberta-base"
23
- )
24
-
25
 
26
  # -----------------------------
27
  # CODE LLM TOOL
28
  # -----------------------------
29
- def run_code_llm(prompt: str) -> str:
30
  """Call the coder model directly as a tool."""
31
  coder = HuggingFaceEndpoint(
32
  repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
33
  huggingfacehub_api_token=hf_token
34
  )
35
  chat = ChatHuggingFace(llm=coder, verbose=True)
36
- result = chat.invoke([{"role": "user", "content": prompt}])
37
  return result.content
38
 
39
 
@@ -43,21 +39,44 @@ code_llm_tool = Tool(
43
  func=run_code_llm
44
  )
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  # -----------------------------
48
  # AGENT WRAPPER
49
  # -----------------------------
50
  class CurrentAgent:
51
- def __init__(self):
52
- self.current_llm = HuggingFaceEndpoint(
53
- repo_id="Qwen/Qwen3-VL-8B-Instruct",
54
- huggingfacehub_api_token=hf_token
55
- )
56
- self.current_chat = ChatHuggingFace(
57
- llm=self.current_llm,
58
- verbose=True,
59
- tools=[DuckDuckGoSearchRun(), code_llm_tool]
60
- )
61
 
62
 
63
  # -----------------------------
@@ -69,29 +88,6 @@ class AgentState(TypedDict):
69
  messages: Annotated[list[AnyMessage], add_messages]
70
 
71
 
72
- # -----------------------------
73
- # CLASSIFICATION NODE
74
- # -----------------------------
75
- def classify(state: AgentState) -> AgentState:
76
- classifier = init_classifier()
77
- message = state["messages"][-1].content
78
-
79
- result = classifier(message, ["coding", "not coding"])
80
- label = result["labels"][0]
81
- score = result["scores"][0]
82
-
83
- new_class = "coding" if (label == "coding" and score > 0.6) else "not coding"
84
-
85
- if state["ai_agent"] is None:
86
- state["ai_agent"] = CurrentAgent()
87
-
88
- return {
89
- "ai_agent": state["ai_agent"],
90
- "classification": new_class,
91
- "messages": state["messages"]
92
- }
93
-
94
-
95
  # -----------------------------
96
  # GENERAL ASSISTANT NODE
97
  # -----------------------------
@@ -99,50 +95,74 @@ def general_assistant(state: AgentState) -> AgentState:
99
  if state["ai_agent"] is None:
100
  state["ai_agent"] = CurrentAgent()
101
 
102
- updated = [state["ai_agent"].current_chat.invoke(state["messages"])]
103
 
104
  return {
105
  "ai_agent": state["ai_agent"],
106
  "classification": state["classification"],
107
- "messages": updated
108
  }
109
 
 
 
 
110
 
111
- # -----------------------------
112
- # CODE ASSISTANT NODE
113
- # -----------------------------
114
- def code_assistant(state: AgentState) -> AgentState:
 
 
 
115
  if state["ai_agent"] is None:
116
  state["ai_agent"] = CurrentAgent()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
- # The agent will automatically call the code_llm tool
119
- updated = [state["ai_agent"].current_chat.invoke(state["messages"])]
120
 
121
  return {
122
  "ai_agent": state["ai_agent"],
123
  "classification": state["classification"],
124
- "messages": updated
125
  }
126
 
127
 
128
- # -----------------------------
129
- # ROUTER
130
- # -----------------------------
131
- def route(state: AgentState):
132
- return "code_assistant" if state["classification"] == "coding" else "general_assistant"
133
-
134
-
135
  # -----------------------------
136
  # WORKFLOW
137
  # -----------------------------
138
  def build_workflow() -> Any:
139
  graph = StateGraph(AgentState)
140
-
141
- graph.add_node("classify", classify)
142
  graph.add_node("general_assistant", general_assistant)
143
-
144
- graph.add_edge(START, "classify")
145
- graph.add_edge("classify", "general_assistant")
 
 
 
 
 
 
 
146
  graph.add_edge("general_assistant", END)
147
-
148
  return graph.compile()
 
3
  from typing import Annotated, TypedDict, Optional, Any
4
 
5
  from langgraph.graph import StateGraph, START, END
6
+ from langgraph.prebuilt import tools_condition
7
+ from langgraph.prebuilt import ToolNode
8
  from langgraph.graph.message import add_messages
9
+ # from langchain_google_genai import ChatGoogleGenerativeAI
10
  from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
11
+ from langchain_openai import ChatOpenAI
12
+ from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
13
  from langchain_core.tools import Tool
14
+ from math_tools import add, subtract, multiply, modulus, divide
15
+ from search_tools import wiki_search, web_search, arvix_search, question_search, vector_store
16
+ # from init_models import image_to_text_model
17
 
18
  hf_token = os.environ.get("HF_TOKEN")
19
 
20
+ google_api_key = os.environ.get("GOOGLE_API_KEY")
 
 
 
 
 
 
 
 
 
21
 
22
  # -----------------------------
23
  # CODE LLM TOOL
24
  # -----------------------------
25
def run_code_llm(input: str) -> str:
    """Call the coder model directly as a tool.

    Args:
        input: The prompt forwarded verbatim to the coder model as a user
            message.

    Returns:
        The text content of the coder model's reply.
    """
    # NOTE(review): `input` shadows the builtin of the same name; kept as-is
    # because tool frameworks may bind arguments by parameter name — confirm
    # before renaming.
    # A new endpoint + chat wrapper is constructed on every call; consider
    # caching if this tool turns out to be hot.
    coder = HuggingFaceEndpoint(
        repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
        huggingfacehub_api_token=hf_token
    )
    chat = ChatHuggingFace(llm=coder, verbose=True)
    result = chat.invoke([{"role": "user", "content": input}])
    return result.content
34
 
35
 
 
39
  func=run_code_llm
40
  )
41
 
42
+ ## Classify images
43
+
44
+ ## Classify videos
45
+
46
+ ## Classify other items
47
+
48
+
49
+ # def run_image_to_text_llm(prompt: str) -> str:
50
+ # """Call the image to ext model directly as a tool."""
51
+ # raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
52
+
53
+ # # conditional image captioning
54
+ # text = "a photography of"
55
+ # inputs = processor(raw_image, text, return_tensors="pt").to("cuda")
56
+
57
+ # out = model.generate(**inputs)
58
+
59
+
60
# Tool set bound to the chat model: arithmetic helpers (math_tools), the
# coder-LLM tool defined above, and search tools (arXiv, web, vector-store
# question lookup, Wikipedia) from search_tools.
tools = [
    add,
    code_llm_tool,
    divide,
    subtract,
    multiply,
    modulus,
    arvix_search,
    web_search,
    question_search,
    wiki_search
]
72
 
73
  # -----------------------------
74
  # AGENT WRAPPER
75
  # -----------------------------
76
class CurrentAgent:
    """Thin wrapper holding the tool-bound chat model used by the graph nodes."""

    def __init__(self):
        # Build the base chat model, then attach the shared tool set so the
        # model can emit tool calls for the graph's ToolNode to execute.
        base_chat = ChatOpenAI(model="gpt-5-nano")
        self.current_chat = base_chat.bind_tools(tools)
 
 
 
 
 
 
 
80
 
81
 
82
  # -----------------------------
 
88
  messages: Annotated[list[AnyMessage], add_messages]
89
 
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  # -----------------------------
92
  # GENERAL ASSISTANT NODE
93
  # -----------------------------
 
95
  if state["ai_agent"] is None:
96
  state["ai_agent"] = CurrentAgent()
97
 
98
+ response = state["ai_agent"].current_chat.invoke(state["messages"])
99
 
100
  return {
101
  "ai_agent": state["ai_agent"],
102
  "classification": state["classification"],
103
+ "messages": [response] # with add_messages, this will be appended
104
  }
105
 
106
# Load the system prompt from the file shipped alongside this module.
# NOTE: this runs at import time and raises FileNotFoundError if the file
# is missing.
with open("system_prompt.txt", "r", encoding="utf-8") as f:
    system_prompt = f.read()

# System message built from the prompt.
# NOTE(review): `sys_msg` is not referenced anywhere in this file's visible
# code — confirm whether a node was meant to prepend it to the messages.
sys_msg = SystemMessage(content=system_prompt)
112
+
113
+
114
+
115
def retriever(state: AgentState):
    """Retriever node: look up a similar solved question in the vector store
    and append it to the conversation as extra context for the assistant.

    Returns a partial state update. Because `messages` uses the
    `add_messages` reducer, returning the existing list plus one new message
    merges by message id rather than duplicating history.
    """
    # Lazily create the agent wrapper on first use.
    if state["ai_agent"] is None:
        state["ai_agent"] = CurrentAgent()

    # Find the latest human message — that is the question to search for.
    user_messages = [m for m in state["messages"] if isinstance(m, HumanMessage)]
    if not user_messages:
        # Nothing to retrieve against; pass the messages through unchanged.
        return {"messages": state["messages"]}

    query = user_messages[-1].content

    # Perform vector search (top-1 nearest neighbour).
    similar_docs = vector_store.similarity_search(query, k=1)

    if similar_docs:
        context = similar_docs[0].page_content
        response = (
            "Here is a similar question and answer for reference:\n\n"
            f"{context}"
        )
    else:
        response = "No similar questions were found in the vector database."

    # Injected as a HumanMessage so downstream chat models treat it as user input.
    example_msg = HumanMessage(content=response)

    return {
        "ai_agent": state["ai_agent"],
        "classification": state["classification"],
        "messages": state["messages"] + [example_msg]
    }
146
 
147
 
 
 
 
 
 
 
 
148
  # -----------------------------
149
  # WORKFLOW
150
  # -----------------------------
151
def build_workflow() -> Any:
    """Build and compile the LangGraph workflow.

    Topology: START -> retriever -> general_assistant; a conditional edge
    (tools_condition) then routes to "tools" when the model emitted tool
    calls — the tool results loop back into general_assistant — or to END
    otherwise.
    """
    graph = StateGraph(AgentState)

    graph.add_node("retriever", retriever)
    graph.add_node("general_assistant", general_assistant)
    graph.add_node("tools", ToolNode(tools))

    graph.add_edge(START, "retriever")
    graph.add_edge("retriever", "general_assistant")
    # tools_condition routes to "tools" on tool calls, END otherwise.
    graph.add_conditional_edges(
        "general_assistant",
        tools_condition,
    )
    graph.add_edge("tools", "general_assistant")

    # NOTE(review): this unconditional general_assistant -> END edge coexists
    # with the conditional edges above, which already route to END — confirm
    # it is intended and not a leftover from the pre-tools wiring.
    graph.add_edge("general_assistant", END)

    return graph.compile()
math_tools.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+
3
@tool
def multiply(a: int, b: int) -> int:
    """Return the product of two integers.

    Args:
        a: first int
        b: second int
    """
    product = a * b
    return product
11
+
12
@tool
def add(a: int, b: int) -> int:
    """Return the sum of two integers.

    Args:
        a: first int
        b: second int
    """
    total = a + b
    return total
21
+
22
@tool
def subtract(a: int, b: int) -> int:
    """Return the difference of two integers (a minus b).

    Args:
        a: first int
        b: second int
    """
    difference = a - b
    return difference
31
+
32
@tool
def divide(a: int, b: int) -> float:
    """Divide two numbers using true division.

    True division of two ints yields a float, so the return annotation is
    ``float`` (the previous ``-> int`` annotation was incorrect).

    Args:
        a: first int (dividend)
        b: second int (divisor)

    Raises:
        ValueError: If ``b`` is zero.
    """
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b
43
+
44
@tool
def modulus(a: int, b: int) -> int:
    """Return a modulo b.

    Args:
        a: first int
        b: second int
    """
    remainder = a % b
    return remainder
requirements.txt CHANGED
@@ -1,11 +1,22 @@
1
  gradio
 
 
2
  requests
3
  transformers
 
4
  torch
5
  langgraph
6
  langchain
7
  langchain_core
8
  langchain_community
9
  langchain_huggingface
 
10
  langchain_tools
11
  huggingface-hub
 
 
 
 
 
 
 
 
1
  gradio
2
+ gradio[oauth]
3
+ python-dotenv
4
  requests
5
  transformers
6
+ sentence_transformers
7
  torch
8
  langgraph
9
  langchain
10
  langchain_core
11
  langchain_community
12
  langchain_huggingface
13
+ langchain_openai
14
  langchain_tools
15
  huggingface-hub
16
+ wikipedia
17
+ arxiv
18
+ supabase==1.0.3
19
+ chromadb
20
+ tavily-python
21
+ langchain-tavily
22
+
search_tools.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ # import chromadb
3
+ from dotenv import load_dotenv
4
+ from langchain_core.tools import tool
5
+ from langchain_tavily import TavilySearch
6
+ from langchain_community.document_loaders import WikipediaLoader
7
+ from langchain_community.document_loaders import ArxivLoader
8
+ from langchain_huggingface import HuggingFaceEmbeddings
9
+ from langchain_community.vectorstores import SupabaseVectorStore
10
+ from supabase.client import Client, create_client
11
+ from langchain_core.tools import create_retriever_tool
12
+
13
+ load_dotenv()
14
+
15
@tool
def wiki_search(input: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        input: The search query.
    """
    search_docs = WikipediaLoader(query=input, load_max_docs=2).load()
    # Wrap each hit in a pseudo-XML <Document> envelope so the model can
    # tell results apart and cite their sources.
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ])
    return formatted_search_docs
28
+
29
@tool
def web_search(input: str) -> str:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        input: The search query.
    """
    # NOTE(review): some langchain-tavily versions return a dict with a
    # "results" key rather than a list — iterating that yields its keys,
    # so only the string fallback below would fire. Confirm the return
    # shape of the installed version.
    results = TavilySearch(max_results=3).invoke(input)

    formatted_items = []
    for item in results:
        # Case 1: item is a dict (new Tavily format)
        if isinstance(item, dict):
            url = item.get("url", "")
            content = item.get("content", "")
            formatted_items.append(
                f'<Document source="{url}"/>\n{content}\n</Document>'
            )
        # Case 2: item is a string (fallback format)
        else:
            formatted_items.append(
                f'<Document source=""/>\n{str(item)}\n</Document>'
            )

    return "\n\n---\n\n".join(formatted_items)
50
+
51
@tool
def arvix_search(input: str) -> str:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        input: The search query.
    """
    # NOTE(review): "arvix" is a typo for "arxiv"; the name is kept because
    # importers of this module reference it.
    search_docs = ArxivLoader(query=input, load_max_docs=3).load()
    # Only the first 1000 characters of each paper are kept to bound prompt size.
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
            for doc in search_docs
        ])
    return formatted_search_docs
64
+
65
+
66
# Build embeddings. The model's dimensionality must match the vector(768)
# column expected by the match_documents_langchain RPC defined in
# sql/match_documents_langchain.sql.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# Connect to Supabase.
# NOTE: both environment variables are required; a KeyError at import time
# means one of them is unset.
supabase_url = os.environ["SUPABASE_URL"]
supabase_service_key = os.environ["SUPABASE_SERVICE_KEY"]
supabase = create_client(supabase_url, supabase_service_key)

# Create Supabase vector store
vector_store = SupabaseVectorStore(
    client=supabase,
    embedding=embeddings,
    table_name="documents",  # your table
    query_name="match_documents_langchain"  # your RPC function
)

# Convert to retriever
retriever = vector_store.as_retriever()
86
+
87
@tool
def question_search(input: str) -> str:
    """Retrieve similar questions from Supabase vector store.

    Args:
        input: The question text to look up.

    Returns:
        The page contents of the retrieved documents, joined by blank lines.
    """
    docs = retriever.invoke(input)
    return "\n\n".join([d.page_content for d in docs])
sql/match_documents_langchain.sql ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
-- Similarity-search RPC used by LangChain's SupabaseVectorStore
-- (query_name="match_documents_langchain" in search_tools.py).
-- Returns the match_count nearest rows of "documents" to query_embedding,
-- ordered by pgvector cosine distance (the <=> operator).
create or replace function match_documents_langchain(
  query_embedding vector(768),
  match_count int default 5
)
returns table (
  id uuid,
  content text,
  metadata json,
  similarity float
)
language plpgsql
as $$
begin
  return query
  select
    documents.id,
    documents.content,
    documents.metadata,
    -- Convert cosine distance into a similarity score (higher = closer).
    1 - (documents.embedding <=> query_embedding) as similarity
  from documents
  order by documents.embedding <=> query_embedding
  limit match_count;
end;
$$;
supabase-data.csv ADDED
The diff for this file is too large to render. See raw diff
 
system_prompt.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ You are a helpful assistant tasked with answering questions using a set of tools.
2
+ Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
3
+ FINAL ANSWER: [YOUR FINAL ANSWER].
4
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use commas to write your number, nor units such as $ or percent signs, unless specified otherwise. If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
5
+ Your answer should only start with "FINAL ANSWER: ", then follows with the answer.