Spaces:

alialhaddad
/

FinalAssignment-AliA

Sleeping

App Files Files Community

AliA1997 committed on Jan 17

Commit

a6dbfdf

1 Parent(s): dd75c3c

Completed Final Assignment for Huggingface Agents Course

Browse files

Files changed (9) hide show

.gitignore +2 -0
app.py +36 -5
init_agent.py +90 -70
math_tools.py +52 -0
requirements.txt +11 -0
search_tools.py +91 -0
sql/match_documents_langchain.sql +24 -0
supabase-data.csv +0 -0
system_prompt.txt +5 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .env
2	+ __pycache__

app.py CHANGED Viewed

@@ -6,25 +6,56 @@ import pandas as pd
 from init_agent import build_workflow
 from langchain_core.messages import HumanMessage
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     """A langgraph agent."""
     workflow: Optional[Any]
     def __init__(self):
         print("BasicAgent initialized.")
         self.workflow = build_workflow()
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         messages = [HumanMessage(content=question)]
-        result = self.workflow.invoke({"messages": messages})
-        answer = result['messages'][-1].content
-        return answer  # kein [14:] mehr nötig!
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """

 from init_agent import build_workflow
 from langchain_core.messages import HumanMessage
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 class BasicAgent:
     """A langgraph agent."""
     # Compiled graph returned by build_workflow(); invoked once per question.
     workflow: Optional[Any]
     def __init__(self):
         # Build the workflow once so every call reuses the same object.
         print("BasicAgent initialized.")
         self.workflow = build_workflow()
     def __call__(self, question: str) -> str:
         # Run the workflow on `question` and return the text of the last
         # message in the resulting state (or a placeholder when there is none).
         print(f"Agent received question (first 50 chars): {question[:50]}...")
+        # Always wrap the question as a HumanMessage
         messages = [HumanMessage(content=question)]
+        # Run the workflow
         # Initial state: no agent yet and a fixed "not coding" classification.
+        result = self.workflow.invoke({
+            "ai_agent": None,
+            "messages": messages,
+            "classification": "not coding"
+        })
+        # --- FIX: safely extract the final answer ---
+        final_messages = result.get("messages", [])
+        if not final_messages:
+            return "No answer produced."
+        last_msg = final_messages[-1]
+        # LangChain messages always have .content, but sometimes it's a list or None
+        content = getattr(last_msg, "content", None)
+        # If content is a list (Gemini, some HF models), flatten it
         # Dict parts contribute their "text" value; any other part is stringified.
+        if isinstance(content, list):
+            content = " ".join(
+                part.get("text", "") if isinstance(part, dict) else str(part)
+                for part in content
+            )
+        # Fallback if still empty
         # str(last_msg) is the message's string form, not just its text.
+        if not content:
+            content = str(last_msg)
+        return content
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """

init_agent.py CHANGED Viewed

@@ -3,37 +3,33 @@ from transformers import pipeline
 from typing import Annotated, TypedDict, Optional, Any
 from langgraph.graph import StateGraph, START, END
 from langgraph.graph.message import add_messages
 from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
-from langchain_core.messages import AnyMessage
-from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.tools import Tool
 hf_token = os.environ.get("HF_TOKEN")
-# -----------------------------
-# CLASSIFIER
-# -----------------------------
-def init_classifier():
-    return pipeline(
-        "zero-shot-classification",
-        model="cross-encoder/nli-distilroberta-base"
-    )
 # -----------------------------
 # CODE LLM TOOL
 # -----------------------------
-def run_code_llm(prompt: str) -> str:
     """Call the coder model directly as a tool."""
     coder = HuggingFaceEndpoint(
         repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
         huggingfacehub_api_token=hf_token
     )
     chat = ChatHuggingFace(llm=coder, verbose=True)
-    result = chat.invoke([{"role": "user", "content": prompt}])
     return result.content
@@ -43,21 +39,44 @@ code_llm_tool = Tool(
     func=run_code_llm
 )
 # -----------------------------
 # AGENT WRAPPER
 # -----------------------------
 class CurrentAgent:
-    def __init__(self):
-        self.current_llm = HuggingFaceEndpoint(
-            repo_id="Qwen/Qwen3-VL-8B-Instruct",
-            huggingfacehub_api_token=hf_token
-        )
-        self.current_chat = ChatHuggingFace(
-            llm=self.current_llm,
-            verbose=True,
-            tools=[DuckDuckGoSearchRun(), code_llm_tool]
-        )
 # -----------------------------
@@ -69,29 +88,6 @@ class AgentState(TypedDict):
     messages: Annotated[list[AnyMessage], add_messages]
-# -----------------------------
-# CLASSIFICATION NODE
-# -----------------------------
-def classify(state: AgentState) -> AgentState:
-    classifier = init_classifier()
-    message = state["messages"][-1].content
-    result = classifier(message, ["coding", "not coding"])
-    label = result["labels"][0]
-    score = result["scores"][0]
-    new_class = "coding" if (label == "coding" and score > 0.6) else "not coding"
-    if state["ai_agent"] is None:
-        state["ai_agent"] = CurrentAgent()
-    return {
-        "ai_agent": state["ai_agent"],
-        "classification": new_class,
-        "messages": state["messages"]
-    }
 # -----------------------------
 # GENERAL ASSISTANT NODE
 # -----------------------------
@@ -99,50 +95,74 @@ def general_assistant(state: AgentState) -> AgentState:
     if state["ai_agent"] is None:
         state["ai_agent"] = CurrentAgent()
-    updated = [state["ai_agent"].current_chat.invoke(state["messages"])]
     return {
         "ai_agent": state["ai_agent"],
         "classification": state["classification"],
-        "messages": updated
     }
-# -----------------------------
-# CODE ASSISTANT NODE
-# -----------------------------
-def code_assistant(state: AgentState) -> AgentState:
     if state["ai_agent"] is None:
         state["ai_agent"] = CurrentAgent()
-    # The agent will automatically call the code_llm tool
-    updated = [state["ai_agent"].current_chat.invoke(state["messages"])]
     return {
         "ai_agent": state["ai_agent"],
         "classification": state["classification"],
-        "messages": updated
     }
-# -----------------------------
-# ROUTER
-# -----------------------------
-def route(state: AgentState):
-    return "code_assistant" if state["classification"] == "coding" else "general_assistant"
 # -----------------------------
 # WORKFLOW
 # -----------------------------
 def build_workflow() -> Any:
     graph = StateGraph(AgentState)
-    graph.add_node("classify", classify)
     graph.add_node("general_assistant", general_assistant)
-    graph.add_edge(START, "classify")
-    graph.add_edge("classify", "general_assistant")
     graph.add_edge("general_assistant", END)
     return graph.compile()

 from typing import Annotated, TypedDict, Optional, Any
 from langgraph.graph import StateGraph, START, END
+from langgraph.prebuilt import tools_condition
+from langgraph.prebuilt import ToolNode
 from langgraph.graph.message import add_messages
+# from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
+from langchain_openai import ChatOpenAI
+from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
 from langchain_core.tools import Tool
+from math_tools import add, subtract, multiply, modulus, divide
+from search_tools import wiki_search, web_search, arvix_search, question_search, vector_store
+# from init_models import image_to_text_model
 hf_token = os.environ.get("HF_TOKEN")
+google_api_key = os.environ.get("GOOGLE_API_KEY")
 # -----------------------------
 # CODE LLM TOOL
 # -----------------------------
+def run_code_llm(input: str) -> str:
     """Call the coder model directly as a tool."""
     # A fresh endpoint and chat wrapper are constructed on every call.
     coder = HuggingFaceEndpoint(
         repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
         huggingfacehub_api_token=hf_token
     )
     chat = ChatHuggingFace(llm=coder, verbose=True)
     # The prompt is sent as a single user-role message; only the reply's
     # content is handed back to the caller.
+    result = chat.invoke([{"role": "user", "content": input}])
     return result.content
     func=run_code_llm
 )
+## Classify images
+## Classify videos
+## Classify other items
+# def run_image_to_text_llm(prompt: str) -> str:
+#     """Call the image to ext model directly as a tool."""
+#     raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
+#     # conditional image captioning
+#     text = "a photography of"
+#     inputs = processor(raw_image, text, return_tensors="pt").to("cuda")
+#     out = model.generate(**inputs)
+tools = [
+    add,
+    code_llm_tool,
+    divide,
+    subtract,
+    multiply,
+    modulus,
+    arvix_search,
+    web_search,
+    question_search,
+    wiki_search
+]
 # -----------------------------
 # AGENT WRAPPER
 # -----------------------------
 class CurrentAgent:
+    def __init__(self):
+        # Chat model with the module-level `tools` list bound to it
+        self.current_chat = ChatOpenAI(model="gpt-5-nano").bind_tools(tools)
 # -----------------------------
     messages: Annotated[list[AnyMessage], add_messages]
 # -----------------------------
 # GENERAL ASSISTANT NODE
 # -----------------------------
     if state["ai_agent"] is None:
         state["ai_agent"] = CurrentAgent()
+    response = state["ai_agent"].current_chat.invoke(state["messages"])
     return {
         "ai_agent": state["ai_agent"],
         "classification": state["classification"],
+        "messages": [response]  # with add_messages, this will be appended
     }
+# load the system prompt from the file
+with open("system_prompt.txt", "r", encoding="utf-8") as f:
+    system_prompt = f.read()
+# System message
+sys_msg = SystemMessage(content=system_prompt)
+def retriever(state: AgentState):
+    """Retriever node"""
     if state["ai_agent"] is None:
         state["ai_agent"] = CurrentAgent()
+    # Find the latest human message
+    user_messages = [m for m in state["messages"] if isinstance(m, HumanMessage)]
+    if not user_messages:
+        return {"messages": state["messages"]}
+    query = user_messages[-1].content
+    # Perform vector search
+    similar_docs = vector_store.similarity_search(query, k=1)
+    if similar_docs:
+        context = similar_docs[0].page_content
+        response = (
+            "Here is a similar question and answer for reference:\n\n"
+            f"{context}"
+        )
+    else:
+        response = "No similar questions were found in the vector database."
+    example_msg = HumanMessage(content=response)
     return {
         "ai_agent": state["ai_agent"],
         "classification": state["classification"],
+        "messages": state["messages"] + [example_msg]
     }
 # -----------------------------
 # WORKFLOW
 # -----------------------------
 def build_workflow() -> Any:
     graph = StateGraph(AgentState)
+    graph.add_node("retriever", retriever)
     graph.add_node("general_assistant", general_assistant)
+    graph.add_node("tools", ToolNode(tools))
+    graph.add_edge(START, "retriever")
+    graph.add_edge("retriever", "general_assistant")
+    graph.add_conditional_edges(
+        "general_assistant",
+        tools_condition,
+    )
+    graph.add_edge("tools", "general_assistant")
     graph.add_edge("general_assistant", END)
     return graph.compile()

math_tools.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from langchain_core.tools import tool
+@tool
+def multiply(a: int, b: int) -> int:
+    """Multiply two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
     # NOTE(review): the docstring above presumably doubles as the tool
     # description shown to the model — confirm before rewording it.
+    return a * b
+@tool
+def add(a: int, b: int) -> int:
+    """Add two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
     # NOTE(review): the docstring above presumably doubles as the tool
     # description shown to the model — confirm before rewording it.
+    return a + b
+@tool
+def subtract(a: int, b: int) -> int:
+    """Subtract two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
     # Order matters: the second argument is taken away from the first.
+    return a - b
+@tool
+def divide(a: int, b: int) -> int:
+    """Divide two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    if b == 0:
+        raise ValueError("Cannot divide by zero.")
+    return a / b
+@tool
+def modulus(a: int, b: int) -> int:
+    """Get the modulus of two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    return a % b

requirements.txt CHANGED Viewed

@@ -1,11 +1,22 @@
 gradio
 requests
 transformers
 torch
 langgraph
 langchain
 langchain_core
 langchain_community
 langchain_huggingface
 langchain_tools
 huggingface-hub

 gradio
+gradio[oauth]
+python-dotenv
 requests
 transformers
+sentence_transformers
 torch
 langgraph
 langchain
 langchain_core
 langchain_community
 langchain_huggingface
+langchain_openai
 langchain_tools
 huggingface-hub
+wikipedia
+arxiv
+supabase==1.0.3
+chromadb
+tavily-python
+langchain-tavily

search_tools.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import os
+# import chromadb
+from dotenv import load_dotenv
+from langchain_core.tools import tool
+from langchain_tavily import TavilySearch
+from langchain_community.document_loaders import WikipediaLoader
+from langchain_community.document_loaders import ArxivLoader
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import SupabaseVectorStore
+from supabase.client import Client, create_client
+from langchain_core.tools import create_retriever_tool
+load_dotenv()
+@tool
+def wiki_search(input: str) -> str:
+    """Search Wikipedia for a query and return maximum 2 results.
+    Args:
+        query: The search query."""
+    search_docs = WikipediaLoader(query=input, load_max_docs=2).load()
+    formatted_search_docs = "\n\n---\n\n".join(
+        [
+            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+            for doc in search_docs
+        ])
+    return formatted_search_docs
+@tool
+def web_search(input: str) -> str:
+    """Search Tavily for a query and return maximum 3 results."""
+    results = TavilySearch(max_results=3).invoke(input)
+    formatted_items = []
+    for item in results:
+        # Case 1: item is a dict (new Tavily format)
+        if isinstance(item, dict):
+            url = item.get("url", "")
+            content = item.get("content", "")
+            formatted_items.append(
+                f'<Document source="{url}"/>\n{content}\n</Document>'
+            )
+        # Case 2: item is a string (fallback format)
+        else:
+            formatted_items.append(
+                f'<Document source=""/>\n{str(item)}\n</Document>'
+            )
+    return "\n\n---\n\n".join(formatted_items)
+@tool
+def arvix_search(input: str) -> str:
+    """Search Arxiv for a query and return maximum 3 result.
+    Args:
+        query: The search query."""
+    search_docs = ArxivLoader(query=input, load_max_docs=3).load()
+    formatted_search_docs = "\n\n---\n\n".join(
+        [
+            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
+            for doc in search_docs
+        ])
+    return formatted_search_docs
+# Build embeddings
 # NOTE(review): sql/match_documents_langchain.sql declares query_embedding as
 # vector(768); presumably this model emits 768-dimensional vectors — confirm.
+embeddings = HuggingFaceEmbeddings(
+    model_name="sentence-transformers/all-mpnet-base-v2"
+)
+# Connect to Supabase
 # Both variables are required: a missing one raises KeyError when this
 # module is imported.
+supabase_url = os.environ["SUPABASE_URL"]
+supabase_service_key = os.environ["SUPABASE_SERVICE_KEY"]
+supabase = create_client(supabase_url, supabase_service_key)
+# Create Supabase vector store
+vector_store = SupabaseVectorStore(
+    client=supabase,
+    embedding=embeddings,
+    table_name="documents",                 # your table
+    query_name="match_documents_langchain"  # your RPC function
+)
+# Convert to retriever
+retriever = vector_store.as_retriever()
+@tool
+def question_search(input: str) -> str:
+    """Retrieve similar questions from Supabase vector store."""
     # `retriever` is the module-level vector_store.as_retriever() object.
+    docs = retriever.invoke(input)
     # Matched documents are joined with blank lines into one string.
+    return "\n\n".join([d.page_content for d in docs])

sql/match_documents_langchain.sql ADDED Viewed

	@@ -0,0 +1,24 @@

 -- Returns up to match_count rows of documents, ordered by the <=> distance
 -- between each row's embedding and query_embedding (nearest first).
 -- NOTE(review): <=> is presumably pgvector's cosine-distance operator, which
 -- would make similarity a cosine similarity — confirm.
+create or replace function match_documents_langchain(
+  query_embedding vector(768),
+  match_count int default 5
+)
+returns table (
+  id uuid,
+  content text,
+  metadata json,
+  similarity float
+)
+language plpgsql
+as $$
+begin
+  return query
+  select
+    documents.id,
+    documents.content,
+    documents.metadata,
   -- similarity is reported as 1 minus the distance used for ordering
+    1 - (documents.embedding <=> query_embedding) as similarity
+  from documents
+  order by documents.embedding <=> query_embedding
+  limit match_count;
+end;
+$$;

supabase-data.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

system_prompt.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+You are a helpful assistant tasked with answering questions using a set of tools.
+Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
+FINAL ANSWER: [YOUR FINAL ANSWER].
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma-separated list of numbers and/or strings. If you are asked for a number, do not use commas in the number, and do not use units such as $ or a percent sign unless specified otherwise. If you are asked for a string, do not use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise. If you are asked for a comma-separated list, apply the above rules depending on whether each element of the list is a number or a string.
+Your answer should start only with "FINAL ANSWER: ", followed by the answer.