agentic-workflow

Sleeping

App Files Files Community

raaec committed on May 5, 2025

Commit

5d520be

verified ·

1 Parent(s): 7ee22fe

Update agent.py

Browse files

Files changed (1) hide show

agent.py +380 -270

agent.py CHANGED Viewed

@@ -1,13 +1,38 @@
 import os
 import logging
-from typing import Tuple, List, Dict, Any, Optional
-import gradio as gr
-import requests
-import pandas as pd
-from langchain_core.messages import HumanMessage
-from agent import build_graph
 # Configure logging
 logging.basicConfig(
@@ -17,339 +42,424 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
-# --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-REQUEST_TIMEOUT = 60  # seconds
-class BasicAgent:
-    """
-    A LangGraph-based agent that answers questions using a graph-based workflow.
-    This agent takes natural language questions, processes them through a
-    predefined graph workflow, and returns the answer.
-    Attributes:
-        graph: The LangGraph workflow that processes the questions
-    """
-    def __init__(self):
-        """Initialize the agent with a graph-based workflow."""
-        logger.info("Initializing BasicAgent")
-        self.graph = build_graph()
-    def __call__(self, question: str) -> str:
-        """
-        Process a question and return an answer.
-        Args:
-            question: The natural language question to process
-        Returns:
-            The agent's answer to the question
-        """
-        logger.info(f"Processing question (first 50 chars): {question[:50]}...")
-        # Wrap the question in a HumanMessage from langchain_core
-        messages = [HumanMessage(content=question)]
-        # Process through the graph
-        messages = self.graph.invoke({"messages": messages})
-        # Extract and clean the answer
-        answer = messages['messages'][-1].content
-        # Remove the "FINAL ANSWER:" prefix if present
-        return answer[14:] if answer.startswith("FINAL ANSWER:") else answer
-def fetch_questions(api_url: str) -> List[Dict[str, Any]]:
-    """
-    Fetch questions from the evaluation server.
     Args:
-        api_url: Base URL of the evaluation API
     Returns:
-        List of question data dictionaries
-    Raises:
-        requests.exceptions.RequestException: If there's an error fetching questions
     """
-    questions_url = f"{api_url}/questions"
-    logger.info(f"Fetching questions from: {questions_url}")
-    response = requests.get(questions_url, timeout=REQUEST_TIMEOUT)
-    response.raise_for_status()
-    questions_data = response.json()
-    if not questions_data:
-        raise ValueError("Fetched questions list is empty or invalid format")
-    logger.info(f"Successfully fetched {len(questions_data)} questions")
-    return questions_data
-def run_agent_on_questions(
-    agent: BasicAgent,
-    questions_data: List[Dict[str, Any]]
-) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
-    """
-    Run the agent on a list of questions.
     Args:
-        agent: The agent to run
-        questions_data: List of question data dictionaries
     Returns:
-        Tuple of (answers_payload, results_log)
     """
-    results_log = []
-    answers_payload = []
-    logger.info(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
-        task_id = item.get("task_id")
-        question_text = item.get("question")
-        if not task_id or question_text is None:
-            logger.warning(f"Skipping item with missing task_id or question: {item}")
-            continue
-        try:
-            submitted_answer = agent(question_text)
-            # Prepare answer for submission
-            answers_payload.append({
-                "task_id": task_id,
-                "submitted_answer": submitted_answer
-            })
-            # Log result for display
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text,
-                "Submitted Answer": submitted_answer
-            })
-        except Exception as e:
-            logger.error(f"Error running agent on task {task_id}: {e}", exc_info=True)
-            # Log error in results
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text,
-                "Submitted Answer": f"AGENT ERROR: {e}"
-            })
-    return answers_payload, results_log
-def submit_answers(
-    api_url: str,
-    username: str,
-    agent_code: str,
-    answers_payload: List[Dict[str, Any]]
-) -> Dict[str, Any]:
     """
-    Submit answers to the evaluation server.
     Args:
-        api_url: Base URL of the evaluation API
-        username: Hugging Face username
-        agent_code: URL to the agent code repository
-        answers_payload: List of answer dictionaries
     Returns:
-        Response data from the server
     Raises:
-        requests.exceptions.RequestException: If there's an error during submission
     """
-    submit_url = f"{api_url}/submit"
-    # Prepare submission data
-    submission_data = {
-        "username": username.strip(),
-        "agent_code": agent_code,
-        "answers": answers_payload
-    }
-    logger.info(f"Submitting {len(answers_payload)} answers to: {submit_url}")
-    # Submit answers
-    response = requests.post(submit_url, json=submission_data, timeout=REQUEST_TIMEOUT)
-    response.raise_for_status()
-    result_data = response.json()
-    logger.info("Submission successful")
-    return result_data
-def run_and_submit_all(profile: Optional[gr.OAuthProfile] = None) -> Tuple[str, pd.DataFrame]:
-    """
-    Fetches all questions, runs the BasicAgent on them, submits all answers,
-    and displays the results.
     Args:
-        profile: Gradio OAuth profile containing user information
     Returns:
-        Tuple of (status_message, results_dataframe)
     """
-    # Check if user is logged in
-    if not profile:
-        logger.warning("User not logged in")
-        return "Please Login to Hugging Face with the button.", None
-    username = profile.username
-    logger.info(f"User logged in: {username}")
-    # Get the space ID for linking to code
-    space_id = os.getenv("SPACE_ID")
-    api_url = DEFAULT_API_URL
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     try:
-        # 1. Instantiate Agent
-        agent = BasicAgent()
-        # 2. Fetch Questions
-        questions_data = fetch_questions(api_url)
-        # 3. Run Agent on Questions
-        answers_payload, results_log = run_agent_on_questions(agent, questions_data)
-        if not answers_payload:
-            logger.warning("Agent did not produce any answers to submit")
-            return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-        # 4. Submit Answers
-        result_data = submit_answers(api_url, username, agent_code, answers_payload)
-        # 5. Format and Return Results
-        final_status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
         )
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        # Handle HTTP errors with detailed error information
-        error_detail = f"Server responded with status {e.response.status_code}."
-        try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except requests.exceptions.JSONDecodeError:
-            error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
-        logger.error(status_message)
-        results_df = pd.DataFrame(results_log if 'results_log' in locals() else [])
-        return status_message, results_df
-    except requests.exceptions.Timeout:
-        status_message = f"Submission Failed: The request timed out after {REQUEST_TIMEOUT} seconds"
-        logger.error(status_message)
-        results_df = pd.DataFrame(results_log if 'results_log' in locals() else [])
-        return status_message, results_df
-    except Exception as e:
-        status_message = f"An unexpected error occurred: {str(e)}"
-        logger.error(status_message, exc_info=True)
-        results_df = pd.DataFrame(results_log if 'results_log' in locals() else [])
-        return status_message, results_df
-def create_gradio_interface() -> gr.Blocks:
     """
-    Create and configure the Gradio interface.
     Returns:
-        Configured Gradio Blocks interface
     """
-    with gr.Blocks() as demo:
-        gr.Markdown("# Agent Evaluation Runner")
-        gr.Markdown(
-            """
-            ## Instructions
-            1. **Clone this space** and modify the code to define your agent's logic, tools, and dependencies
-            2. **Log in to your Hugging Face account** using the button below (required for submission)
-            3. **Run Evaluation** to fetch questions, run your agent, and submit answers
-            ## Important Notes
-            - The evaluation process may take several minutes to complete
-            - This agent framework is intentionally minimal to allow for your own improvements
-            - Consider implementing caching or async processing for better performance
-            """
         )
-        gr.LoginButton()
-        run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
-        status_output = gr.Textbox(
-            label="Run Status / Submission Result",
-            lines=5,
-            interactive=False
-        )
-        results_table = gr.DataFrame(
-            label="Questions and Agent Answers",
-            wrap=True
         )
-        run_button.click(
-            fn=run_and_submit_all,
-            outputs=[status_output, results_table]
-        )
-    return demo
-def check_environment() -> None:
     """
-    Check and log environment variables at startup.
     """
-    logger.info("-" * 30 + " App Starting " + "-" * 30)
-    # Check for SPACE_HOST
-    space_host = os.getenv("SPACE_HOST")
-    if space_host:
-        logger.info(f"✅ SPACE_HOST found: {space_host}")
-        logger.info(f"   Runtime URL should be: https://{space_host}.hf.space")
-    else:
-        logger.info("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    # Check for SPACE_ID
-    space_id = os.getenv("SPACE_ID")
-    if space_id:
-        logger.info(f"✅ SPACE_ID found: {space_id}")
-        logger.info(f"   Repo URL: https://huggingface.co/spaces/{space_id}")
-        logger.info(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id}/tree/main")
-    else:
-        logger.info("ℹ️  SPACE_ID environment variable not found (running locally?).")
-    logger.info("-" * (60 + len(" App Starting ")) + "\n")
 if __name__ == "__main__":
-    # Check environment at startup
-    check_environment()
-    # Create and launch Gradio interface
-    logger.info("Launching Gradio Interface for Agent Evaluation...")
-    demo = create_gradio_interface()
-    demo.launch(debug=True, share=False)

+"""
+LLM Agent Graph Implementation
+=============================
+This module defines a graph-based LLM agent workflow with various tools and retrieval capabilities.
+The agent can:
+- Perform mathematical operations
+- Search Wikipedia, web, and arXiv
+- Retrieve similar questions from a vector database
+- Process user queries using different LLM providers
+Components:
+- Tool definitions: Math operations, search tools
+- Vector database retrieval
+- Graph construction with different LLM options
+- Workflow management with LangGraph
+"""
 import os
 import logging
+from typing import Dict, List, Union, Optional, Any, Callable
+from dotenv import load_dotenv
+from langgraph.graph import START, StateGraph, MessagesState
+from langgraph.prebuilt import tools_condition, ToolNode
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_groq import ChatGroq
+from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
+from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
+from langchain_community.vectorstores import SupabaseVectorStore
+from langchain_core.messages import SystemMessage, HumanMessage
+from langchain_core.tools import tool
+from langchain.tools.retriever import create_retriever_tool
+from supabase.client import Client, create_client
 # Configure logging
 logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
+# Load environment variables
+load_dotenv()
+# ===================
+# Math Operation Tools
+# ===================
+@tool
+def multiply(a: int, b: int) -> int:
+    """Multiply two integers and return the result.
+    Args:
+        a: First integer to multiply
+        b: Second integer to multiply
+    Returns:
+        The product of a and b
+    """
+    # NOTE(review): docstring kept verbatim — @tool presumably exposes it to the
+    # model as the tool description; confirm before rewording.
+    product = a * b
+    return product
+@tool
+def add(a: int, b: int) -> int:
+    """Add two integers and return the result.
     Args:
+        a: First integer to add
+        b: Second integer to add
     Returns:
+        The sum of a and b
     """
+    # NOTE(review): docstring kept verbatim — @tool presumably exposes it to the
+    # model as the tool description; confirm before rewording.
+    total = a + b
+    return total
+@tool
+def subtract(a: int, b: int) -> int:
+    """Subtract the second integer from the first and return the result.
     Args:
+        a: Integer to subtract from
+        b: Integer to subtract
     Returns:
+        The difference (a - b)
     """
+    # NOTE(review): docstring kept verbatim — @tool presumably exposes it to the
+    # model as the tool description; confirm before rewording.
+    difference = a - b
+    return difference
+@tool
+def divide(a: int, b: int) -> float:
+    """Divide the first integer by the second and return the result.
+    Args:
+        a: Numerator (dividend)
+        b: Denominator (divisor)
+    Returns:
+        The quotient (a / b) as a float
+    Raises:
+        ValueError: If b is zero (division by zero)
     """
+    # Guard first so the caller sees a ValueError with a readable message
+    # rather than Python's ZeroDivisionError.
+    divisor_is_zero = b == 0
+    if divisor_is_zero:
+        raise ValueError("Cannot divide by zero.")
+    quotient = a / b
+    return quotient
+@tool
+def modulus(a: int, b: int) -> int:
+    """Calculate the remainder when the first integer is divided by the second.
     Args:
+        a: Dividend
+        b: Divisor
     Returns:
+        The remainder of a divided by b
     Raises:
+        ValueError: If b is zero (modulo by zero)
     """
+    # Guard first so the caller sees a ValueError with a readable message
+    # rather than Python's ZeroDivisionError.
+    divisor_is_zero = b == 0
+    if divisor_is_zero:
+        raise ValueError("Cannot calculate modulus with divisor zero.")
+    remainder = a % b
+    return remainder
+# ===================
+# Search Tools
+# ===================
+@tool
+def wiki_search(query: str) -> Dict[str, str]:
+    """Search Wikipedia for a query and return formatted results.
     Args:
+        query: The search term to look up on Wikipedia
     Returns:
+        Dictionary with formatted Wikipedia search results
     """
+    logger.info(f"Searching Wikipedia for: {query}")
     try:
+        # At most two pages are loaded per query.
+        loader = WikipediaLoader(query=query, load_max_docs=2)
+        pages = loader.load()
+        if not pages:
+            return {"wiki_results": "No Wikipedia results found for this query."}
+        # Wrap each page in a <Document> envelope carrying its source and page.
+        chunks = []
+        for page in pages:
+            source = page.metadata["source"]
+            page_no = page.metadata.get("page", "")
+            chunks.append(
+                f'<Document source="{source}" page="{page_no}"/>\n{page.page_content}\n</Document>'
+            )
+        logger.info(f"Found {len(pages)} Wikipedia results")
+        return {"wiki_results": "\n\n---\n\n".join(chunks)}
+    except Exception as err:
+        # Best-effort tool: report the failure as the tool result instead of raising.
+        logger.error(f"Error searching Wikipedia: {err}", exc_info=True)
+        return {"wiki_results": f"Error searching Wikipedia: {str(err)}"}
+@tool
+def web_search(query: str) -> Dict[str, str]:
+    """Search the web using Tavily for a query and return formatted results.
+    Args:
+        query: The search term to look up on the web
+    Returns:
+        Dictionary with formatted web search results
+    """
+    logger.info(f"Searching the web for: {query}")
+    try:
+        # invoke() requires the tool input as its first argument; supplying it
+        # only as an unrelated keyword (query=...) raises TypeError, so pass the
+        # input explicitly as the tool's argument mapping.
+        search_results = TavilySearchResults(max_results=3).invoke({"query": query})
+        if not search_results:
+            return {"web_results": "No web results found for this query."}
+        # NOTE(review): some versions of the Tavily tool appear to return an
+        # error string instead of a list of hits — surface it rather than
+        # indexing into it character by character.
+        if isinstance(search_results, str):
+            logger.error(f"Error searching the web: {search_results}")
+            return {"web_results": f"Error searching the web: {search_results}"}
+        formatted_search_docs = "\n\n---\n\n".join(
+            [
+                f'<Document source="{result["url"]}">\n{result["content"]}\n</Document>'
+                for result in search_results
+            ]
         )
+        logger.info(f"Found {len(search_results)} web search results")
+        return {"web_results": formatted_search_docs}
+    except Exception as e:
+        # Best-effort tool: report the failure as the tool result instead of raising.
+        logger.error(f"Error searching the web: {e}", exc_info=True)
+        return {"web_results": f"Error searching the web: {str(e)}"}
+@tool
+def arxiv_search(query: str) -> Dict[str, str]:
+    """Search arXiv for academic papers and return formatted results.
+    Args:
+        query: The search term to look up on arXiv
+    Returns:
+        Dictionary with formatted arXiv search results
+    """
+    logger.info(f"Searching arXiv for: {query}")
+    try:
+        search_docs = ArxivLoader(query=query, load_max_docs=3).load()
+        if not search_docs:
+            return {"arxiv_results": "No arXiv results found for this query."}
+        # NOTE(review): "entry_id" does not look like part of ArxivLoader's
+        # default metadata; read it with .get() (as already done for "Title")
+        # so a missing key cannot turn a successful search into an error.
+        # Only the first 1000 characters of each paper are included.
+        formatted_search_docs = "\n\n---\n\n".join(
+            [
+                f'<Document source="{doc.metadata.get("entry_id", "")}" title="{doc.metadata.get("Title", "")}">\n{doc.page_content[:1000]}\n</Document>'
+                for doc in search_docs
+            ]
+        )
+        logger.info(f"Found {len(search_docs)} arXiv results")
+        return {"arxiv_results": formatted_search_docs}
+    except Exception as e:
+        # Best-effort tool: report the failure as the tool result instead of raising.
+        logger.error(f"Error searching arXiv: {e}", exc_info=True)
+        return {"arxiv_results": f"Error searching arXiv: {str(e)}"}
+# ===================
+# Vector Store Setup
+# ===================
+def setup_vector_store() -> SupabaseVectorStore:
     """
+    Build the Supabase-backed vector store used for question retrieval.
+    Reads SUPABASE_URL and SUPABASE_SERVICE_KEY from the environment.
     Returns:
+        A SupabaseVectorStore bound to the "documents" table
+    Raises:
+        ValueError: If either Supabase environment variable is unset or empty
     """
+    url = os.environ.get("SUPABASE_URL")
+    service_key = os.environ.get("SUPABASE_SERVICE_KEY")
+    credentials_present = bool(url) and bool(service_key)
+    if not credentials_present:
+        raise ValueError(
+            "Missing required environment variables: SUPABASE_URL and/or SUPABASE_SERVICE_KEY"
         )
+    # The embedding model is instantiated before the Supabase client is created.
+    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+    client: Client = create_client(url, service_key)
+    store = SupabaseVectorStore(
+        client=client,
+        embedding=embedder,
+        table_name="documents",
+        query_name="match_documents_langchain",
+    )
+    logger.info("Vector store initialized successfully")
+    return store
+# ===================
+# LLM Provider Setup
+# ===================
+def get_llm(provider: str = "groq"):
+    """
+    Create the chat model for the requested provider.
+    Args:
+        provider: The LLM provider to use ('google', 'groq', or 'huggingface')
+    Returns:
+        Chat model instance, configured with temperature 0
+    Raises:
+        ValueError: If provider is not one of the supported names
+    """
+    if provider == "google":
+        logger.info("Using Google Gemini as LLM provider")
+        return ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
+    elif provider == "groq":
+        logger.info("Using Groq as LLM provider with qwen-qwq-32b model")
+        return ChatGroq(model="qwen-qwq-32b", temperature=0)
+    elif provider == "huggingface":
+        logger.info("Using Hugging Face as LLM provider with llama-2-7b-chat-hf model")
+        return ChatHuggingFace(
+            llm=HuggingFaceEndpoint(
+                # HuggingFaceEndpoint takes the inference URL as `endpoint_url`;
+                # a bare `url` keyword is not one of its fields, which leaves
+                # the endpoint without a target.
+                # NOTE(review): confirm the repo path in this URL is correct.
+                endpoint_url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
+                temperature=0,
+            ),
         )
+    else:
+        available_providers = ['google', 'groq', 'huggingface']
+        raise ValueError(f"Invalid provider: '{provider}'. Choose from {available_providers}")
+# ===================
+# Graph Building
+# ===================
+def build_graph(provider: str = "groq"):
     """
+    Build and compile the agent workflow graph.
+    The graph runs START -> retriever -> assistant, then loops between the
+    assistant and the tools node for as long as the assistant requests tools.
+    If the vector store cannot be set up, the graph is still built; the
+    retriever node then only adds the system message.
+    Args:
+        provider: The LLM provider to use ('google', 'groq', or 'huggingface')
+    Returns:
+        Compiled StateGraph ready for execution
+    Raises:
+        ValueError: If provider is not supported (raised by get_llm)
     """
+    logger.info(f"Building agent graph with {provider} as LLM provider")
+    # Load system prompt
+    try:
+        with open("system_prompt.txt", "r", encoding="utf-8") as f:
+            system_prompt = f.read()
+            logger.info("Loaded system prompt from file")
+    except FileNotFoundError:
+        system_prompt = """You are a helpful AI assistant that answers questions accurately and concisely.
+Use the available tools when appropriate to find information or perform calculations.
+Always cite your sources when you use search tools."""
+        logger.warning("system_prompt.txt not found, using default system prompt")
+    # Initialize system message
+    sys_msg = SystemMessage(content=system_prompt)
+    # Set up vector store and retriever tool.
+    # Both names are bound up front: the retriever node below closes over
+    # vector_store, so it must exist (as None) even when setup fails.
+    vector_store = None
+    retriever_tool = None
+    try:
+        vector_store = setup_vector_store()
+        retriever_tool = create_retriever_tool(
+            retriever=vector_store.as_retriever(),
+            # Function-calling APIs generally restrict tool names to letters,
+            # digits, underscores and dashes, so the name must not contain spaces.
+            name="question_search",
+            description="A tool to retrieve similar questions from a vector store.",
+        )
+        logger.info("Vector store retrieval tool initialized")
+    except Exception as e:
+        # Retrieval is optional: log the failure and continue without it.
+        logger.error(f"Failed to set up vector store: {e}", exc_info=True)
+        retriever_tool = None
+    # Define available tools
+    tools = [
+        multiply,
+        add,
+        subtract,
+        divide,
+        modulus,
+        wiki_search,
+        web_search,
+        arxiv_search,
+    ]
+    # Add retriever tool if available
+    if retriever_tool:
+        tools.append(retriever_tool)
+    # Get LLM and bind tools
+    llm = get_llm(provider)
+    llm_with_tools = llm.bind_tools(tools)
+    # Define graph nodes
+    def assistant(state: MessagesState) -> Dict[str, List]:
+        """
+        Assistant node that processes messages with the LLM.
+        Args:
+            state: Current message state
+        Returns:
+            Updated message state with LLM response
+        """
+        return {"messages": [llm_with_tools.invoke(state["messages"])]}
+    def retriever(state: MessagesState) -> Dict[str, List]:
+        """
+        Retriever node that finds similar questions from the vector store.
+        Args:
+            state: Current message state
+        Returns:
+            Updated message state with retrieved examples
+        """
+        # Only use retrieval if vector_store is available
+        if vector_store is not None:
+            try:
+                # The first message in the state is used as the search query.
+                similar_questions = vector_store.similarity_search(state["messages"][0].content)
+                if similar_questions:
+                    example_msg = HumanMessage(
+                        content=f"Here I provide a similar question and answer for reference: \n\n{similar_questions[0].page_content}",
+                    )
+                    return {"messages": [sys_msg] + state["messages"] + [example_msg]}
+            except Exception as e:
+                logger.error(f"Error in retriever node: {e}", exc_info=True)
+        # If vector_store is unavailable or retrieval fails, just add system message
+        # NOTE(review): MessagesState merges returned messages by id, so the
+        # system message may not end up first in the stored history — verify.
+        return {"messages": [sys_msg] + state["messages"]}
+    # Build graph
+    builder = StateGraph(MessagesState)
+    # Add nodes
+    builder.add_node("retriever", retriever)
+    builder.add_node("assistant", assistant)
+    builder.add_node("tools", ToolNode(tools))
+    # Add edges
+    builder.add_edge(START, "retriever")
+    builder.add_edge("retriever", "assistant")
+    builder.add_conditional_edges(
+        "assistant",
+        tools_condition,
+    )
+    builder.add_edge("tools", "assistant")
+    # Compile graph
+    compiled_graph = builder.compile()
+    logger.info("Agent graph compiled successfully")
+    return compiled_graph
+# ===================
+# Testing
+# ===================
 if __name__ == "__main__":
+    # Manual smoke test: build the Groq-backed graph, run one question through
+    # it, and print every message in the resulting state.
+    sample_query = "When was the wiki entry of Boethius on De Philosophiae Consolatione first added?"
+    logger.info("Starting test run")
+    agent_graph = build_graph(provider="groq")
+    logger.info(f"Testing with question: {sample_query}")
+    final_state = agent_graph.invoke({"messages": [HumanMessage(content=sample_query)]})
+    logger.info("Test completed, printing messages:")
+    for msg in final_state["messages"]:
+        msg.pretty_print()