SerotoninRonin committed on
Commit
62ed5af
·
1 Parent(s): 81917a3

Attempting LlamaIndex approach

Browse files
Files changed (5) hide show
  1. .gitignore +3 -0
  2. agents.py +75 -0
  3. app.py +105 -39
  4. requirements.txt +5 -1
  5. tools.py +119 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .env
2
+ __pycache__/
3
+ files/
agents.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from llama_index.core.agent.workflow import ReActAgent, FunctionAgent
3
+ from llama_index.core import PromptTemplate
4
+ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
5
+ from tools import (
6
+ search_tool,
7
+ describe_image_tool,
8
+ parse_excel_tool,
9
+ access_webpage_tool,
10
+ string_functions_tool
11
+ )
12
+
13
# --- Thinking Agent -------------------------------------------------------
# Planner: reasons about the task, forwards concrete steps to the Technician
# Agent, and escalates finished answers to the Manager Agent for review.
_thinking_llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    provider="auto",
    token=os.environ.get("HF_TOKEN"),
)

thinking_agent = ReActAgent(
    name="Thinking Agent",
    llm=_thinking_llm,
    description="An agent that can think and reason about tasks, and then handoff the task to Technician Agent for execution, or to Manager Agent for review.",
    system_prompt="You are a thinking agent that can reason about tasks and communicate the necessary steps to complete them to Technician Agent, if necessary. If you believe the task is completed and the question is answered, you must handoff the answer to Manager Agent for final review.",
    can_handoff_to=["Technician Agent", "Manager Agent"],
)
24
+
25
# --- Technician Agent -----------------------------------------------------
# Executor: holds all concrete tools (web search, image description, Excel
# parsing, string utilities, webpage access) and always hands results back
# to the Thinking Agent.
_technician_llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    provider="auto",
    token=os.environ.get("HF_TOKEN"),
)

technician_agent = ReActAgent(
    name="Technician Agent",
    llm=_technician_llm,
    tools=[
        search_tool,
        describe_image_tool,
        parse_excel_tool,
        access_webpage_tool,
        string_functions_tool,
    ],
    description="An agent that can perform various technical tasks such as searching the web, describing images, parsing Excel files, string operations, and accessing webpages.",
    system_prompt="You are a helpful agent that answers questions based on the provided tools. Use the tools to gather information and provide accurate answers, and send those answers to Thinking Agent. If the task is too complex or requires further reasoning, handoff the task to Thinking Agent for analysis with the reasoning as why you cannot complete it. You must always handoff to Thinking Agent",
    can_handoff_to=["Thinking Agent"],
)
37
+
38
# --- Manager Agent --------------------------------------------------------
# Root/reviewer agent: receives candidate answers and produces the final
# reply. Its answer-formatting rules are injected at runtime via
# update_prompts() with `minimal_system_prompt` instead of an inline
# system_prompt (a long commented-out draft of that prompt was removed here).
_manager_llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    provider="auto",
    token=os.environ.get("HF_TOKEN"),
)

manager_agent = ReActAgent(
    name="Manager Agent",
    llm=_manager_llm,
    description="A high-level agent that can manage tasks and coordinate between other agents.",
    can_handoff_to=["Thinking Agent"],
)
57
+
58
# Minimal answer-formatting prompt installed as the Manager Agent's ReAct
# header so its final output is the bare answer, GAIA-style.
minimal_system_prompt = """
You are the Manager Agent. 💼
Respond with **only the final answer**, in as few words as possible.
Do **not** include any reasoning, thoughts, or tool calls.
If it's a number: use plain digits (no commas, %, etc.).
If it's a string: no articles, no abbreviations.
If it's a list: comma-separated, each element following the rules above.
If unsure, reply: I don't know.
Below is the current conversation consisting of interleaving human and assistant messages.
"""

manager_prompt = PromptTemplate(template=minimal_system_prompt)

# BUG FIX: update_prompts expects a dict mapping prompt name -> prompt
# template. The original passed a SET literal
# {"react_header", manager_prompt.get_template()} — which both fails to map
# name to value and supplies a raw string where a PromptTemplate is expected.
manager_agent.update_prompts({"react_header": manager_prompt})

# Dump the agent's effective prompts for debugging.
prompt_dict = manager_agent.get_prompts()
for k, v in prompt_dict.items():
    print(f"Prompt: {k}\n\nValue: {v.template}")
app.py CHANGED
@@ -1,25 +1,67 @@
 
1
  import os
2
  import gradio as gr
3
  import requests
4
  import inspect
 
 
5
  import pandas as pd
6
-
 
 
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  """
24
  Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
@@ -27,20 +69,23 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
27
  # --- Determine HF Space Runtime URL and Repo URL ---
28
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
- if profile:
31
- username= f"{profile.username}"
32
- print(f"User logged in: {username}")
33
- else:
34
- print("User not logged in.")
35
- return "Please Login to Hugging Face with the button.", None
36
 
37
  api_url = DEFAULT_API_URL
38
  questions_url = f"{api_url}/questions"
39
  submit_url = f"{api_url}/submit"
40
 
41
- # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
 
 
 
 
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
@@ -73,14 +118,34 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
73
  results_log = []
74
  answers_payload = []
75
  print(f"Running agent on {len(questions_data)} questions...")
76
- for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  try:
83
- submitted_answer = agent(question_text)
 
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
@@ -92,26 +157,27 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
  # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
- print(status_update)
98
 
99
  # 5. Submit
100
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
  try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
103
- response.raise_for_status()
104
- result_data = response.json()
105
- final_status = (
106
- f"Submission Successful!\n"
107
- f"User: {result_data.get('username')}\n"
108
- f"Overall Score: {result_data.get('score', 'N/A')}% "
109
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
- f"Message: {result_data.get('message', 'No message received.')}"
111
- )
112
- print("Submission successful.")
113
  results_df = pd.DataFrame(results_log)
114
- return final_status, results_df
 
115
  except requests.exceptions.HTTPError as e:
116
  error_detail = f"Server responded with status {e.response.status_code}."
117
  try:
@@ -158,7 +224,7 @@ with gr.Blocks() as demo:
158
  """
159
  )
160
 
161
- gr.LoginButton()
162
 
163
  run_button = gr.Button("Run Evaluation & Submit All Answers")
164
 
 
1
+ import base64
2
  import os
3
  import gradio as gr
4
  import requests
5
  import inspect
6
+ import dotenv
7
+ dotenv.load_dotenv() # Load environment variables from .env file
8
  import pandas as pd
9
+ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
10
+ from llama_index.core.agent.workflow import AgentWorkflow, AgentOutput, ToolCall, ToolCallResult, AgentInput, AgentStream
11
+ from agents import thinking_agent, technician_agent, manager_agent
12
+ import asyncio
13
  # (Keep Constants as is)
14
  # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
# Standalone LLM handle; not used by the workflow below (the agents in
# agents.py construct their own LLMs) — presumably kept for experimentation.
llm = HuggingFaceInferenceAPI(model_name="deepseek-ai/DeepSeek-R1", provider="auto", token=os.environ.get("HF_TOKEN"))
18
+
19
+
20
+ #def run_and_submit_all( profile: gr.OAuthProfile | None):
21
+
22
async def run_agent_query(agent: AgentWorkflow, question: str):
    """
    Runs the agent on a single question and returns the answer.

    Streams workflow events to stdout while running (agent hand-offs, token
    deltas, tool calls and tool results) so progress is visible in the logs.

    :param agent: The AgentWorkflow to execute.
    :param question: The user question to submit to the workflow.
    :return: The last non-empty agent response content, or a fallback string
        when no agent produced any output.
    """
    handler = agent.run(user_msg=question)
    current_agent = None
    final_response = None
    # NOTE: removed the original's `current_tool_calls = ""` — it was never
    # read anywhere in the function.
    async for event in handler.stream_events():
        # Announce whenever control passes to a different agent.
        if (
            hasattr(event, "current_agent_name")
            and event.current_agent_name != current_agent
        ):
            current_agent = event.current_agent_name
            print(f"\n{'='*50}")
            print(f"🤖 Agent: {current_agent}")
            print(f"{'='*50}\n")

        if isinstance(event, AgentStream):
            # Token-by-token streaming output.
            if event.delta:
                print(event.delta, end="", flush=True)
        elif isinstance(event, AgentInput):
            print("📥 Input:", event.input)
        elif isinstance(event, AgentOutput):
            if event.response.content:
                print("📤 Output:", event.response.content)
                # Keep the latest non-empty response; the last one seen before
                # the stream ends is treated as the final answer.
                final_response = event.response.content
            if event.tool_calls:
                print(
                    "🛠️ Planning to use tools:",
                    [call.tool_name for call in event.tool_calls],
                )
        elif isinstance(event, ToolCallResult):
            print(f"🔧 Tool Result ({event.tool_name}):")
            print(f" Arguments: {event.tool_kwargs}")
            print(f" Output: {event.tool_output}")
        elif isinstance(event, ToolCall):
            print(f"🔨 Calling Tool: {event.tool_name}")
            print(f" With arguments: {event.tool_kwargs}")
    return final_response if final_response else "No response from agent."
64
+ def run_and_submit_all():
65
  """
66
  Fetches all questions, runs the BasicAgent on them, submits all answers,
67
  and displays the results.
 
69
  # --- Determine HF Space Runtime URL and Repo URL ---
70
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
71
 
72
+ # if profile:
73
+ # username= f"{profile.username}"
74
+ # print(f"User logged in: {username}")
75
+ # else:
76
+ # print("User not logged in.")
77
+ # return "Please Login to Hugging Face with the button.", None
78
 
79
  api_url = DEFAULT_API_URL
80
  questions_url = f"{api_url}/questions"
81
  submit_url = f"{api_url}/submit"
82
 
 
83
  try:
84
+ agent = AgentWorkflow(
85
+ agents=[thinking_agent, technician_agent, manager_agent],
86
+ root_agent=manager_agent.name,
87
+ handoff_output_prompt="handoff_result: Passed to {to_agent}. Reason: {reason}. Please continue processing using the original user question."
88
+ )
89
  except Exception as e:
90
  print(f"Error instantiating agent: {e}")
91
  return f"Error initializing agent: {e}", None
 
118
  results_log = []
119
  answers_payload = []
120
  print(f"Running agent on {len(questions_data)} questions...")
121
+ for item in questions_data[:4]:
122
  task_id = item.get("task_id")
123
  question_text = item.get("question")
124
  if not task_id or question_text is None:
125
  print(f"Skipping item with missing task_id or question: {item}")
126
  continue
127
+ file_name = item.get("file_name")
128
+ if file_name:
129
+ try:
130
+ response = requests.get(f"{api_url}/files/{task_id}", timeout=15)
131
+ response.raise_for_status()
132
+ save_path = os.path.join("files", file_name)
133
+ os.makedirs("files", exist_ok=True)
134
+ with open(save_path, "wb") as f:
135
+ f.write(response.content)
136
+ question_text += f" (File: {save_path})"
137
+ except requests.exceptions.RequestException as e:
138
+ print(f"Error fetching file for task {task_id}: {e}")
139
+ return f"Error fetching file for task {task_id}: {e}", None
140
+ except requests.exceptions.JSONDecodeError as e:
141
+ print(f"Error decoding JSON response for file {file_name}: {e}")
142
+ return f"Error decoding JSON response for file {file_name}: {e}", None
143
+ except Exception as e:
144
+ print(f"An unexpected error occurred fetching file {file_name}: {e}")
145
+ return f"An unexpected error occurred fetching file {file_name}: {e}", None
146
  try:
147
+ print(f"Running agent on task {task_id} with question: {question_text}")
148
+ submitted_answer = asyncio.run(run_agent_query(agent, question_text))
149
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
150
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
151
  except Exception as e:
 
157
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
158
 
159
  # 4. Prepare Submission
160
+ # submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
161
+ # status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
162
+ # print(status_update)
163
 
164
  # 5. Submit
165
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
166
  try:
167
+ # response = requests.post(submit_url, json=submission_data, timeout=60)
168
+ # response.raise_for_status()
169
+ # result_data = response.json()
170
+ # final_status = (
171
+ # f"Submission Successful!\n"
172
+ # f"User: {result_data.get('username')}\n"
173
+ # f"Overall Score: {result_data.get('score', 'N/A')}% "
174
+ # f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
175
+ # f"Message: {result_data.get('message', 'No message received.')}"
176
+ # )
177
+ # print("Submission successful.")
178
  results_df = pd.DataFrame(results_log)
179
+ return "Nothing submitted", results_df
180
+ # return final_status, results_df
181
  except requests.exceptions.HTTPError as e:
182
  error_detail = f"Server responded with status {e.response.status_code}."
183
  try:
 
224
  """
225
  )
226
 
227
+ # gr.LoginButton()
228
 
229
  run_button = gr.Button("Run Evaluation & Submit All Answers")
230
 
requirements.txt CHANGED
@@ -1,2 +1,6 @@
1
  gradio
2
- requests
 
 
 
 
 
1
  gradio
2
+ requests
3
+ llama-index-multi-modal-llms-huggingface
4
+ llama-index
5
+ llama-index-llms-huggingface-api
6
+ llama-index-readers-whisper
tools.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_index.core.tools import FunctionTool
2
+ from llama_index.tools.tavily_research import TavilyToolSpec
3
+ from llama_index.core.schema import ImageDocument
4
+ from llama_index.readers.whisper import WhisperReader
5
+
6
+ import os
7
+
8
# Tavily web-search tool spec; requires TAVILY_API_KEY in the environment.
# (If unset, None is passed through and search calls fail at runtime —
# presumably acceptable for this prototype; verify before deploying.)
tool_spec = TavilyToolSpec(
    api_key=os.environ.get("TAVILY_API_KEY"),
)

# Expose Tavily's search method to agents as a plain FunctionTool.
search_tool = FunctionTool.from_defaults(tool_spec.search)
13
+
14
from llama_index.multi_modal_llms.huggingface import HuggingFaceMultiModal

def describe_image(image_path: str, prompt: str = "Describe the following image:") -> str:
    """
    Describe an image using a multi-modal LLM (Qwen2-VL-2B-Instruct).

    :param image_path: Path to the image file on disk. (The original
        docstring also claimed base64 data was accepted, but the value is
        only ever passed as ImageDocument(image_path=...) — unverified.)
    :param prompt: Prompt to use for the description. Defaults to
        "Describe the following image:".
    :return: Description of the image, or an error message string on failure.
    """
    try:
        # FIX: build the ImageDocument inside the try-block — the original
        # constructed it outside, so a bad path raised instead of returning
        # the error string this function promises.
        image = ImageDocument(image_path=image_path)
        # NOTE: the model is (re)loaded on every call; fine for a prototype,
        # but a candidate for module-level caching.
        llm = HuggingFaceMultiModal.from_model_name("Qwen/Qwen2-VL-2B-Instruct", use_fast=True)
        return llm.complete(
            prompt=prompt,
            image_documents=[image]
        ).text
    except Exception as e:
        return f"Error describing image: {e}"

# Expose to agents as a FunctionTool.
describe_image_tool = FunctionTool.from_defaults(describe_image)
34
+
35
+ # Tool to parse xls/xlsx files
36
+ def parse_excel(file_path: str) -> str:
37
+ """
38
+ Function to parse an Excel file and return its content as a string.
39
+ :param file_path: Path to the Excel file (xls or xlsx).
40
+ :return: Content of the Excel file as a string.
41
+ """
42
+ import pandas as pd
43
+
44
+ df = pd.read_excel(io=file_path)
45
+
46
+ # Convert DataFrame to string
47
+ return df.to_string() if not df.empty else "The Excel file is empty."
48
+
49
# Expose parse_excel to agents as a FunctionTool.
parse_excel_tool = FunctionTool.from_defaults(parse_excel)
50
+
51
def access_webpage(url: str) -> str:
    """
    Access a webpage and return its content.

    :param url: URL of the webpage to access.
    :return: Raw body text of the webpage, or an error message string on
        failure (HTTP errors, connection problems, timeouts).
    """
    import requests
    try:
        print(f"Accessing webpage: {url}")
        # FIX: always bound the request — without a timeout, requests.get
        # can block forever and stall the whole agent run.
        response = requests.get(url, timeout=30)
        print(f"Response status code: {response.status_code}")
        response.raise_for_status()  # Raise an error for bad responses
        return response.text
    except requests.RequestException as e:
        return f"Error accessing {url}: {e}"
    except Exception as e:
        return f"An unexpected error occurred: {e}"

# Expose to agents as a FunctionTool.
access_webpage_tool = FunctionTool.from_defaults(access_webpage)
70
+
71
def string_functions(input_string: str, operation: str) -> str:
    """
    Perform a named string operation and return the result as a string.

    :param input_string: The input string to operate on.
    :param operation: The operation to perform (one of: "uppercase",
        "lowercase", "reverse", "length", "count_vowels", "count_consonants",
        "count_words", "count_sentences").
    :return: Result of the string operation (always a string, so counts are
        returned as digits), or an error message for an unknown operation.
    """
    if operation == "uppercase":
        return input_string.upper()
    elif operation == "lowercase":
        return input_string.lower()
    elif operation == "reverse":
        return input_string[::-1]
    elif operation == "length":
        return str(len(input_string))
    elif operation == "count_vowels":
        vowels = "aeiouAEIOU"
        return str(sum(1 for char in input_string if char in vowels))
    elif operation == "count_consonants":
        vowels = "aeiouAEIOU"
        # Alphabetic and not a vowel; digits/punctuation are ignored.
        return str(sum(1 for char in input_string if char.isalpha() and char not in vowels))
    elif operation == "count_words":
        return str(len(input_string.split()))
    elif operation == "count_sentences":
        import re
        # Split on runs of sentence terminators; blank fragments don't count.
        sentences = re.split(r'[.!?]+', input_string)
        return str(len([s for s in sentences if s.strip()]))
    else:
        # BUG FIX: the original message listed only three of the eight
        # supported operations, which misleads the agent calling this tool.
        return ("Invalid operation. Supported operations: uppercase, lowercase, "
                "reverse, length, count_vowels, count_consonants, count_words, "
                "count_sentences.")
100
+
101
# Expose string_functions to agents as a FunctionTool.
string_functions_tool = FunctionTool.from_defaults(string_functions)
102
+
103
def transcribe_audio(audio_path: str) -> str:
    """
    Transcribe an audio file using OpenAI Whisper via WhisperReader.

    Requires OPENAI_API_KEY in the environment.

    :param audio_path: Path to the audio file.
    :return: Transcription of the audio, or an error message string on
        failure / empty result.
    """
    try:
        reader = WhisperReader(api_key=os.environ.get("OPENAI_API_KEY"))
        documents = reader.load_data(file=audio_path)
        if not documents:
            return "No audio content found."
        # The early return above guarantees documents is non-empty here
        # (the original re-checked `if documents` redundantly).
        # Assuming the first document contains the transcription.
        return documents[0].text
    except Exception as e:
        return f"Error transcribing audio: {e}"

# Expose to agents as a FunctionTool.
transcribe_audio_tool = FunctionTool.from_defaults(transcribe_audio)