Update app.py

app.py CHANGED
```diff
@@ -18,33 +18,21 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 class BasicAgent:
     def __init__(self, hf_api_token: str | None = None):
         print("BasicAgent initializing...")
-
-        # Priority: 1. hf_api_token argument (if passed),
-        # 2. HUGGINGFACEHUB_API_TOKEN env var,
-        # 3. HF_TOKEN env var (common for HF Spaces)
-        token_to_use = hf_api_token
-        if not token_to_use:
-            token_to_use = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-        if not token_to_use:
-            token_to_use = os.getenv("HF_TOKEN")
+        token_to_use = hf_api_token or os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
 
         if not token_to_use:
-            # This error will be caught by the agent instantiation try-except block
-            # in run_and_submit_all, and a message will be shown in the UI.
             raise ValueError(
                 "Hugging Face API token not found. Please set HUGGINGFACEHUB_API_TOKEN or HF_TOKEN "
                 "as a secret in your Hugging Face Space. This token is required for the LLM."
             )
 
-
-        # Ensure the chosen model is suitable for instruction following / question answering.
-        # Examples: "mistralai/Mistral-7B-Instruct-v0.1", "google/flan-t5-large", "HuggingFaceH4/zephyr-7b-beta"
-        # Using a smaller, faster model for demonstration:
-        self.llm_repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
+        self.llm_repo_id = "mistralai/Mistral-7B-Instruct-v0.1"  # Or your preferred model
         try:
             self.llm = HuggingFaceHub(
                 repo_id=self.llm_repo_id,
-
+                # Increased max_new_tokens as the ReAct prompt is long and might generate a longer thought process
+                # Temperature 0.0 for more deterministic ReAct output, 0.1 is also fine.
+                model_kwargs={"temperature": 0.1, "max_new_tokens": 512},
                 huggingfacehub_api_token=token_to_use
             )
             print(f"BasicAgent initialized with LLM: {self.llm_repo_id}")
```
```diff
@@ -52,106 +40,77 @@ class BasicAgent:
             print(f"Error initializing HuggingFaceHub: {e}")
             raise ValueError(f"Failed to initialize LLM: {e}. Check token and model repo_id.")
 
-
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 80 chars): {question[:80]}...")
+    # Modified signature to accept task_id (though not used in this simple version yet)
+    def __call__(self, question: str, task_id: str | None = None) -> str:
+        print(f"Agent received question (Task ID: {task_id}, first 80 chars): {question[:80]}...")
 
         # Prompt engineering is crucial.
-        #
-        #
-
-
-You must break down the problem into a sequence of thoughts and actions.
-
-**Available Tools:**
-
-1. **`GAIAFileLookup(filename: str) -> str`**:
-    * Use this tool to retrieve the content of a specific file relevant to the current question.
-    * The `task_id` associated with the question will be handled by the system; you only need to provide the `filename`.
-    * The question might explicitly name the file or give strong hints.
-    * Returns the text content of the file or an error message if the file cannot be found/read.
-
-2. **`Calculator(expression: str) -> str`**:
-    * Use this tool to perform mathematical calculations.
-    * Input a valid mathematical expression (e.g., "150 * 2 + 57", "(1024 - 256) / 8").
-    * Returns the numerical result as a string, or an error message for invalid expressions.
-
-3. **`LLM_Query(sub_question: str) -> str`**:
-    * Use this tool for general knowledge lookups, complex reasoning that doesn't fit other tools, or to rephrase/summarize information you've gathered.
-    * Input a clear question or instruction.
-    * Returns the response from a powerful language model.
-
-**Output Format & Process:**
-
-You must strictly follow this format for each step of your reasoning:
-
-`Question:` The user's question you need to answer.
-
-`Thought:` Your reasoning about the question, your plan to answer it, and self-correction if needed. Explain what you need to find out or calculate.
-`Action:` The tool you choose to use from the list above (e.g., `GAIAFileLookup`, `Calculator`, `LLM_Query`). If you believe you can answer directly without a tool, you can skip to `Final Answer:` after your `Thought:`.
-`Action Input:` The input string for the chosen `Action`. For `GAIAFileLookup`, this is the filename. For `Calculator`, the mathematical expression. For `LLM_Query`, the sub-question.
-`Observation:` The result returned by the tool after your `Action` and `Action Input`. (This will be provided to you by the system).
-
-... (You can have multiple Thought/Action/Action Input/Observation cycles) ...
+        # The `question` variable (method argument) is now correctly inserted here.
+        # This is a single-shot prompt. A true ReAct agent would have a loop.
+        current_prompt = f"""You are a diligent and highly intelligent AI assistant. Your goal is to answer the given `Question` accurately and concisely.
+If the question requires multiple steps or information from tools, think step-by-step.
 
-
-`Final Answer:` The concise answer to the original `Question`. **IMPORTANT: Provide ONLY the answer value itself. Do NOT include the prefix "Final Answer:" or any other explanatory text in the string that represents the actual answer to be submitted. The system will extract the text following this label.**
+**Available Tools (Conceptual - for your reasoning process, actual tool calls are not implemented in this version):**
 
-
+1. **`GAIAFileLookup(filename: str) -> str`**: Retrieves file content.
+2. **`Calculator(expression: str) -> str`**: Performs calculations.
+3. **`LLM_Query(sub_question: str) -> str`**: For general knowledge.
 
-
-
-
-4. **File Identification:** Pay close attention to filenames mentioned or implied in the question.
-5. **Multi-Step Reasoning:** Break down complex questions into smaller, manageable steps using the Thought/Action/Observation cycle.
+**Output Format Expectation:**
+While you might reason using a "Thought:", "Action:", "Observation:" cycle internally, for this specific task, your final output should be ONLY the direct answer to the question.
+Example: If asked "What is 2+2?", your output should be "4".
 
-**
-
-
-
-`Thought:` I need to find the "Total Revenue" in `report_Q3.txt`. Then I need to calculate the percentage increase from $1500 to that revenue. Finally, I need to round the result to one decimal place.
-`Action: GAIAFileLookup`
-`Action Input: report_Q3.txt`
-`Observation: [System provides content of report_Q3.txt, e.g., "...Total Revenue: $1800..."]`
-
-`Thought:` The report states Total Revenue (Q3 sales) is $1800. Q2 sales were $1500. Now I need to calculate the percentage increase: ((New - Old) / Old) * 100.
-`Action: Calculator`
-`Action Input: ((1800 - 1500) / 1500) * 100`
-`Observation: 20.0`
-
-`Thought:` The percentage increase is 20.0%. The question asks for it rounded to one decimal place, which it already is.
-`Final Answer: 20.0%`
+**Key Guidelines for GAIA Submission:**
+1. **Conciseness:** The final answer must be precise and directly address the question.
+2. **No "FINAL ANSWER" Prefix in Submission:** Do NOT include "FINAL ANSWER:" or "The answer is:" in your actual response. Just the answer value.
 
 ---
 
-Now, please answer the following question
-
+Now, please answer the following question:
+Question: {question}
+
+Answer:"""  # Modified to guide the LLM towards a direct answer for this simplified agent
 
         try:
-
-
+            print(f"Sending to LLM (first 200 chars of prompt): {current_prompt[:200]}...")
+            response_text = self.llm.invoke(current_prompt)
+            answer = response_text.strip()
+
+            # Further cleaning if the model still adds prefixes or explanations
+            # This is important because we are not doing a full ReAct loop to extract "Final Answer:"
 
-            #
-
-
-
-
-
+            # Try to find "Answer:" if the LLM adds it despite instructions
+            if "Answer:" in answer:
+                # Take text after the last occurrence of "Answer:"
+                answer = answer.split("Answer:")[-1].strip()
+
+            # Remove common conversational prefixes that might slip through
+            common_prefixes_to_remove = [
+                "The answer is", "My answer is", "Based on the information", "The final answer is",
+                "Here is the answer", "I found that", "It seems that"
+            ]  # Case-insensitive removal
+            for prefix in common_prefixes_to_remove:
+                if answer.lower().startswith(prefix.lower()):
                     answer = answer[len(prefix):].strip()
-
+                    # If the first character is now a colon or period, remove it
+                    if answer.startswith(":") or answer.startswith("."):
+                        answer = answer[1:].strip()
+                    break  # Only remove one such prefix
+
+            # If the LLM generated a ReAct-style "Final Answer:", extract from it.
+            if "Final Answer:" in answer:
+                answer = answer.split("Final Answer:")[-1].strip()
 
-            print(f"Agent LLM raw response (first 80 chars): {
-            print(f"Agent
+            print(f"Agent LLM raw response (first 80 chars): {response_text[:80]}...")
+            print(f"Agent cleaned answer (first 80 chars): {answer[:80]}...")
 
-        if not answer:
+            if not answer:
                 print("Warning: Agent produced an empty answer after cleaning.")
-                # Return a placeholder that indicates an issue but is still a string
                 return "Unable to generate a valid answer."
 
             return answer
         except Exception as e:
             print(f"Error during LLM call for question '{question[:50]}...': {e}")
-            # Return an error message string, as the submission expects a string answer.
             return f"AGENT_ERROR: LLM call failed. ({type(e).__name__})"
 
 def run_and_submit_all(profile: gr.OAuthProfile | None):
```
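The answer-cleaning logic this commit adds to `__call__` can be factored into a pure helper so it is testable without an LLM call. A sketch under that assumption (the helper name is illustrative, not from `app.py`):

```python
# Hypothetical refactor of the commit's answer-cleaning steps into a pure,
# independently testable function. Mirrors the order used in __call__:
# strip "Answer:", then one conversational prefix, then "Final Answer:".
COMMON_PREFIXES = [
    "The answer is", "My answer is", "Based on the information",
    "The final answer is", "Here is the answer", "I found that", "It seems that",
]

def clean_llm_answer(raw: str) -> str:
    """Strip 'Answer:'/'Final Answer:' labels and conversational prefixes."""
    answer = raw.strip()
    if "Answer:" in answer:
        answer = answer.split("Answer:")[-1].strip()
    for prefix in COMMON_PREFIXES:
        if answer.lower().startswith(prefix.lower()):
            answer = answer[len(prefix):].strip()
            if answer.startswith((":", ".")):
                answer = answer[1:].strip()
            break  # the commit removes at most one such prefix
    if "Final Answer:" in answer:
        answer = answer.split("Final Answer:")[-1].strip()
    return answer

assert clean_llm_answer("The answer is: 4") == "4"
assert clean_llm_answer("Final Answer: 20.0%") == "20.0%"
```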
```diff
@@ -174,11 +133,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 
     # 1. Instantiate Agent
     try:
-        # The BasicAgent will attempt to find the HF token from env variables.
         agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
-        # Return the error message to be displayed in the Gradio UI
         return f"Error initializing agent: {str(e)}", None
 
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run_no_space_id"
```
```diff
@@ -187,7 +144,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=20)
+        response = requests.get(questions_url, timeout=20)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
```
```diff
@@ -218,12 +175,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 
         print(f"\nProcessing question {i+1}/{len(questions_data)}, Task ID: {task_id}")
         try:
-            submitted_answer = agent(question_text)
+            # Pass task_id to the agent call
+            submitted_answer = agent(question_text, task_id=task_id)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
-            # Ensure a placeholder is added for submission to maintain structure
             error_answer = f"AGENT_RUNTIME_ERROR: {type(e).__name__}"
             answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
```
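Now that `task_id` reaches the agent, a file-lookup tool becomes possible. A hedged sketch of what the `GAIAFileLookup` tool described in the prompt might look like, assuming the scoring API serves question attachments at `/files/{task_id}` as the commit's UI notes hint (the exact endpoint is unverified):

```python
# Hypothetical GAIAFileLookup implementation; endpoint path is an assumption
# based on the commit's own "- Implement actual tool usage (e.g., /files/{task_id}...)"
# note. Verify the real API route before relying on this.
import requests

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

def gaia_file_lookup(task_id: str, timeout: int = 20) -> str:
    """Fetch the attachment for a task, returning its text or an error string."""
    url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        return resp.text
    except requests.exceptions.RequestException as e:
        # The agent expects a string either way, so errors are stringified.
        return f"FILE_LOOKUP_ERROR: {type(e).__name__}: {e}"
```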
```diff
@@ -259,8 +216,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         try:
             error_json = e.response.json()
             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except requests.exceptions.JSONDecodeError:
-            error_detail += f" Response: {e.response.text[:500]}"
+        except requests.exceptions.JSONDecodeError:
+            error_detail += f" Response: {e.response.text[:500]}"
         status_message = f"Submission Failed: {error_detail}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
```
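One caveat on the hunk above: `requests.exceptions.JSONDecodeError` only exists in requests 2.27+; older releases raise `ValueError` from `.json()`. A small sketch of the same fallback as a reusable helper (the function name is illustrative, not from `app.py`):

```python
# Hypothetical helper mirroring the JSON-detail fallback above.
# Assumes requests >= 2.27; on older versions, catch ValueError instead.
import requests

def extract_error_detail(e: requests.exceptions.HTTPError) -> str:
    """Prefer the JSON 'detail' field, fall back to raw response text."""
    detail = str(e)
    try:
        error_json = e.response.json()
        detail += f" Detail: {error_json.get('detail', e.response.text)}"
    except requests.exceptions.JSONDecodeError:
        detail += f" Response: {e.response.text[:500]}"
    return detail
```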
```diff
@@ -275,7 +232,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
-    except Exception as e:
+    except Exception as e:
         status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
```
```diff
@@ -296,41 +253,25 @@ with gr.Blocks() as demo:
         Once clicking on the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions using an LLM).
         This space provides a basic setup. For better GAIA scores, you might need to:
         - Choose a more powerful LLM.
-        -
-        - Implement tool usage
+        - Implement a proper ReAct loop with tool parsing and execution.
+        - Implement actual tool usage (e.g., `/files/{task_id}`, calculator).
         """
     )
 
-    # Session state to hold the Hugging Face profile (token and username)
-    # This isn't strictly necessary for this version as token is read from env for LLM
-    # but good practice if profile info is needed elsewhere.
     hf_profile_state = gr.State(None)
 
-    # Wrap LoginButton with a function to capture the profile
     def login_handler(profile: gr.OAuthProfile | None):
         if profile:
             print(f"Profile captured: {profile.username}")
-            # If you wanted to pass profile.token to agent:
-            # BasicAgent(hf_api_token=profile.token) - but env var method is preferred for LLM token
         return profile
 
-    # The gr.LoginButton() automatically provides the profile to functions that list it as an input
-    # So, `run_and_submit_all` will receive it directly when triggered by `run_button`.
-    # No explicit state management for profile passing to `run_and_submit_all` is needed here.
     gr.LoginButton()
-
-
     run_button = gr.Button("Run Evaluation & Submit All Answers")
-
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
-    # The profile from gr.LoginButton() is implicitly passed as the first argument
-    # to `run_and_submit_all` if its signature includes it.
     run_button.click(
         fn=run_and_submit_all,
-        # No explicit inputs needed here if `gr.LoginButton` handles profile passing.
-        # If explicit passing was needed from a state: inputs=[hf_profile_state],
         outputs=[status_output, results_table]
     )
 
```
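The bullet about a proper ReAct loop is the biggest gap this commit acknowledges: the prompt describes Thought/Action/Observation cycles, but `__call__` makes a single LLM call. A minimal sketch of such a loop with one safely implemented tool, assuming an `llm` callable that returns text and stops generating before each `Observation:` (all names here are illustrative, not from `app.py`):

```python
# Minimal ReAct-style loop sketch, not part of the commit. The Calculator
# uses ast-based evaluation instead of eval() for safety.
import ast
import operator
import re

_OPS = {ast.Add: operator.add, ast.Sub: operator.sub,
        ast.Mult: operator.mul, ast.Div: operator.truediv,
        ast.USub: operator.neg}

def calculator(expression: str) -> str:
    """Safely evaluate arithmetic like '((1800 - 1500) / 1500) * 100'."""
    def ev(node):
        if isinstance(node, ast.Expression):
            return ev(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](ev(node.left), ev(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](ev(node.operand))
        raise ValueError(f"Unsupported expression: {expression}")
    try:
        return str(ev(ast.parse(expression, mode="eval")))
    except Exception as e:
        return f"CALCULATOR_ERROR: {e}"

TOOLS = {"Calculator": calculator}  # GAIAFileLookup / LLM_Query would slot in here

def react_loop(llm, question: str, max_steps: int = 5) -> str:
    """Drive Thought/Action/Action Input/Observation cycles until Final Answer."""
    transcript = f"Question: {question}\n"
    for _ in range(max_steps):
        step = llm(transcript)  # assumed to stop before generating "Observation:"
        transcript += step
        if "Final Answer:" in step:
            return step.split("Final Answer:")[-1].strip()
        action = re.search(r"Action:\s*(\w+)", step)
        action_input = re.search(r"Action Input:\s*(.+)", step)
        if not (action and action_input and action.group(1) in TOOLS):
            break  # unparseable step; give up rather than loop forever
        observation = TOOLS[action.group(1)](action_input.group(1).strip())
        transcript += f"\nObservation: {observation}\n"
    return "Unable to generate a valid answer."
```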
```diff
@@ -352,13 +293,10 @@ if __name__ == "__main__":
     else:
         print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
-    # Check for HF_TOKEN at startup as a hint for the user
     if not (os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")):
         print("⚠️ WARNING: HUGGINGFACEHUB_API_TOKEN or HF_TOKEN environment variable not found.")
         print("   The LLM agent will likely fail to initialize. Please set this token in your Space secrets.")
 
-
     print("-"*(60 + len(" App Starting ")) + "\n")
-
     print("Launching Gradio Interface for Basic Agent Evaluation...")
     demo.launch(debug=True, share=False)
```