Update app.py
app.py (CHANGED)
@@ -6,19 +6,17 @@ import pandas as pd
 import re  # For parsing LLM output
 
 # --- HF Inference API for LLM ---
-from huggingface_hub import InferenceClient
+from huggingface_hub import InferenceClient  # Corrected import
+
 # You can choose a different model, but make sure it's good at instruction following and ReAct-style prompting.
-# Zephyr-7B-beta or Mistral-7B-Instruct are good choices available on the free inference API.
-# Starling-LM-7B-beta is also excellent if available and performant enough.
 LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta"  # or "mistralai/Mistral-7B-Instruct-v0.2"
-
-# though many popular ones work without it for basic inference.
-# Name: HF_TOKEN, Value: your_hf_token_here (with read access is usually enough for inference)
+
 try:
     hf_token = os.getenv("HF_TOKEN")
+    # Initialize with the corrected InferenceClient
     llm_client = InferenceClient(model=LLM_MODEL, token=hf_token)
 except Exception as e:
-    print(f"Error initializing
+    print(f"Error initializing InferenceClient: {e}")
     llm_client = None
 
 # --- Tools ---
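Note on the hunk above: the corrected client can be sanity-checked in isolation. A minimal sketch, assuming HF_TOKEN is set and the model is reachable via the free Inference API (not part of the diff):

import os
from huggingface_hub import InferenceClient

client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token=os.getenv("HF_TOKEN"))
# One-off generation call mirroring what run_llm does later in this file.
print(client.text_generation("Q: What is 2 + 2?\nA:", max_new_tokens=8))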
@@ -31,7 +29,7 @@ def search_tool(query: str) -> str:
     Args:
         query (str): The search query.
     Returns:
-        str: A string containing the search results.
+        str: A string containing the search results, or an error/status message.
     """
     print(f"Tool: search_tool, Query: {query}")
     try:
@@ -40,10 +38,12 @@ def search_tool(query: str) -> str:
         if results:
             return "\n".join([f"Title: {r['title']}\nSnippet: {r['body']}\nURL: {r['href']}" for r in results])
         else:
-
+            # Provide a more informative message if no results are found
+            return "No results found for your query. This might mean the query returned no relevant documents, or there could be a temporary issue (e.g., rate limit)."
     except Exception as e:
         print(f"Error in search_tool: {e}")
-
+        # Make the error message slightly more informative about potential causes
+        return f"Error performing search: {str(e)}. This could be due to a network issue, an invalid query, or a rate limit."
 
 # 2. Calculator Tool
 def calculator_tool(expression: str) -> str:
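Note on the search_tool hunk above: the result dicts carry 'title', 'body', and 'href' keys, which matches the output of duckduckgo_search's DDGS.text; presumably that is the call inside the elided try block. An illustrative sketch under that assumption:

from duckduckgo_search import DDGS

# Fetch a few results the same way the tool appears to.
with DDGS() as ddgs:
    results = list(ddgs.text("GAIA benchmark", max_results=3))
for r in results:
    print(r["title"], r["href"])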
@@ -57,21 +57,7 @@ def calculator_tool(expression: str) -> str:
     """
     print(f"Tool: calculator_tool, Expression: {expression}")
     try:
-        # Basic security: allow only numbers, operators, parentheses, and math functions.
-        # This is not perfectly secure for a public-facing app with arbitrary eval,
-        # but for this constrained GAIA context, it's a common approach.
-        # A safer approach would be to use a dedicated math parsing library.
-        allowed_chars = "0123456789+-*/(). "
-        if not all(char in allowed_chars or char.isspace() for char in expression):
-            # A more robust check would involve parsing the expression.
-            # For now, we'll allow what seems reasonable for GAIA math.
-            # Let's try to evaluate common math patterns more safely.
-            # This simple check is insufficient for true security.
-            pass  # Relaxing this for now to allow GAIA questions like "sqrt(16)" etc.
-
         # A slightly safer eval using a limited global scope
-        # For GAIA, often questions involve simple arithmetic or known constants like pi.
-        # This eval is still risky; a dedicated math expression parser is better for production.
         result = eval(expression, {"__builtins__": {}}, {"sqrt": lambda x: x**0.5, "pi": 3.1415926535})
         return str(result)
     except Exception as e:
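Note on the calculator hunk above: the removed comments point at a dedicated expression parser as the safer route. A minimal sketch of that idea using the standard-library ast module (the safe_eval name is illustrative, not part of the diff):

import ast
import operator

_OPS = {
    ast.Add: operator.add, ast.Sub: operator.sub,
    ast.Mult: operator.mul, ast.Div: operator.truediv,
    ast.Pow: operator.pow, ast.USub: operator.neg,
}

def safe_eval(expression: str) -> float:
    # Walk the parsed AST and evaluate only whitelisted node types.
    def _eval(node):
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](_eval(node.operand))
        raise ValueError(f"Disallowed element: {type(node).__name__}")
    return _eval(ast.parse(expression, mode="eval").body)

Calls like sqrt(16) would additionally need an explicit whitelist of ast.Call names on top of this.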
@@ -89,14 +75,12 @@ class ReActAgent:
         self.max_iterations = max_iterations
         self.stop_pattern = "Final Answer:"
 
-        # Construct tool descriptions for the prompt
         self.tool_descriptions = "\n".join([
             f"- {name}: {inspect.getdoc(func)}"
             for name, func in tools.items()
         ])
         self.tool_names = ", ".join(tools.keys())
 
-        # This is the core ReAct prompt template
         self.react_prompt_template = inspect.cleandoc(f"""
         You are a helpful and observant AI assistant. Your goal is to answer the following question accurately.
         You must use a step-by-step thinking process (Thought, Action, Observation).
@@ -119,20 +103,12 @@ class ReActAgent:
 
     def run_llm(self, prompt: str) -> str:
         try:
-            # print(f"\n--- LLM Prompt ---\n{prompt}\n--- End LLM Prompt ---")
-            # Parameters for the LLM call
-            # `max_new_tokens` is important to give the LLM enough space to think and provide an answer.
-            # `temperature` can be low for more deterministic ReAct steps.
-            # `stop_sequences` can help control generation if the model supports it well.
             response = self.llm.text_generation(
                 prompt,
-                max_new_tokens=512,
-                temperature=0.2,
-                do_sample=True,
-                # stop_sequences=["Observation:", "\nThought:", self.stop_pattern] # Helps stop at logical points
-                # Using stop_sequences can be tricky and model-dependent. Simpler to parse output.
+                max_new_tokens=512,
+                temperature=0.2,
+                do_sample=True,
             )
-            # print(f"--- LLM Raw Response ---\n{response}\n--- End LLM Raw Response ---")
             return response.strip()
         except Exception as e:
             print(f"Error during LLM call: {e}")
@@ -148,21 +124,18 @@ class ReActAgent:
             print(f"\nIteration {i+1}")
             llm_output = self.run_llm(current_prompt)
 
-            if not llm_output:
+            if not llm_output:
                 print("LLM returned empty or error, stopping.")
                 return "Agent Error: LLM failed to respond."
 
-            scratchpad += llm_output + "\n"
+            scratchpad += llm_output + "\n"
 
-            # Check for Final Answer
             final_answer_match = re.search(r"Final Answer:\s*(.*)", llm_output, re.DOTALL | re.IGNORECASE)
             if final_answer_match:
                 answer = final_answer_match.group(1).strip()
                 print(f"Found Final Answer: {answer}")
                 return answer
 
-            # Parse Action
-            # Regex to capture: Action: tool_name[input]
             action_match = re.search(r"Action:\s*([a-zA-Z_0-9]+)\[(.*?)\]", llm_output, re.DOTALL)
             if action_match:
                 tool_name = action_match.group(1).strip()
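Note on the Action-parsing regex in the hunk above, shown in isolation (illustrative):

import re

out = "Thought: I should search.\nAction: search_tool[capital of France]"
m = re.search(r"Action:\s*([a-zA-Z_0-9]+)\[(.*?)\]", out, re.DOTALL)
# The lazy group stops at the first closing bracket.
assert m is not None
assert m.group(1) == "search_tool"
assert m.group(2) == "capital of France"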
@@ -174,29 +147,21 @@ class ReActAgent:
                     observation = self.tools[tool_name](tool_input)
                 except Exception as e:
                     observation = f"Error executing tool {tool_name}: {e}"
-                print(f"Observation: {observation[:200]}...")
+                print(f"Observation: {observation[:200]}...")
                 scratchpad += f"Observation: {observation}\n"
                 else:
                     print(f"Unknown tool: {tool_name}")
                     scratchpad += f"Observation: Error - Unknown tool '{tool_name}'. Available tools: {self.tool_names}\n"
             else:
-                # If no action, it might be just a thought, or malformed. Add the thought to scratchpad.
-                # Or it might be the LLM directly trying to answer without "Final Answer:"
-                # We assume the LLM is trying to continue the thought process or has given up.
                 print("No valid action found in LLM output for this iteration.")
-                # If the LLM isn't producing actions, it might be stuck or directly answering.
-                # We will let the loop continue, hoping it recovers or hits max_iterations/Final Answer.
-                # If it's a malformed output that isn't a Final Answer, it will just be added to scratchpad.
 
             current_prompt = self.react_prompt_template.format(question=question, scratchpad=scratchpad)
 
-
-
-
-
-
-        return f"Max iterations reached. Last thought: {last_thought_match[-1].strip()}"
-        return "Agent failed to find an answer within the iteration limit."
+        # Fallback if max_iterations is reached without a "Final Answer:"
+        print(f"Max iterations reached for question (first 50 chars): {question[:50]}...")
+        standard_failure_message = "Agent could not determine an answer within the allowed steps."
+        print(f"Returning standard failure message: {standard_failure_message}")
+        return standard_failure_message
 
 
 # --- Constants (from template) ---
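Note on the fallback in the hunk above: the removed return referenced last_thought_match, which was never defined, and the second return was unreachable; hence the switch to a standard failure message. If the last-thought fallback were actually wanted, it would need something like this hypothetical helper:

import re

def last_thought_fallback(scratchpad: str) -> str | None:
    # Pull every "Thought:" line the agent accumulated and report the last one.
    thoughts = re.findall(r"Thought:\s*(.*)", scratchpad)
    if thoughts:
        return f"Max iterations reached. Last thought: {thoughts[-1].strip()}"
    return None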
@@ -204,10 +169,6 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 # --- Main Execution Logic (from template, modified to use ReActAgent) ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """
-    Fetches all questions, runs the ReActAgent on them, submits all answers,
-    and displays the results.
-    """
     space_id = os.getenv("SPACE_ID")
     if profile:
         username = f"{profile.username}"
@@ -220,13 +181,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
 
-    # 1. Instantiate Agent
     try:
         available_tools = {
             "search_tool": search_tool,
             "calculator_tool": calculator_tool,
         }
-        if llm_client is None:
+        if llm_client is None:
             return "LLM Client could not be initialized. Check logs and HF_TOKEN.", None
         agent = ReActAgent(llm_client=llm_client, tools=available_tools)
     except Exception as e:
@@ -236,10 +196,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Code not available (SPACE_ID not set)"
     print(f"Agent code link: {agent_code}")
 
-    # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=20)
+        response = requests.get(questions_url, timeout=20)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
@@ -254,7 +213,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(f"Response text: {response.text[:500]}")
         return f"Error decoding server response for questions: {e}", None
 
-    # 3. Run your Agent
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
@@ -278,15 +236,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
-    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
 
-    # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=120)
+        response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
@@ -347,7 +303,7 @@ with gr.Blocks() as demo:
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
     run_button.click(
         fn=run_and_submit_all,
@@ -371,7 +327,7 @@ if __name__ == "__main__":
         print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
     if llm_client is None:
-        print("⚠️ LLM Client (
+        print("⚠️ LLM Client (InferenceClient) was not initialized. The agent will not work.")
         print("   Please check if you need to set the HF_TOKEN secret in your Space settings,")
         print(f"   and ensure the model '{LLM_MODEL}' is accessible via the Inference API.")
     else:
|