Update app.py
app.py (CHANGED):
@@ -6,14 +6,13 @@ import pandas as pd
 import re # For parsing LLM output
 
 # --- HF Inference API for LLM ---
-from huggingface_hub import InferenceClient
+from huggingface_hub import InferenceClient
 
 # You can choose a different model, but make sure it's good at instruction following and ReAct-style prompting.
 LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta" # or "mistralai/Mistral-7B-Instruct-v0.2"
 
 try:
     hf_token = os.getenv("HF_TOKEN")
-    # Initialize with the corrected InferenceClient
     llm_client = InferenceClient(model=LLM_MODEL, token=hf_token)
 except Exception as e:
     print(f"Error initializing InferenceClient: {e}")
@@ -38,11 +37,9 @@ def search_tool(query: str) -> str:
         if results:
             return "\n".join([f"Title: {r['title']}\nSnippet: {r['body']}\nURL: {r['href']}" for r in results])
         else:
-            # Provide a more informative message if no results are found
             return "No results found for your query. This might mean the query returned no relevant documents, or there could be a temporary issue (e.g., rate limit)."
     except Exception as e:
         print(f"Error in search_tool: {e}")
-        # Make the error message slightly more informative about potential causes
         return f"Error performing search: {str(e)}. This could be due to a network issue, an invalid query, or a rate limit."
 
 # 2. Calculator Tool
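
For context: search_tool wraps DuckDuckGo search. A minimal standalone sketch of the same pattern is shown below, assuming the duckduckgo-search package's DDGS client, whose text() results carry the 'title', 'body', and 'href' keys formatted above; this is an illustration, not code from the commit.

```python
# Sketch only: the commit's search_tool follows this general shape.
from duckduckgo_search import DDGS

def search_sketch(query: str, max_results: int = 3) -> str:
    try:
        results = DDGS().text(query, max_results=max_results)
        if results:
            return "\n".join(
                f"Title: {r['title']}\nSnippet: {r['body']}\nURL: {r['href']}"
                for r in results
            )
        return "No results found for your query."
    except Exception as e:  # network errors, rate limits, etc.
        return f"Error performing search: {e}"
```
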
@@ -57,7 +54,6 @@ def calculator_tool(expression: str) -> str:
     """
     print(f"Tool: calculator_tool, Expression: {expression}")
    try:
-        # A slightly safer eval using a limited global scope
         result = eval(expression, {"__builtins__": {}}, {"sqrt": lambda x: x**0.5, "pi": 3.1415926535})
         return str(result)
     except Exception as e:
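
One caveat on the eval() call this hunk touches: blanking __builtins__ and exposing only sqrt and pi blocks casual misuse, but eval is still not a true sandbox. A safer arithmetic-only alternative, sketched here with the standard-library ast module (an illustration, not the commit's code):

```python
# Hedged alternative sketch: arithmetic-only evaluation via ast instead of eval().
import ast
import operator

_OPS = {
    ast.Add: operator.add, ast.Sub: operator.sub,
    ast.Mult: operator.mul, ast.Div: operator.truediv,
    ast.Pow: operator.pow, ast.USub: operator.neg,
}

def safe_eval(expression: str):
    def walk(node):
        if isinstance(node, ast.Expression):
            return walk(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](walk(node.left), walk(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](walk(node.operand))
        raise ValueError("Unsupported expression element")
    return walk(ast.parse(expression, mode="eval"))

# e.g. safe_eval("2 + 3 * 4") -> 14
```
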
@@ -66,7 +62,7 @@ def calculator_tool(expression: str) -> str:
 
 # --- Agent Definition ---
 class ReActAgent:
-    def __init__(self, llm_client, tools: dict, max_iterations=7):
+    def __init__(self, llm_client, tools: dict, max_iterations=7): # max_iterations can be tuned
         print("ReActAgent initialized.")
         if llm_client is None:
             raise ValueError("LLM client not initialized. Check HF_TOKEN and model availability.")
@@ -81,9 +77,11 @@ class ReActAgent:
         ])
         self.tool_names = ", ".join(tools.keys())
 
+        # Refined ReAct prompt template for exact match answers
         self.react_prompt_template = inspect.cleandoc(f"""
         You are a helpful and observant AI assistant. Your goal is to answer the following question accurately.
         You must use a step-by-step thinking process (Thought, Action, Observation).
+        The final answer submitted must be an EXACT match to the correct response, without any extra explanations or prefixes being part of the answer itself.
 
         Available tools:
         {self.tool_descriptions}
@@ -95,7 +93,7 @@ class ReActAgent:
         Observation: The result of the action.
         ... (this Thought/Action/Observation sequence can repeat up to {self.max_iterations} times)
         Thought: I now know the final answer.
-        Final Answer:
+        Final Answer: [Provide ONLY the precise answer here. For example, if the question is "What is 2+2?", the Final Answer should be just "4". Do not include any other text or explanations in the answer part itself.]
 
         Begin!
         """) + "\nQuestion: {question}\n{scratchpad}"
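
To make the expected format concrete, a plausible exchange under this template (an invented example, not output from the app) would look like:

```
Question: What is the square root of 144 plus 3?
Thought: I should compute sqrt(144) + 3 with the calculator.
Action: calculator_tool[sqrt(144) + 3]
Observation: 15.0
Thought: I now know the final answer.
Final Answer: 15.0
```
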
@@ -105,9 +103,11 @@ class ReActAgent:
         try:
             response = self.llm.text_generation(
                 prompt,
-                max_new_tokens=512,
-                temperature=0.
-                do_sample=True,
+                max_new_tokens=512, # Adjust if LLM needs more space for thought process
+                temperature=0.1, # Lower temperature for more deterministic and precise answers
+                do_sample=True, # Often needed if temperature is not 1.0
+                # Using temperature < 1.0 makes it do_sample=True by default in many HuggingFace implementations
+                # stop_sequences=["Observation:"] # Can help, but might prematurely stop LLM. Parsing is more robust.
             )
             return response.strip()
         except Exception as e:
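
On the sampling flags added here: with huggingface_hub's InferenceClient, temperature only takes effect when sampling is enabled, hence the explicit do_sample=True. A minimal isolated sketch of the same call (model and token handling as at the top of the file; illustrative, not part of the commit):

```python
# Isolated sketch of the generation call; parameter values mirror the diff.
import os
from huggingface_hub import InferenceClient

client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token=os.getenv("HF_TOKEN"))
text = client.text_generation(
    "Question: What is 2+2?\nThought:",
    max_new_tokens=512,
    temperature=0.1,  # low temperature for near-deterministic output
    do_sample=True,   # temperature is only applied when sampling
    # stop_sequences=["Observation:"],  # optional; may cut generation off early
)
print(text.strip())
```
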
@@ -118,24 +118,32 @@ class ReActAgent:
         print(f"ReActAgent received question (first 100 chars): {question[:100]}...")
 
         scratchpad = ""
+        # Initial prompt construction for the first turn
         current_prompt = self.react_prompt_template.format(question=question, scratchpad=scratchpad)
 
         for i in range(self.max_iterations):
             print(f"\nIteration {i+1}")
+            # Note: The scratchpad builds up. Ensure the LLM prompt correctly handles cumulative context.
+            # The current template appends the new LLM output and observation to the scratchpad.
+            # current_prompt is reconstructed each time using the *updated* scratchpad.
+
             llm_output = self.run_llm(current_prompt)
 
             if not llm_output:
                 print("LLM returned empty or error, stopping.")
                 return "Agent Error: LLM failed to respond."
 
+            # Append the LLM's full response (thought and potentially action or final answer) to scratchpad
             scratchpad += llm_output + "\n"
 
+            # Check for "Final Answer:" in the LLM's *current* output
             final_answer_match = re.search(r"Final Answer:\s*(.*)", llm_output, re.DOTALL | re.IGNORECASE)
             if final_answer_match:
                 answer = final_answer_match.group(1).strip()
-                print(f"Found Final Answer: {answer}")
-                return answer
+                print(f"Found Final Answer in LLM output: '{answer}'")
+                return answer # This is the clean answer
 
+            # Parse Action from the LLM's *current* output
             action_match = re.search(r"Action:\s*([a-zA-Z_0-9]+)\[(.*?)\]", llm_output, re.DOTALL)
             if action_match:
                 tool_name = action_match.group(1).strip()
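
A quick sanity check on the Final Answer parsing above (illustrative snippet, not part of the commit): with re.DOTALL, the capture group takes everything after the marker, including any trailing lines, so the .strip() matters.

```python
import re

llm_output = "Thought: I now know the final answer.\nFinal Answer: Paris"
m = re.search(r"Final Answer:\s*(.*)", llm_output, re.DOTALL | re.IGNORECASE)
if m:
    print(m.group(1).strip())  # -> Paris
```
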
@@ -147,16 +155,20 @@ class ReActAgent:
                         observation = self.tools[tool_name](tool_input)
                     except Exception as e:
                         observation = f"Error executing tool {tool_name}: {e}"
-                    print(f"Observation: {observation[:200]}...")
-                    scratchpad += f"Observation: {observation}\n"
+                    print(f"Observation: {observation[:200]}...") # Print truncated observation
+                    scratchpad += f"Observation: {observation}\n" # Add observation to scratchpad
                 else:
                     print(f"Unknown tool: {tool_name}")
                     scratchpad += f"Observation: Error - Unknown tool '{tool_name}'. Available tools: {self.tool_names}\n"
             else:
-
+                # If no action and no Final Answer, it implies the LLM might be just thinking,
+                # or the output is malformed. The loop will continue, using the updated scratchpad.
+                print("No valid action found in LLM output for this iteration. LLM might be thinking or output is malformed.")
 
+            # Reconstruct the prompt for the next iteration with the updated scratchpad
             current_prompt = self.react_prompt_template.format(question=question, scratchpad=scratchpad)
 
+
         # Fallback if max_iterations is reached without a "Final Answer:"
         print(f"Max iterations reached for question (first 50 chars): {question[:50]}...")
         standard_failure_message = "Agent could not determine an answer within the allowed steps."
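
And the same kind of check for the Action branch (illustrative snippet, not part of the commit): the regex expects tool calls written as name[input], and unknown tool names fall through to the error observation.

```python
import re

tools = {"search_tool": lambda q: f"(pretend results for '{q}')"}

llm_output = "Thought: I should look this up.\nAction: search_tool[capital of France]"
m = re.search(r"Action:\s*([a-zA-Z_0-9]+)\[(.*?)\]", llm_output, re.DOTALL)
if m:
    tool_name, tool_input = m.group(1).strip(), m.group(2).strip()
    observation = tools[tool_name](tool_input) if tool_name in tools else f"Unknown tool '{tool_name}'"
    print(f"Observation: {observation}")  # -> Observation: (pretend results for 'capital of France')
```
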
@@ -227,7 +239,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-            print(f"Agent answer for task {task_id}: {submitted_answer[:100]}...")
+            print(f"Agent answer for task {task_id}: '{submitted_answer[:100]}...'") # Added quotes for clarity
         except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
@@ -291,7 +303,7 @@ with gr.Blocks() as demo:
         1. This Space implements a ReAct (Reasoning-Action) agent using an LLM from the Hugging Face Inference API.
         2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
         3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-        4. The agent uses a search tool (DuckDuckGo) and a calculator tool.
+        4. The agent uses a search tool (DuckDuckGo) and a calculator tool. The prompt has been refined to encourage EXACT MATCH answers.
         ---
         **Disclaimers:**
         * LLM responses can be slow, and running through all questions will take time.