Final_Assignment_Template_Final

Sleeping

App Files Community

mujtabarizvi committed on May 17, 2025

Commit

d4303f4

verified ·

1 Parent(s): cfce637

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -30

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import re # For parsing LLM output
 # --- HF Inference API for LLM ---
 from huggingface_hub import InferenceClient
-LLM_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 try:
     hf_token = os.getenv("HF_TOKEN")
@@ -58,25 +58,29 @@ class ReActAgent:
         ])
         self.tool_names = ", ".join(tools.keys())
         self.react_prompt_template = inspect.cleandoc(f"""
-            You are a helpful AI assistant. Your goal is to answer the CURRENT question accurately.
-            Focus ONLY on the provided "Question:". Do not generate new questions or continue a dialogue beyond answering the current question.
-            You must use a step-by-step thinking process (Thought, Action, Observation) for the current question.
-            The final answer submitted must be an EXACT match to the correct response, without any extra explanations or prefixes being part of the answer itself.
             Available tools:
             {self.tool_descriptions}
             Use the following format FOR THE CURRENT QUESTION ONLY:
             Question: the input question you must answer
-            Thought: Your reasoning and plan for the current question.
-            Action: The action to take for the current question, should be one of [{self.tool_names}]. Input to the tool is between brackets. E.g., search_tool[query] or calculator_tool[expression].
-            Observation: The result of the action for the current question.
-            ... (this Thought/Action/Observation sequence can repeat for the current question)
-            Thought: I now have enough information to answer the current question.
-            Final Answer: [Provide ONLY the precise answer to the CURRENT question here. For example, if the question is "What is 2+2?", the Final Answer should be just "4". Do not include any other text, reasoning, or new questions after this line.]
-            Let's begin with the current question.
         """) + "\nQuestion: {question}\n{scratchpad}"
@@ -84,7 +88,7 @@ class ReActAgent:
         try:
             response = self.llm.text_generation(
                 prompt,
-                max_new_tokens=512,
                 temperature=0.1,
                 do_sample=True,
             )
@@ -102,27 +106,27 @@ class ReActAgent:
             print(f"\nIteration {i+1}")
             llm_output = self.run_llm(current_prompt)
             if not llm_output:
                 print("LLM returned empty or error, stopping.")
-                return "Agent Error: LLM failed to respond."
             scratchpad += llm_output + "\n"
             all_final_answers = re.findall(r"Final Answer:\s*(.*)", llm_output, re.DOTALL | re.IGNORECASE)
             if all_final_answers:
-                answer = all_final_answers[-1].strip() # Get the last "Final Answer:"
-                # Further clean up common patterns of LLM over-generation within the answer
-                if "Thought:" in answer:
-                    answer = answer.split("Thought:")[0].strip()
-                if "Action:" in answer:
-                    answer = answer.split("Action:")[0].strip()
-                if "Observation:" in answer:
-                    answer = answer.split("Observation:")[0].strip()
-                if "Question:" in answer: # If it starts generating a new question within the answer
-                    answer = answer.split("Question:")[0].strip()
-                # Handle nested "Final Answer:" in the extracted part
                 inner_final_answers = re.findall(r"Final Answer:\s*(.*)", answer, re.DOTALL | re.IGNORECASE)
                 if inner_final_answers:
                     answer = inner_final_answers[-1].strip()
@@ -146,7 +150,11 @@ class ReActAgent:
                     print(f"Unknown tool: {tool_name}")
                     scratchpad += f"Observation: Error - Unknown tool '{tool_name}'. Available tools: {self.tool_names}\n"
             else:
-                print("No valid action found in LLM output for this iteration.")
             current_prompt = self.react_prompt_template.format(question=question, scratchpad=scratchpad)
@@ -200,7 +208,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
             print(f"Agent answer for task {task_id}: '{submitted_answer[:100]}...'")
         except Exception as e:
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
@@ -229,7 +241,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 # --- Gradio Interface ---
 with gr.Blocks() as demo:
     gr.Markdown("# ReAct Agent Evaluation Runner (GAIA Modified)")
-    gr.Markdown("Instructions and disclaimers...") # Keep your existing markdown or customize
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
@@ -238,11 +256,13 @@ with gr.Blocks() as demo:
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
-    # Startup messages (space_host, space_id, llm_client status)
     space_host_startup = os.getenv("SPACE_HOST")
     space_id_startup = os.getenv("SPACE_ID")
     if space_host_startup: print(f"✅ SPACE_HOST found: {space_host_startup}")
     if space_id_startup: print(f"✅ SPACE_ID found: {space_id_startup}")
     if llm_client is None: print("⚠️ LLM Client (InferenceClient) was not initialized.")
     else: print(f"✅ LLM Client initialized with model: {LLM_MODEL}")
     print("-"*(60 + len(" App Starting ")) + "\n")

 # --- HF Inference API for LLM ---
 from huggingface_hub import InferenceClient
+LLM_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1" # Using Mixtral
 try:
     hf_token = os.getenv("HF_TOKEN")
         ])
         self.tool_names = ", ".join(tools.keys())
+        # Further strengthened ReAct prompt
         self.react_prompt_template = inspect.cleandoc(f"""
+            You are a helpful AI assistant. Your primary goal is to answer the CURRENT question accurately by strictly following a step-by-step reasoning process.
+            Focus ONLY on the provided "Question:". Do not generate new questions or answer unrelated ones.
+            You MUST begin your response with a "Thought:" section, outlining your plan.
+            Following the "Thought:", you MUST specify an "Action:". This action should be one of the available tools (e.g., search_tool[query]) or "Action: None" if no tool is immediately necessary based on your thought.
+            Only AFTER an "Action:" and its corresponding "Observation:" (or if "Action: None" was used, after further "Thought:") can you consider providing a "Final Answer:".
+            DO NOT output "Final Answer:" as your very first step or before going through at least one Thought/Action/Observation cycle if tools or reasoning are required.
+            The final answer itself (the text after "Final Answer:") must be an EXACT match to the correct response, without any extra explanations, apologies, or prefixes.
             Available tools:
             {self.tool_descriptions}
             Use the following format FOR THE CURRENT QUESTION ONLY:
             Question: the input question you must answer
+            Thought: Your reasoning and plan for the current question. This MUST be your first step.
+            Action: The action to take. Choose from [{self.tool_names}] with input in brackets (e.g., search_tool[query]), or use "Action: None" if no tool is needed for this immediate step. This MUST follow your Thought.
+            Observation: The result of the action. If Action was None, state "Observation: No action taken, proceeding with reasoning." or similar. This MUST follow your Action.
+            Thought: Further reasoning based on the observation or your initial thought process. You may loop through Thought/Action/Observation multiple times.
+            Final Answer: [Provide ONLY the precise answer to the CURRENT question here. For example, if the question is "What is 2+2?", the Final Answer should be just "4". Use this ONLY when all reasoning is complete and you are certain of the answer.]
+            Let's begin by thinking about the current question.
         """) + "\nQuestion: {question}\n{scratchpad}"
         try:
             response = self.llm.text_generation(
                 prompt,
+                max_new_tokens=512, # Consider increasing if logs show truncation of ReAct steps
                 temperature=0.1,
                 do_sample=True,
             )
             print(f"\nIteration {i+1}")
             llm_output = self.run_llm(current_prompt)
+            # ---- START: Added for debugging ----
+            print(f"--- RAW LLM OUTPUT (Iteration {i+1}) ---")
+            print(llm_output)
+            print(f"--- END RAW LLM OUTPUT (Iteration {i+1}) ---")
+            # ---- END: Added for debugging ----
             if not llm_output:
                 print("LLM returned empty or error, stopping.")
+                return "Agent could not determine an answer within the allowed steps." # Consistent failure message
             scratchpad += llm_output + "\n"
             all_final_answers = re.findall(r"Final Answer:\s*(.*)", llm_output, re.DOTALL | re.IGNORECASE)
             if all_final_answers:
+                answer = all_final_answers[-1].strip()
+                if "Thought:" in answer: answer = answer.split("Thought:")[0].strip()
+                if "Action:" in answer: answer = answer.split("Action:")[0].strip()
+                if "Observation:" in answer: answer = answer.split("Observation:")[0].strip()
+                if "Question:" in answer: answer = answer.split("Question:")[0].strip()
                 inner_final_answers = re.findall(r"Final Answer:\s*(.*)", answer, re.DOTALL | re.IGNORECASE)
                 if inner_final_answers:
                     answer = inner_final_answers[-1].strip()
                     print(f"Unknown tool: {tool_name}")
                     scratchpad += f"Observation: Error - Unknown tool '{tool_name}'. Available tools: {self.tool_names}\n"
             else:
+                # If the LLM output does not contain "Final Answer:" and also does not contain a valid "Action:",
+                # it means the LLM is likely just "thinking" or its output is malformed for ReAct.
+                # We add its output to scratchpad and let it try again in the next iteration.
+                print("No valid Action or Final Answer found in LLM output for this iteration. LLM might be thinking or output is malformed.")
             current_prompt = self.react_prompt_template.format(question=question, scratchpad=scratchpad)
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
             print(f"Agent answer for task {task_id}: '{submitted_answer[:100]}...'")
         except Exception as e:
+            print(f"Error running agent on task {task_id}: {e}") # Log specific agent error
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+            # Still add a payload so the task is marked as attempted, with an error message.
+            answers_payload.append({"task_id": task_id, "submitted_answer": "Agent execution error."})
     if not answers_payload:
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 # --- Gradio Interface ---
 with gr.Blocks() as demo:
     gr.Markdown("# ReAct Agent Evaluation Runner (GAIA Modified)")
+    gr.Markdown( # Shortened for brevity, keep your detailed markdown
+        """
+        **Instructions & Disclaimers:**
+        Login, then click 'Run Evaluation'. This uses Mixtral and a refined ReAct prompt.
+        Check logs for RAW LLM OUTPUT for debugging.
+        """
+    )
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     space_host_startup = os.getenv("SPACE_HOST")
     space_id_startup = os.getenv("SPACE_ID")
     if space_host_startup: print(f"✅ SPACE_HOST found: {space_host_startup}")
+    # else: print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
     if space_id_startup: print(f"✅ SPACE_ID found: {space_id_startup}")
+    # else: print("ℹ️  SPACE_ID environment variable not found (running locally?).")
     if llm_client is None: print("⚠️ LLM Client (InferenceClient) was not initialized.")
     else: print(f"✅ LLM Client initialized with model: {LLM_MODEL}")
     print("-"*(60 + len(" App Starting ")) + "\n")