Final_Assignment_Template_Final

Sleeping

App Files Files Community

mujtabarizvi committed on May 17, 2025

Commit

c1f3f5c

verified ·

1 Parent(s): cdedb37

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -77

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import re # For parsing LLM output
 # --- HF Inference API for LLM ---
 from huggingface_hub import InferenceClient
-LLM_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1" # Using Mixtral
 try:
     hf_token = os.getenv("HF_TOKEN")
@@ -24,7 +24,7 @@ def search_tool(query: str) -> str:
     print(f"Tool: search_tool, Query: {query}")
     try:
         with DDGS() as ddgs:
-            results = ddgs.text(query, max_results=3)
             if results:
                 return "\n".join([f"Title: {r['title']}\nSnippet: {r['body']}\nURL: {r['href']}" for r in results])
             else:
@@ -36,15 +36,30 @@ def search_tool(query: str) -> str:
 def calculator_tool(expression: str) -> str:
     print(f"Tool: calculator_tool, Expression: {expression}")
     try:
-        result = eval(expression, {"__builtins__": {}}, {"sqrt": lambda x: x**0.5, "pi": 3.1415926535})
         return str(result)
     except Exception as e:
         print(f"Error in calculator_tool: {e}")
-        return f"Error calculating: {str(e)}. Ensure the expression is valid math."
 # --- Agent Definition ---
 class ReActAgent:
-    def __init__(self, llm_client, tools: dict, max_iterations=7): # Iteration 1 for T/A, Iteration 2 for T/FA minimum
         print("ReActAgent initialized.")
         if llm_client is None:
             raise ValueError("LLM client not initialized.")
@@ -58,17 +73,21 @@ class ReActAgent:
         ])
         self.tool_names = ", ".join(tools.keys())
         self.react_prompt_template = inspect.cleandoc(f"""
             You are a helpful AI assistant. Your primary goal is to answer the CURRENT question accurately by strictly following a step-by-step reasoning process.
             Focus ONLY on the provided "Question:". Do not generate new questions or answer unrelated ones.
             You will proceed in a Thought, Action, Observation loop.
             1. First, provide a "Thought:" explaining your reasoning for the current question.
-            2. Next, provide an "Action:". This can be using a tool (e.g., search_tool[query]) or "Action: None" if no tool is needed for this step.
             3. AFTER YOU PROVIDE THE ACTION, YOU MUST STOP. The system will then provide an "Observation:".
             4. Based on the "Observation:", you will continue with another "Thought:", followed by another "Action:" (and then STOP), or if you have enough information, a "Final Answer:".
-            The final answer itself (the text after "Final Answer:") must be an EXACT match to the correct response, without any extra explanations, apologies, or prefixes.
             Available tools:
             {self.tool_descriptions}
@@ -76,41 +95,35 @@ class ReActAgent:
             Use the following format FOR THE CURRENT QUESTION ONLY:
             Question: the input question you must answer
-            Thought: Your reasoning and plan for the current question.
-            Action: The action to take (e.g., search_tool[query] or calculator_tool[expression] or Action: None). AFTER THIS, STOP.
-            Observation: [The system will provide this. Do NOT generate this part.]
-            Thought: Your reasoning based on the previous observation.
-            Action: (Another action, or Action: None). AFTER THIS, STOP.
             Observation: [The system will provide this. Do NOT generate this part.]
             ... (Repeat Thought/Action/STOP/Observation as needed)
             Thought: I have sufficient information to answer the current question.
-            Final Answer: [Provide ONLY the precise answer. For example, if the question is "What is 2+2?", your Final Answer should be just "4".]
-            Let's begin with the current question.
-        """) + "\nQuestion: {question}\n{scratchpad}"
     def run_llm(self, prompt: str) -> str:
         try:
-            # Define stop sequences to make the LLM pause after an Action
-            # or when it's about to give a Final Answer.
-            stop_sequences = [
                 "\nObservation:", "Observation:",
-                # "\nThought:", # Removing this as a primary stop, LLM should produce Thought then Action.
-                                # If it stops at Thought, it means it didn't reach Action.
                 "\nFinal Answer:", "Final Answer:"
             ]
-            # Adding "\nThought:" as a stop might be too aggressive if the LLM wants to write a thought
-            # *before* an action in its first turn. The prompt guides it to do T then A.
-            # The main goal is to stop it *before* it hallucinates an Observation.
             response = self.llm.text_generation(
                 prompt,
-                max_new_tokens=350, # Reduced slightly as each turn should be shorter. Was 512.
-                temperature=0.1,
-                do_sample=True,
-                stop_sequences=stop_sequences, # Key addition
-                # return_full_text=False # Ensure this is False or default if supported, to not include prompt in response
             )
             return response.strip()
         except Exception as e:
@@ -119,35 +132,54 @@ class ReActAgent:
     def __call__(self, question: str) -> str:
         print(f"ReActAgent received question (first 100 chars): {question[:100]}...")
-        scratchpad = ""
         for i in range(self.max_iterations):
             print(f"\nIteration {i+1}")
-            current_prompt = self.react_prompt_template.format(question=question, scratchpad=scratchpad)
-            llm_output = self.run_llm(current_prompt)
             print(f"--- RAW LLM OUTPUT (Iteration {i+1}) ---")
-            print(llm_output)
             print(f"--- END RAW LLM OUTPUT (Iteration {i+1}) ---")
-            if not llm_output:
                 print("LLM returned empty or error, stopping.")
                 return "Agent could not determine an answer within the allowed steps."
-            # Append only the LLM's actual generation for this turn to scratchpad
-            # If llm_output includes a stop sequence like "Observation:", we might not want to add that part yet.
-            # However, the prompt structure expects the scratchpad to be a coherent dialogue.
-            # Let's add the raw llm_output, then the observation will be added explicitly.
-            # Check if llm_output ends with a stop sequence and trim if necessary before adding to scratchpad,
-            # or ensure the next parts of the logic handle it.
-            # For now, add the raw output. The next prompt will contain it.
-            # The key is that the *next* part of the scratchpad will be a *real* observation.
-            # If LLM output already contains "Final Answer:", extract and return
-            all_final_answers = re.findall(r"Final Answer:\s*(.*)", llm_output, re.DOTALL | re.IGNORECASE)
             if all_final_answers:
                 answer = all_final_answers[-1].strip()
                 if "Thought:" in answer: answer = answer.split("Thought:")[0].strip()
                 if "Action:" in answer: answer = answer.split("Action:")[0].strip()
                 if "Observation:" in answer: answer = answer.split("Observation:")[0].strip()
@@ -155,19 +187,17 @@ class ReActAgent:
                 inner_final_answers = re.findall(r"Final Answer:\s*(.*)", answer, re.DOTALL | re.IGNORECASE)
                 if inner_final_answers: answer = inner_final_answers[-1].strip()
-                print(f"Found and extracted Final Answer from LLM output: '{answer}'")
-                scratchpad += llm_output + "\n" # Add the final thought/answer block
-                return answer
-            # If not Final Answer, add the current llm_output (Thought & Action) to scratchpad
-            scratchpad += llm_output # LLM output should be Thought \n Action
-            if not llm_output.endswith("\n"):
-                scratchpad += "\n"
-            # Parse Action from the LLM's *current* output
-            action_match = re.search(r"Action:\s*([a-zA-Z_0-9]+)\[(.*?)\]", llm_output, re.DOTALL)
-            action_none_match = re.search(r"Action:\s*None", llm_output, re.IGNORECASE)
             if action_match:
                 tool_name = action_match.group(1).strip()
@@ -179,30 +209,20 @@ class ReActAgent:
                     except Exception as e:
                         observation_content = f"Error executing tool {tool_name}: {e}"
                     print(f"Observation content: {observation_content[:200]}...")
-                    scratchpad += f"Observation: {observation_content}\n"
                 else:
                     print(f"Unknown tool: {tool_name}")
-                    scratchpad += f"Observation: Error - Unknown tool '{tool_name}'. Available tools: {self.tool_names}\n"
             elif action_none_match:
                 print("Action: None detected.")
-                scratchpad += f"Observation: No action taken, proceeding with reasoning.\n"
             else:
-                # LLM didn't output a valid Action or "Final Answer:". It might be just a "Thought:".
-                # Or it might be a malformed output. Let the loop continue, it will use this partial output in the next prompt.
-                print("No valid Action (tool use or None) or Final Answer found in LLM output for this iteration. LLM might be thinking or output is malformed.")
-                # If it's just a thought, the scratchpad has it. Next iteration will prompt with it.
-                # If no action and no final answer, we might want to consider it a failed step if it persists.
-                # For now, we assume the LLM might be in a multi-step thought process not requiring immediate action.
-                # However, the prompt now *requires* an Action (even "Action: None").
-                # So, if we reach here, the LLM is not perfectly following the format.
-                # We might add a generic "Observation: LLM did not provide a valid action." to prompt for recovery.
-                # This is less critical if the stop sequences work well.
-                # If the LLM stops generating *before* an action, this branch will also be hit.
-                # The raw LLM output log will be key here.
-                if not llm_output.strip().startswith("Thought:"): # If it's not even a thought, it's very off.
-                     scratchpad += "Observation: LLM output was not a valid Thought/Action or Final Answer. Please try again adhering to the format.\n"
-            # current_prompt for next iteration is reconstructed outside the loop start
         print(f"Max iterations reached for question (first 50 chars): {question[:50]}...")
         standard_failure_message = "Agent could not determine an answer within the allowed steps."
@@ -268,13 +288,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
         result_data = response.json()
-        final_status = (
             f"Submission Successful!\nUser: {result_data.get('username')}\n"
             f"Overall Score: {result_data.get('score', 'N/A')}% "
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
-        return final_status, pd.DataFrame(results_log)
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try: error_detail += f" Detail: {e.response.json().get('detail', e.response.text)}"
@@ -290,7 +310,7 @@ with gr.Blocks() as demo:
         """
         **Instructions & Disclaimers:**
         Login, then click 'Run Evaluation'. This uses Mixtral and a refined ReAct prompt with stop sequences.
-        Check logs for RAW LLM OUTPUT for debugging.
         """
     )
     gr.LoginButton()

 # --- HF Inference API for LLM ---
 from huggingface_hub import InferenceClient
+LLM_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 try:
     hf_token = os.getenv("HF_TOKEN")
     print(f"Tool: search_tool, Query: {query}")
     try:
         with DDGS() as ddgs:
+            results = ddgs.text(query, max_results=3) # Fewer results to be less verbose
             if results:
                 return "\n".join([f"Title: {r['title']}\nSnippet: {r['body']}\nURL: {r['href']}" for r in results])
             else:
 def calculator_tool(expression: str) -> str:
     """Evaluate a small arithmetic expression and return the result as text.

     The expression is parsed with ``ast`` and only a whitelisted set of node
     types is evaluated, so the LLM-supplied text is never passed to ``eval``.

     Supported syntax:
       * int / float / complex literals
       * binary operators ``+ - * / // % **``
       * unary ``+`` and ``-``
       * the name ``pi`` and calls to ``sqrt(x)``

     Args:
         expression: The math expression to evaluate. Surrounding whitespace
             is ignored.

     Returns:
         ``str(result)`` on success. On any failure (syntax error, disallowed
         name or construct, division by zero, oversized power, ...) a string
         starting with ``"Error calculating: "`` is returned instead of
         raising, so the caller can feed it back to the agent as an observation.
     """
     # Imported locally so this function does not rely on the file's import header.
     import ast
     import operator

     print(f"Tool: calculator_tool, Expression: {expression}")

     binary_ops = {
         ast.Add: operator.add,
         ast.Sub: operator.sub,
         ast.Mult: operator.mul,
         ast.Div: operator.truediv,
         ast.FloorDiv: operator.floordiv,
         ast.Mod: operator.mod,
         ast.Pow: operator.pow,
     }
     unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
     allowed_names = {"sqrt": lambda x: x ** 0.5, "pi": 3.1415926535}
     # Integer powers above this exponent (e.g. 9**9**9) can run for minutes
     # and produce enormous results, so they are refused up front.
     max_int_exponent = 10_000

     def _evaluate(node):
         """Recursively evaluate one whitelisted AST node."""
         if isinstance(node, ast.Constant):
             value = node.value
             # bool is a subclass of int; reject it together with str/bytes/None.
             if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
                 raise ValueError(f"Unsupported constant: {value!r}")
             return value
         if isinstance(node, ast.Name):
             if node.id not in allowed_names:
                 raise NameError(f"Use of {node.id} is not allowed")
             return allowed_names[node.id]
         if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
             left = _evaluate(node.left)
             right = _evaluate(node.right)
             if (
                 isinstance(node.op, ast.Pow)
                 and isinstance(left, int)
                 and isinstance(right, int)
                 and right > max_int_exponent
             ):
                 raise ValueError("Exponent is too large")
             return binary_ops[type(node.op)](left, right)
         if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
             return unary_ops[type(node.op)](_evaluate(node.operand))
         if isinstance(node, ast.Call):
             # Only plain calls to whitelisted names: no attributes, no keywords.
             if not isinstance(node.func, ast.Name) or node.keywords:
                 raise ValueError("Only direct calls to allowed functions are supported")
             func = _evaluate(node.func)
             if not callable(func):
                 raise TypeError(f"{node.func.id} is not a function")
             return func(*[_evaluate(arg) for arg in node.args])
         raise ValueError(f"Unsupported syntax: {type(node).__name__}")

     try:
         # Strip first: in "eval" mode leading whitespace is an IndentationError.
         tree = ast.parse(expression.strip(), mode="eval")
         result = _evaluate(tree.body)
         # A bare function name such as "sqrt" is not a numeric result.
         if isinstance(result, bool) or not isinstance(result, (int, float, complex)):
             raise ValueError("Expression did not evaluate to a number")
         return str(result)
     except Exception as e:
         # Tool boundary: report the failure as text rather than raising.
         print(f"Error in calculator_tool: {e}")
         return f"Error calculating: {str(e)}. Ensure the expression is valid and uses allowed functions/operators."
 # --- Agent Definition ---
 class ReActAgent:
+    def __init__(self, llm_client, tools: dict, max_iterations=7):
         print("ReActAgent initialized.")
         if llm_client is None:
             raise ValueError("LLM client not initialized.")
         ])
         self.tool_names = ", ".join(tools.keys())
+        # Refined prompt for better tool usage and stopping
         self.react_prompt_template = inspect.cleandoc(f"""
             You are a helpful AI assistant. Your primary goal is to answer the CURRENT question accurately by strictly following a step-by-step reasoning process.
             Focus ONLY on the provided "Question:". Do not generate new questions or answer unrelated ones.
             You will proceed in a Thought, Action, Observation loop.
             1. First, provide a "Thought:" explaining your reasoning for the current question.
+            2. Next, provide an "Action:".
+                - If you need to search the web, use search_tool[query].
+                - If you need to perform a calculation (e.g., arithmetic like 5*5, or math expressions), use calculator_tool[expression].
+                - If no tool is needed for this immediate step based on your current thought and the information available, use "Action: None". Only use Action: None if you are certain no tool can help or is required for the current step.
             3. AFTER YOU PROVIDE THE ACTION, YOU MUST STOP. The system will then provide an "Observation:".
             4. Based on the "Observation:", you will continue with another "Thought:", followed by another "Action:" (and then STOP), or if you have enough information, a "Final Answer:".
+            The final answer itself (the text after "Final Answer:") must be an EXACT, non-empty match to the correct response, without any extra explanations, apologies, or prefixes.
             Available tools:
             {self.tool_descriptions}
             Use the following format FOR THE CURRENT QUESTION ONLY:
             Question: the input question you must answer
+            {'{scratchpad}'}
+            Thought: [Your reasoning and plan for the current question. If continuing from an observation, reason about that observation.]
+            Action: [search_tool[query_for_search] OR calculator_tool[math_expression_to_calculate] OR Action: None]. AFTER THIS, STOP.
             Observation: [The system will provide this. Do NOT generate this part.]
+            Thought: [Your reasoning based on the previous observation.]
+            Action: [Another action or Action: None]. AFTER THIS, STOP.
+            Observation: [The system will provide this.]
             ... (Repeat Thought/Action/STOP/Observation as needed)
             Thought: I have sufficient information to answer the current question.
+            Final Answer: [Provide ONLY the precise, non-empty answer. For example, if the question is "What is 2+2?", your Final Answer should be just "4".]
+            Start your response for the current turn with "Thought:".
+        """) # Removed initial "Question: {question}" here, it's now part of the formatted prompt
     def run_llm(self, prompt: str) -> str:
         try:
+            stop_tokens = [
                 "\nObservation:", "Observation:",
                 "\nFinal Answer:", "Final Answer:"
             ]
             response = self.llm.text_generation(
                 prompt,
+                max_new_tokens=350,
+                temperature=0.05, # Lowered further for more determinism
+                do_sample=True,   # Important if temperature < 1.0
+                stop=stop_tokens, # Using `stop` as per FutureWarning
             )
             return response.strip()
         except Exception as e:
     def __call__(self, question: str) -> str:
         print(f"ReActAgent received question (first 100 chars): {question[:100]}...")
+        scratchpad_history = ""
         for i in range(self.max_iterations):
             print(f"\nIteration {i+1}")
+            # Construct the prompt for the LLM for the current turn
+            # The template now has {scratchpad} in the middle, then format instructions, then prompts for Thought/Action.
+            # We ensure the LLM starts its generation with a Thought.
+            # The initial prompt will be the template + Question + "Thought:"
+            # Subsequent prompts will be template + Question + scratchpad_history + "Thought:"
+            # The main instruction block, question, and current scratchpad history
+            current_prompt_base = self.react_prompt_template.format(scratchpad=scratchpad_history).split("Thought:")[0]
+            current_prompt_text = f"Question: {question}\n" + current_prompt_base
+            if not scratchpad_history: # First turn
+                 current_prompt_text += "Thought:" # Prime for the first thought
+            else: # Subsequent turns, scratchpad_history has previous T/A/O
+                 current_prompt_text += scratchpad_history + "\nThought:" # Prime for next thought after observation
+            print(f"--- PROMPT FOR LLM (Iteration {i+1}, last 300 chars) ---\n...{current_prompt_text[-300:]}\n--- END PROMPT ---")
+            llm_output_this_turn = self.run_llm(current_prompt_text)
             print(f"--- RAW LLM OUTPUT (Iteration {i+1}) ---")
+            print(llm_output_this_turn)
             print(f"--- END RAW LLM OUTPUT (Iteration {i+1}) ---")
+            if not llm_output_this_turn:
                 print("LLM returned empty or error, stopping.")
                 return "Agent could not determine an answer within the allowed steps."
+            # Prepend "Thought:" if LLM didn't include it (due to priming)
+            # This ensures scratchpad consistency if the LLM directly starts with the thought content.
+            actual_llm_generation = llm_output_this_turn
+            if not llm_output_this_turn.strip().startswith("Thought:") and \
+               (scratchpad_history.strip().endswith("Observation:") or not scratchpad_history):
+                actual_llm_generation = "Thought: " + llm_output_this_turn
+            scratchpad_history += actual_llm_generation + "\n"
+            # Check for Final Answer in the llm_output_this_turn
+            # The llm_output_this_turn could be "Thought: ... Final Answer: ..." if no tool was needed.
+            final_answer_segment = actual_llm_generation # Check the full segment for Final Answer
+            all_final_answers = re.findall(r"Final Answer:\s*(.*)", final_answer_segment, re.DOTALL | re.IGNORECASE)
             if all_final_answers:
                 answer = all_final_answers[-1].strip()
+                # Clean common contamination
                 if "Thought:" in answer: answer = answer.split("Thought:")[0].strip()
                 if "Action:" in answer: answer = answer.split("Action:")[0].strip()
                 if "Observation:" in answer: answer = answer.split("Observation:")[0].strip()
                 inner_final_answers = re.findall(r"Final Answer:\s*(.*)", answer, re.DOTALL | re.IGNORECASE)
                 if inner_final_answers: answer = inner_final_answers[-1].strip()
+                if answer: # Only if the answer is not empty after cleaning
+                    print(f"Found and extracted Final Answer: '{answer}'")
+                    return answer
+                else:
+                    print("LLM produced 'Final Answer:' but the content was empty or invalid after cleaning. Continuing.")
+                    # Scratchpad already has this turn's problematic output. Loop continues.
+            # Parse Action from llm_output_this_turn (or actual_llm_generation)
+            action_segment = actual_llm_generation # Check the full segment for Action
+            action_match = re.search(r"Action:\s*([a-zA-Z_0-9]+)\[(.*?)\]", action_segment, re.DOTALL)
+            action_none_match = re.search(r"Action:\s*None", action_segment, re.IGNORECASE)
             if action_match:
                 tool_name = action_match.group(1).strip()
                     except Exception as e:
                         observation_content = f"Error executing tool {tool_name}: {e}"
                     print(f"Observation content: {observation_content[:200]}...")
+                    scratchpad_history += f"Observation: {observation_content}\n"
                 else:
                     print(f"Unknown tool: {tool_name}")
+                    scratchpad_history += f"Observation: Error - Unknown tool '{tool_name}'. Available tools: {self.tool_names}\n"
             elif action_none_match:
                 print("Action: None detected.")
+                scratchpad_history += f"Observation: No action taken, proceeding with reasoning.\n"
             else:
+                print("No valid Action (tool use or None) found in LLM output for this turn. LLM might be thinking or its format is off.")
+                # If the LLM is supposed to always output an Action (even None) but doesn't,
+                # it's a deviation. We add a generic observation to try and get it back on track.
+                # This can happen if it only outputs a Thought.
+                scratchpad_history += "Observation: LLM did not provide an Action in the expected format. Please provide a Thought and then an Action (or Action: None).\n"
         print(f"Max iterations reached for question (first 50 chars): {question[:50]}...")
         standard_failure_message = "Agent could not determine an answer within the allowed steps."
         response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
         result_data = response.json()
+        final__status = ( # Renamed to avoid conflict
             f"Submission Successful!\nUser: {result_data.get('username')}\n"
             f"Overall Score: {result_data.get('score', 'N/A')}% "
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
+        return final_status, pd.DataFrame(results_log) # Corrected variable name
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try: error_detail += f" Detail: {e.response.json().get('detail', e.response.text)}"
         """
         **Instructions & Disclaimers:**
         Login, then click 'Run Evaluation'. This uses Mixtral and a refined ReAct prompt with stop sequences.
+        Check logs for RAW LLM OUTPUT and PROMPT FOR LLM for debugging.
         """
     )
     gr.LoginButton()