New_Final_Assignment

Sleeping

App Files Files Community

naman1102 committed on Jun 2, 2025

Commit

1bc8bac

1 Parent(s): 9a37625

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -43

app.py CHANGED Viewed

@@ -27,32 +27,32 @@ llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)
 agent = create_react_agent(model=llm, tools=tool_node)
 def plan_node(state: AgentState) -> AgentState:
     """
-    Decide which tool (if any) to call. The state is expected to contain:
-      - state["messages"]: a list of BaseMessage (SystemMessage, HumanMessage, etc.)
-      - state["user_input"]: the raw user question (string)
-    We append a new HumanMessage(user_input) to messages, then ask the LLM
-    (via ChatOpenAI) to return exactly one key: web_search_query, ocr_path,
-    excel_path (with excel_sheet_name), or final_answer. The LLM must reply
-    with a bare Python‐dict literal.
-    We then return a new partial AgentState that always includes an updated
-    "messages" list plus exactly one of those tool‐request keys (or final_answer).
     """
-    # 1) Pull user_input out of state
-    user_input = state.get("user_input", "")
-    # 2) Grab prior chat history, which should already be a list of BaseMessage
     prior_msgs = state.get("messages", [])
-    # 3) Append the new user message as a HumanMessage
-    new_history = prior_msgs + [HumanMessage(content=user_input)]
-    # 4) Build a prompt that explains how to choose exactly one key
-    #    We leave new_history as a list of BaseMessage; LLM expects that format.
     explanation = SystemMessage(
         content=(
-            "You can set exactly one of the following keys (in a Python dict) and nothing else:\n"
             "  • web_search_query: <search terms>  \n"
             "  • ocr_path: <path to an image file>  \n"
             "  • excel_path: <path to a .xlsx file>  \n"
@@ -63,20 +63,16 @@ def plan_node(state: AgentState) -> AgentState:
         )
     )
-    # 5) Combine the user conversation with our explanation
     prompt_messages = new_history + [explanation]
-    # 6) Call the LLM. Because prompt_messages is a list of BaseMessage,
-    #    ChatOpenAI will return an AIMessage.
     llm_response = llm(prompt_messages)
     llm_out = llm_response.content.strip()
-    # 7) Try to eval the LLM response as a Python dict
     try:
         parsed = eval(llm_out, {}, {})
         if isinstance(parsed, dict):
-            # Build a new state: keep our updated messages, plus exactly one key
-            new_state: AgentState = {"messages": new_history}
             allowed = {
                 "web_search_query",
                 "ocr_path",
@@ -86,41 +82,44 @@ def plan_node(state: AgentState) -> AgentState:
             }
             for k, v in parsed.items():
                 if k in allowed:
-                    new_state[k] = v
-            return new_state
     except Exception:
         pass
-    # 8) Fallback if parsing failed: keep messages, set a generic final_answer
     return {
         "messages": new_history,
         "final_answer": "Sorry, I could not parse your intent."
     }
-# ─── 5) Define “finalize” node: compose the final answer using any tool results ───
 def finalize_node(state: AgentState) -> AgentState:
     """
-    After any tool results exist in state, or if final_answer was already set,
-    ask the LLM to produce the final answer.
     """
-    parts = state.get("messages", [])
     if "web_search_result" in state and state["web_search_result"] is not None:
-        parts.append(f"WEB_SEARCH_RESULT: {state['web_search_result']}")
     if "ocr_result" in state and state["ocr_result"] is not None:
-        parts.append(f"OCR_RESULT: {state['ocr_result']}")
     if "excel_result" in state and state["excel_result"] is not None:
-        parts.append(f"EXCEL_RESULT: {state['excel_result']}")
-    # If plan already set final_answer, skip calling the LLM again
     if state.get("final_answer") is not None:
         return {"final_answer": state["final_answer"]}
-    parts.append("ASSISTANT: Please provide the final answer now.")
-    print("finalize_node  content problem: ", parts)
-    llm_out = llm(parts).content.strip()
-    print("finalize_node  passed")
-    return {"final_answer": llm_out}
 tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])

 agent = create_react_agent(model=llm, tools=tool_node)
+# ─── Revised plan_node with NO extra arguments ───
 def plan_node(state: AgentState) -> AgentState:
     """
+    Assumes that `state["messages"]` already ends with a HumanMessage of the user’s question.
+    We reuse that history as-is (it already contains the question) and ask the LLM
+    to set exactly one key in a Python dict: web_search_query, ocr_path,
+    excel_path (+ excel_sheet_name), or final_answer.
     """
+    # 1) Grab all prior BaseMessage objects (SystemMessage/HumanMessage/AIMessage) from state
     prior_msgs = state.get("messages", [])
+    # 2) Find the very last HumanMessage (the user_input). We assume the last message is one.
+    #    If there is no HumanMessage, we treat user_input as empty.
+    user_input = ""
+    for msg in reversed(prior_msgs):
+        if isinstance(msg, HumanMessage):
+            user_input = msg.content
+            break
+    # 3) Build our new chat history by re‐using prior_msgs. It already includes that HumanMessage.
+    new_history = prior_msgs.copy()
+    # 4) Add a SystemMessage that instructs the LLM how to choose exactly one key
     explanation = SystemMessage(
         content=(
+            "You can set exactly one of the following keys in a Python dict, and nothing else:\n"
             "  • web_search_query: <search terms>  \n"
             "  • ocr_path: <path to an image file>  \n"
             "  • excel_path: <path to a .xlsx file>  \n"
         )
     )
+    # 5) Compose the prompt as a list of BaseMessage, then call the LLM
     prompt_messages = new_history + [explanation]
     llm_response = llm(prompt_messages)
     llm_out = llm_response.content.strip()
+    # 6) Parse the LLM’s output as a dict
     try:
         parsed = eval(llm_out, {}, {})
         if isinstance(parsed, dict):
+            partial: AgentState = {"messages": new_history}
             allowed = {
                 "web_search_query",
                 "ocr_path",
             }
             for k, v in parsed.items():
                 if k in allowed:
+                    partial[k] = v
+            return partial
     except Exception:
         pass
+    # 7) Fallback if parsing failed
     return {
         "messages": new_history,
         "final_answer": "Sorry, I could not parse your intent."
     }
+# ─── Revised finalize_node with NO extra arguments ───
 def finalize_node(state: AgentState) -> AgentState:
     """
+    Assumes that `state['messages']` is a list of BaseMessage, possibly ending in an AIMessage
+    (or plan_node may have set final_answer directly). We append any tool results
+    as SystemMessages, then prompt the LLM for one final answer.
     """
+    # 1) Copy the existing BaseMessage list
+    history = state.get("messages", []).copy()
+    # 2) If any tool-result fields exist, append them as SystemMessages
     if "web_search_result" in state and state["web_search_result"] is not None:
+        history.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {state['web_search_result']}"))
     if "ocr_result" in state and state["ocr_result"] is not None:
+        history.append(SystemMessage(content=f"OCR_RESULT: {state['ocr_result']}"))
     if "excel_result" in state and state["excel_result"] is not None:
+        history.append(SystemMessage(content=f"EXCEL_RESULT: {state['excel_result']}"))
+    # 3) If plan_node already set final_answer, just return it:
     if state.get("final_answer") is not None:
         return {"final_answer": state["final_answer"]}
+    # 4) Otherwise, ask the LLM to give the final answer now
+    history.append(SystemMessage(content="Please provide the final answer now."))
+    llm_response = llm(history)
+    return {"final_answer": llm_response.content.strip()}
 tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])