Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -27,32 +27,32 @@ llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)
|
|
| 27 |
|
| 28 |
agent = create_react_agent(model=llm, tools=tool_node)
|
| 29 |
|
|
|
|
| 30 |
def plan_node(state: AgentState) -> AgentState:
|
| 31 |
"""
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
We append a new HumanMessage(user_input) to messages, then ask the LLM
|
| 37 |
-
(via ChatOpenAI) to return exactly one key: web_search_query, ocr_path,
|
| 38 |
-
excel_path (with excel_sheet_name), or final_answer. The LLM must reply
|
| 39 |
-
with a bare Python‐dict literal.
|
| 40 |
-
|
| 41 |
-
We then return a new partial AgentState that always includes an updated
|
| 42 |
-
"messages" list plus exactly one of those tool‐request keys (or final_answer).
|
| 43 |
"""
|
| 44 |
-
# 1)
|
| 45 |
-
user_input = state.get("user_input", "")
|
| 46 |
-
# 2) Grab prior chat history, which should already be a list of BaseMessage
|
| 47 |
prior_msgs = state.get("messages", [])
|
| 48 |
-
# 3) Append the new user message as a HumanMessage
|
| 49 |
-
new_history = prior_msgs + [HumanMessage(content=user_input)]
|
| 50 |
|
| 51 |
-
#
|
| 52 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
explanation = SystemMessage(
|
| 54 |
content=(
|
| 55 |
-
"You can set exactly one of the following keys
|
| 56 |
" • web_search_query: <search terms> \n"
|
| 57 |
" • ocr_path: <path to an image file> \n"
|
| 58 |
" • excel_path: <path to a .xlsx file> \n"
|
|
@@ -63,20 +63,16 @@ def plan_node(state: AgentState) -> AgentState:
|
|
| 63 |
)
|
| 64 |
)
|
| 65 |
|
| 66 |
-
# 5)
|
| 67 |
prompt_messages = new_history + [explanation]
|
| 68 |
-
|
| 69 |
-
# 6) Call the LLM. Because prompt_messages is a list of BaseMessage,
|
| 70 |
-
# ChatOpenAI will return an AIMessage.
|
| 71 |
llm_response = llm(prompt_messages)
|
| 72 |
llm_out = llm_response.content.strip()
|
| 73 |
|
| 74 |
-
#
|
| 75 |
try:
|
| 76 |
parsed = eval(llm_out, {}, {})
|
| 77 |
if isinstance(parsed, dict):
|
| 78 |
-
|
| 79 |
-
new_state: AgentState = {"messages": new_history}
|
| 80 |
allowed = {
|
| 81 |
"web_search_query",
|
| 82 |
"ocr_path",
|
|
@@ -86,41 +82,44 @@ def plan_node(state: AgentState) -> AgentState:
|
|
| 86 |
}
|
| 87 |
for k, v in parsed.items():
|
| 88 |
if k in allowed:
|
| 89 |
-
|
| 90 |
-
return
|
| 91 |
except Exception:
|
| 92 |
pass
|
| 93 |
|
| 94 |
-
#
|
| 95 |
return {
|
| 96 |
"messages": new_history,
|
| 97 |
"final_answer": "Sorry, I could not parse your intent."
|
| 98 |
}
|
| 99 |
|
| 100 |
|
| 101 |
-
# ───
|
| 102 |
def finalize_node(state: AgentState) -> AgentState:
|
| 103 |
"""
|
| 104 |
-
|
| 105 |
-
|
|
|
|
| 106 |
"""
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
| 108 |
if "web_search_result" in state and state["web_search_result"] is not None:
|
| 109 |
-
|
| 110 |
if "ocr_result" in state and state["ocr_result"] is not None:
|
| 111 |
-
|
| 112 |
if "excel_result" in state and state["excel_result"] is not None:
|
| 113 |
-
|
| 114 |
-
|
|
|
|
| 115 |
if state.get("final_answer") is not None:
|
| 116 |
return {"final_answer": state["final_answer"]}
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
return {"final_answer": llm_out}
|
| 123 |
-
|
| 124 |
|
| 125 |
tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])
|
| 126 |
|
|
|
|
| 27 |
|
| 28 |
# NOTE(review): `tool_node` is assigned further down in this excerpt (after
# finalize_node), i.e. after this call runs at import time. Confirm it is also
# defined earlier in the file; otherwise this line raises NameError.
agent = create_react_agent(model=llm, tools=tool_node)
|
| 29 |
|
| 30 |
+
# ─── Revised plan_node with NO extra arguments ───
|
| 31 |
def plan_node(state: AgentState) -> AgentState:
|
| 32 |
"""
|
| 33 |
+
Assumes that `state["messages"]` already ends with a HumanMessage of the user’s question.
|
| 34 |
+
We look at that last HumanMessage, append it to our new history, and ask the LLM
|
| 35 |
+
to set exactly one key in a Python dict: web_search_query, ocr_path,
|
| 36 |
+
excel_path (+ excel_sheet_name), or final_answer.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"""
|
| 38 |
+
# 1) Grab all prior BaseMessage objects (SystemMessage/HumanMessage/AIMessage) from state
|
|
|
|
|
|
|
| 39 |
prior_msgs = state.get("messages", [])
|
|
|
|
|
|
|
| 40 |
|
| 41 |
+
# 2) Find the very last HumanMessage (the user_input). We assume the last message is one.
|
| 42 |
+
# If there is no HumanMessage, we treat user_input as empty.
|
| 43 |
+
user_input = ""
|
| 44 |
+
for msg in reversed(prior_msgs):
|
| 45 |
+
if isinstance(msg, HumanMessage):
|
| 46 |
+
user_input = msg.content
|
| 47 |
+
break
|
| 48 |
+
|
| 49 |
+
# 3) Build our new chat history by re‐using prior_msgs. It already includes that HumanMessage.
|
| 50 |
+
new_history = prior_msgs.copy()
|
| 51 |
+
|
| 52 |
+
# 4) Add a SystemMessage that instructs the LLM how to choose exactly one key
|
| 53 |
explanation = SystemMessage(
|
| 54 |
content=(
|
| 55 |
+
"You can set exactly one of the following keys in a Python dict, and nothing else:\n"
|
| 56 |
" • web_search_query: <search terms> \n"
|
| 57 |
" • ocr_path: <path to an image file> \n"
|
| 58 |
" • excel_path: <path to a .xlsx file> \n"
|
|
|
|
| 63 |
)
|
| 64 |
)
|
| 65 |
|
| 66 |
+
# 5) Compose the prompt as a list of BaseMessage, then call the LLM
|
| 67 |
prompt_messages = new_history + [explanation]
|
|
|
|
|
|
|
|
|
|
| 68 |
llm_response = llm(prompt_messages)
|
| 69 |
llm_out = llm_response.content.strip()
|
| 70 |
|
| 71 |
+
# 6) Parse the LLM’s output as a dict
|
| 72 |
try:
|
| 73 |
parsed = eval(llm_out, {}, {})
|
| 74 |
if isinstance(parsed, dict):
|
| 75 |
+
partial: AgentState = {"messages": new_history}
|
|
|
|
| 76 |
allowed = {
|
| 77 |
"web_search_query",
|
| 78 |
"ocr_path",
|
|
|
|
| 82 |
}
|
| 83 |
for k, v in parsed.items():
|
| 84 |
if k in allowed:
|
| 85 |
+
partial[k] = v
|
| 86 |
+
return partial
|
| 87 |
except Exception:
|
| 88 |
pass
|
| 89 |
|
| 90 |
+
# 7) Fallback if parsing failed
|
| 91 |
return {
|
| 92 |
"messages": new_history,
|
| 93 |
"final_answer": "Sorry, I could not parse your intent."
|
| 94 |
}
|
| 95 |
|
| 96 |
|
| 97 |
+
# ─── Revised finalize_node with NO extra arguments ───
|
| 98 |
def finalize_node(state: AgentState) -> AgentState:
    """
    Produce the final answer for the current run.

    If ``state["final_answer"]`` is already set (not ``None``), it is returned
    unchanged and the LLM is not called. Otherwise a copy of
    ``state["messages"]`` is extended with one SystemMessage per non-``None``
    tool result (``web_search_result``, ``ocr_result``, ``excel_result``),
    followed by a closing instruction, and the LLM's stripped reply is
    returned.

    Returns:
        A partial state containing only the ``"final_answer"`` key.
    """
    # 1) Short-circuit first: when an answer already exists there is no need
    #    to copy the history or format tool results that would be discarded.
    final_answer = state.get("final_answer")
    if final_answer is not None:
        return {"final_answer": final_answer}

    # 2) Copy the message list so the caller's state is never mutated.
    #    `or []` also tolerates an explicit `messages=None`.
    history = list(state.get("messages") or [])

    # 3) Surface every available tool result to the model as a SystemMessage.
    tool_results = (
        ("web_search_result", "WEB_SEARCH_RESULT"),
        ("ocr_result", "OCR_RESULT"),
        ("excel_result", "EXCEL_RESULT"),
    )
    for key, label in tool_results:
        value = state.get(key)
        if value is not None:
            history.append(SystemMessage(content=f"{label}: {value}"))

    # 4) Ask the LLM for the final answer. `.invoke()` is the supported
    #    Runnable entry point; calling the model object directly is deprecated.
    history.append(SystemMessage(content="Please provide the final answer now."))
    llm_response = llm.invoke(history)
    return {"final_answer": llm_response.content.strip()}
|
|
|
|
|
|
|
| 123 |
|
| 124 |
tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])
|
| 125 |
|