New_Final_Assignment

Sleeping

App Files Files Community

naman1102 committed on Jun 1, 2025

Commit

0e29657

1 Parent(s): 4eea303

no_json

Browse files

Files changed (2) hide show

app.py +147 -101
tools.py +57 -64

app.py CHANGED Viewed

@@ -16,10 +16,21 @@ from langchain.schema import HumanMessage, AIMessage, SystemMessage
 # Create a ToolNode that knows about your web_search function
 import json
-# (Keep Constan
-#
-#
-# ts as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -29,108 +40,143 @@ tool_node = ToolNode([ocr_image, parse_excel, web_search])
 agent = create_react_agent(model=llm, tools=tool_node)
 # 2) Build a two‐edge graph:
-graph = StateGraph(dict)
-graph.add_node("agent", agent)
-graph.add_edge(START, "agent")
-graph.add_edge("agent", END)
 compiled_graph = graph.compile()
 def respond_to_input(user_input: str) -> str:
-    # 1) Build a SystemMessage that insists on bare JSON if calling a tool
-    system_msg = SystemMessage(
-        content=(
-            "You are an assistant with access to exactly these tools:\n"
-            "  1) web_search(query:str)\n"
-            "  2) parse_excel(path:str,sheet_name:str)\n"
-            "  3) ocr_image(path:str)\n\n"
-            "⚠️ **MANDATORY** ⚠️: If (and only if) you need to call a tool, your entire response MUST be exactly ONE JSON OBJECT and NOTHING ELSE.  \n"
-            "For example, if you want to call web_search, you must respond with exactly:\n"
-            "```json\n"
-            '{"tool":"web_search","query":"Mercedes Sosa studio albums 2000-2009"}\n'
-            "```\n"
-            "That JSON string must start at the very first character of your response and end at the very last character—"
-            "no surrounding quotes, no markdown fences, no explanatory text.  \n\n"
-            "If you do NOT need to call any tool, then you must respond with your final answer as plain text (no JSON)."
-        )
-    )
-    # 2) Initialize state with just that SystemMessage
-    initial_state = {
-        "messages": [
-            system_msg,
-            HumanMessage(content=user_input)
-        ]
-    }
-    # C) FIRST PASS: invoke with only initial_state (no second argument!)
-    try:
-        first_pass = compiled_graph.invoke(initial_state)
-    except Exception as e:
-        print("‼️ ERROR during first invoke:", repr(e))
-        return ""  # return fallback
-    # D) Log the AIMessage(s) from first_pass
-    print("===== AGENT MESSAGES (First Pass) =====")
-    for idx, msg in enumerate(first_pass["messages"]):
-        if isinstance(msg, AIMessage):
-            print(f"[AIMessage #{idx}]: {repr(msg.content)}")
-    print("=========================================")
-    # E) Find the very last AIMessage content
-    last_msg = None
-    for msg in reversed(first_pass["messages"]):
-        if isinstance(msg, AIMessage):
-            last_msg = msg.content
-            break
-    # F) Attempt to parse last_msg as JSON for a tool call (inline, no parse_tool_json)
-    tool_dict = None
-    t = (last_msg or "").strip()
-    if (t.startswith('"') and t.endswith('"')) or (t.startswith("'") and t.endswith("'")):
-        t = t[1:-1]
-    try:
-        obj = json.loads(t)
-        if isinstance(obj, dict) and "tool" in obj:
-            tool_dict = obj
-    except Exception:
-        tool_dict = None
-    if tool_dict:
-        # G) If valid JSON, run the tool
-        print(">> Parsed tool call:", tool_dict)
-        tool_result = tool_node.run(tool_dict)
-        print(f">> Tool '{tool_dict['tool']}' returned: {repr(tool_result)}")
-        # H) SECOND PASS: feed the tool's output back in as an AIMessage,
-        #    with no new human input
-        continuation_state = {
-            "messages": [
-                *first_pass["messages"],
-                AIMessage(content=tool_result)
-            ]
-        }
-        try:
-            second_pass = compiled_graph.invoke(continuation_state)
-        except Exception as e2:
-            print("‼️ ERROR during second invoke:", repr(e2))
-            return ""
-        # I) Log second_pass AIMessage(s)
-        print("===== AGENT MESSAGES (Second Pass) =====")
-        for idx, msg in enumerate(second_pass["messages"]):
-            if isinstance(msg, AIMessage):
-                print(f"[AIMessage2 #{idx}]: {repr(msg.content)}")
-        print("=========================================")
-        # J) Return the final AIMessage from second_pass
-        for msg in reversed(second_pass["messages"]):
-            if isinstance(msg, AIMessage):
-                return msg.content or ""
-        return ""
-    else:
-        # K) If not JSON → treat last_msg as plain text final answer
-        return last_msg or ""
 class BasicAgent:
     def __init__(self):

 # Create a ToolNode that knows about your web_search function
 import json
+from typing import TypedDict, Annotated
+class AgentState(TypedDict, total=False):
+    """State dictionary passed between graph nodes; total=False makes every key optional."""
+    # Chat history. The list is annotated with add_messages
+    # (presumably LangGraph's message-merging reducer — confirm the import).
+    messages: Annotated[list, add_messages]
+    # Fields that the agent node can set to request a tool
+    web_search_query: str
+    ocr_path: str
+    excel_path: str
+    excel_sheet_name: str
+    # Fields to hold the tool outputs
+    web_search_result: str
+    ocr_result: str
+    excel_result: str
+    # A "final_answer" field that the last agent node will fill
+    final_answer: str
+# (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 agent = create_react_agent(model=llm, tools=tool_node)
 # 2) Build the plan → tools → finalize graph:
+def plan_node(state: AgentState, user_input: str) -> AgentState:
+    """
+    Reads state['messages'] + user_input and decides:
+      • If it needs to call web_search, set state['web_search_query'] to a query.
+      • Else if it needs to call ocr, set state['ocr_path'] to the image path.
+      • Else if it needs Excel, set state['excel_path'] and 'excel_sheet_name'.
+      • Otherwise, set state['final_answer'] to a plain text answer.
+    We also append user_input to state['messages'] so the LLM sees the full history.
+    """
+    # 4.a) Grab prior chat history, append user_input:
+    prior = state.get("messages", [])
+    chat_history = prior + [f"USER: {user_input}"]
+    # 4.b) Send that to the LLM with a prompt explaining the new schema:
+    prompt = chat_history + [
+        "ASSISTANT: You can set one of the following keys:\n"
+        "  • web_search_query: <string>  \n"
+        "  • ocr_path: <path>  \n"
+        "  • excel_path: <path>  \n"
+        "  • excel_sheet_name: <sheet>  \n"
+        "Or, if no tool is needed, set final_answer: <your answer>.\n"
+        "Respond with a Python‐dict literal that contains exactly one of those keys.\n"
+        "Example: {'web_search_query':'Mercedes Sosa discography'}\n"
+        "No additional text!"
+    ]
+    llm_out = llm(prompt).content.strip()
+    # 4.c) Try to eval as a Python dict:
+    try:
+        parsed = eval(llm_out, {}, {})  # trust that user obeyed instructions
+        if isinstance(parsed, dict):
+            # Only keep recognized keys, ignore anything else
+            new_state: AgentState = {"messages": chat_history}
+            allowed = {
+                "web_search_query",
+                "ocr_path",
+                "excel_path",
+                "excel_sheet_name",
+                "final_answer"
+            }
+            for k, v in parsed.items():
+                if k in allowed:
+                    new_state[k] = v
+            return new_state
+    except Exception:
+        pass
+    # 4.d) If parsing failed, or they returned something else, set a fallback
+    return {
+        "messages": chat_history,
+        "final_answer": "Sorry, I could not parse your intent."
+    }
+# ─── 5) Define “finalize” node: compose the final answer using any tool results ───
+def finalize_node(state: AgentState) -> AgentState:
+    """
+    By this point:
+      - state['messages'] contains the chat history (ending with how we requested a tool).
+      - One or more of web_search_result, ocr_result, excel_result might be filled.
+      - Or, state['final_answer'] is already set, meaning no tool was needed.
+    We ask the LLM to produce a final text answer.
+    """
+    # 5.a) Build a prompt listing any tool results:
+    parts = state.get("messages", [])
+    if "web_search_result" in state and state["web_search_result"] is not None:
+        parts.append(f"WEB_SEARCH_RESULT: {state['web_search_result']}")
+    if "ocr_result" in state and state["ocr_result"] is not None:
+        parts.append(f"OCR_RESULT: {state['ocr_result']}")
+    if "excel_result" in state and state["excel_result"] is not None:
+        parts.append(f"EXCEL_RESULT: {state['excel_result']}")
+    parts.append("ASSISTANT: Please provide the final answer now.")
+    llm_out = llm(parts).content.strip()
+    return {"final_answer": llm_out}
+# ─── 6) Assemble the state graph over AgentState ───
+graph = StateGraph(AgentState)
+# 6.a) Register nodes in order:
+#      "plan" and "finalize" are the functions defined in this file; "tools"
+#      is the tool_node object created earlier in the module.
+graph.add_node("plan", plan_node)
+graph.add_node("tools", tool_node)
+graph.add_node("finalize", finalize_node)
+# 6.b) START → "plan"
+graph.add_edge(START, "plan")
+# 6.c) If plan_node sets a tool‐query key, go to "tools"; otherwise go to "finalize".
+def route_plan(state: AgentState, plan_out: AgentState) -> str:
+    # If plan_node placed a "web_search_query", "ocr_path", or "excel_path", go to tools.
+    # (Note: plan_out already replaced state["messages"])
+    if plan_out.get("web_search_query") or plan_out.get("ocr_path") or plan_out.get("excel_path"):
+        return "tools"
+    return "finalize"
+# After "plan", route_plan's return value ("tools" or "finalize") is looked up
+# in the mapping below to pick the next node.
+graph.add_conditional_edges(
+    "plan",
+    route_plan,
+    {"tools": "tools", "finalize": "finalize"}
+)
+def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
+    """
+    When a tool‐wrapper returns, it has already consumed the relevant key
+    (e.g. set web_search_query back to None) and added tool_result.
+    We just merge that into state.
+    """
+    new_state = {**state, **tool_out}
+    return new_state
+graph.add_edge("tools", "finalize", run_tools)
+# 6.e) "finalize" → END
+graph.add_edge("finalize", END)
 compiled_graph = graph.compile()
+# ─── 7) Define respond_to_input that drives the graph ───
 def respond_to_input(user_input: str) -> str:
+    # On first turn, messages=[], no query keys set.
+    initial_state: AgentState = {"messages": []}
+    final_state = compiled_graph.invoke(initial_state, user_input)
+    # final_state should have 'final_answer'
+    return final_state.get("final_answer", "Error: No final answer generated.")
 class BasicAgent:
     def __init__(self):

tools.py CHANGED Viewed

@@ -1,78 +1,71 @@
-from langchain_core.tools import tool
-from langchain_community.tools import DuckDuckGoSearchRun
-import pandas as pd
-@tool
-def web_search(query: str) -> str:
-    """
-    Search the web for information.
-    Args:
-        query: The query to search the web for.
-    Returns:
-        The search results.
-    """
-    print(f"Reached: web_search: {query}")
-    ddg = DuckDuckGoSearchRun()
-    return ddg.run(query)
-@tool
-def parse_excel(path: str, sheet_name: str = None) -> str:
-    """
-    Read in an Excel file at `path`, optionally select a sheet by name (or default to the first sheet),
-    then convert the DataFrame to a JSON-like string. Return that text so the LLM can reason over it.
-    Example return value (collapsed):
-      "[{'Name': 'Alice', 'Score': 95}, {'Name': 'Bob', 'Score': 88}, ...]"
-    """
-    # 1. Load the Excel workbook
-    print(f"Reached: parse_excel: {path} {sheet_name}")
-    try:
-        xls = pd.ExcelFile(path)
-    except FileNotFoundError:
-        return f"Error: could not find file at {path}."
-    # 2. Choose the sheet
-    if sheet_name and sheet_name in xls.sheet_names:
-        df = pd.read_excel(xls, sheet_name=sheet_name)
-    else:
-        # default to first sheet
-        df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
-    # 3. Option A: convert to JSON
-    records = df.to_dict(orient="records")
-    return str(records)
 # tools.py
 from pathlib import Path
 from PIL import Image
 import pytesseract
-@tool
-def ocr_image(path: str) -> str:
     """
-    Run OCR on the image at `path` and return the extracted text.
-    - Expects that Tesseract is installed on the host machine.
-    - If the file is missing or unreadable, returns an error string.
     """
-    print(f"Reached: ocr_image: {path}")
-    file = Path(path)
-    if not file.exists():
-        return f"Error: could not find image at {path}"
     try:
-        # Open image via PIL
-        img = Image.open(file)
     except Exception as e:
-        return f"Error: could not open image: {e}"
     try:
-        # Run pytesseract OCR
-        text = pytesseract.image_to_string(img)
     except Exception as e:
-        return f"Error: OCR failed: {e}"
-    return text.strip() or "(no visible text detected)"

 # tools.py
+import pandas as pd
+from langchain_community.tools import DuckDuckGoSearchRun
 from pathlib import Path
 from PIL import Image
 import pytesseract
+def web_search_tool(state: AgentState) -> AgentState:
+    """
+    Expects: state["web_search_query"] is a non‐empty string.
+    Returns: {"web_search_query": None, "web_search_result": <string>}
+    We also clear web_search_query so we don’t loop forever.
+    """
+    query = state.get("web_search_query", "")
+    if not query:
+        return {}  # nothing to do
+    # Run DuckDuckGo
+    ddg = DuckDuckGoSearchRun()
+    result_text = ddg.run(query)
+    return {
+        "web_search_query": None,
+        "web_search_result": result_text
+    }
+def ocr_image_tool(state: AgentState) -> AgentState:
     """
+    Expects: state["ocr_path"] is a path to an image file.
+    Returns: {"ocr_path": None, "ocr_result": <string>}.
     """
+    path = state.get("ocr_path", "")
+    if not path:
+        return {}
     try:
+        img = Image.open(path)
+        text = pytesseract.image_to_string(img)
+        text = text.strip() or "(no visible text)"
     except Exception as e:
+        text = f"Error during OCR: {e}"
+    return {
+        "ocr_path": None,
+        "ocr_result": text
+    }
+def parse_excel_tool(state: AgentState) -> AgentState:
+    """
+    Expects: state["excel_path"] is a path to an .xlsx file,
+             and state["excel_sheet_name"] optionally names a sheet.
+    Returns: {"excel_path": None, "excel_sheet_name": None, "excel_result": <string>}.
+    """
+    path = state.get("excel_path", "")
+    sheet = state.get("excel_sheet_name", "")
+    if not path:
+        return {}
     try:
+        xls = pd.ExcelFile(path)
+        if sheet and sheet in xls.sheet_names:
+            df = pd.read_excel(xls, sheet_name=sheet)
+        else:
+            df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
+        records = df.to_dict(orient="records")
+        text = str(records)
     except Exception as e:
+        text = f"Error reading Excel: {e}"
+    return {
+        "excel_path": None,
+        "excel_sheet_name": None,
+        "excel_result": text
+    }