File size: 9,107 Bytes
c1b893b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5555a89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1b893b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5555a89
c1b893b
 
 
 
 
 
 
 
5555a89
 
 
 
 
 
 
 
c1b893b
5555a89
 
 
 
 
 
 
c1b893b
5555a89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1b893b
 
5555a89
c1b893b
5555a89
c1b893b
 
 
5555a89
c1b893b
5555a89
 
c1b893b
5555a89
 
 
 
c1b893b
5555a89
c1b893b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
import base64
import json
import os
import re
from io import BytesIO, StringIO
from typing import Annotated
from urllib.parse import urlparse

import pandas as pd
import requests
from typing_extensions import TypedDict

from langchain.chat_models import init_chat_model
from langchain_core.messages import SystemMessage

from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition

from tools import *


system_prompt = """You are a general AI assistant. I will ask you a question.

You must:
1. Think step-by-step (invisibly to the user).
2. End your visible answer with the final answer only β€” nothing else.

Rules for the final answer:
- If the answer is a number:
  β€’ No commas in the number.
  β€’ No units (e.g., $, %, km) unless the question explicitly asks for them.
- If the answer is a string:
  β€’ No articles ("a", "an", "the").
  β€’ No abbreviations (e.g., for city names).
  β€’ Write digits as plain words unless instructed otherwise.
- If the answer is a comma-separated list:
  β€’ Apply the above rules individually to each element.

IMPORTANT:
- Do not add any extra words before or after the final answer.
- Do not explain your reasoning to the user β€” keep it hidden.
- The output must be exactly the final answer following the above rules.

Examples:
Q: Who wrote the novel 1984?
A: George Orwell

Q: How many plays did Shakespeare write?
A: 38
"""


class State(TypedDict):
    """Shared graph state passed between LangGraph nodes."""
    messages: Annotated[list, add_messages]  # chat history; add_messages merges new messages in
    uploaded_filename: str  # display name of an optional user-uploaded file
    uploaded_file: str  # local path or URL of that file; chatbot treats missing/empty as "no file"

def _is_url(path_or_url: str) -> bool:
    try:
        result = urlparse(path_or_url)
        return result.scheme in ("http", "https")
    except:
        return False

# English articles forbidden at the start of string answers (per system_prompt).
_ARTICLES = {"a", "an", "the"}

def _sanitize_visible_answer(text: str) -> str:
    """Keep a single-line final answer; strip quotes and leftover tags."""
    if not text:
        return ""
    t = text.strip()

    if (t.startswith('"') and t.endswith('"')) or (t.startswith("'") and t.endswith("'")):
        t = t[1:-1].strip()

    lines = [ln.strip() for ln in t.splitlines() if ln.strip()]
    if lines:
        t = lines[-1]

    t = t.replace("[YOUR FINAL ANSWER]", "").strip()
    t = t.replace("Final answer: ", "").strip()


    t = re.sub(r"\s+", " ", t)
    t = re.sub(r"<[^>]*>", "", t)

    return t


def _is_number_token(s: str) -> bool:
    return bool(re.fullmatch(r"-?\d+(\.\d+)?", s))


def _has_units(s: str) -> bool:
    return bool(re.search(r"\d\s*[A-Za-z%$]", s))


def _has_commas_in_number(s: str) -> bool:
    return bool(re.search(r"\d,\d", s))


def _starts_with_article(s: str) -> bool:
    """True when the first comma/whitespace-separated token is an English article."""
    tokens = re.split(r"[,\s]+", s.strip())
    if not tokens:
        return False
    return tokens[0].lower() in _ARTICLES


def _is_valid_final_answer(ans: str) -> bool:
    """Validate an answer against the system-prompt formatting rules.

    Rules enforced:
      - single line, non-empty
      - number-like answers (containing a digit): must be a bare signed
        integer/decimal token — no commas, no units
      - comma-separated lists: every element must validate individually,
        and no element may be empty
      - string answers: must not start with an article
    """
    if not ans or "\n" in ans:
        return False

    if "," in ans:
        parts = [p.strip() for p in ans.split(",")]
        # Empty elements (e.g. a trailing comma) invalidate the whole list.
        if any(not p for p in parts):
            return False
        return all(_is_valid_element(p) for p in parts)

    return _is_valid_element(ans)


def _is_valid_element(token: str) -> bool:
    """Validate one answer element (a whole scalar answer or one list item).

    Extracted from the previously duplicated list/scalar branches of
    _is_valid_final_answer; behavior is unchanged.
    """
    if any(ch.isdigit() for ch in token):
        # Contains a digit -> treated as number-like: must be a clean number
        # token and carry no unit suffix (e.g. "5 km", "38 plays" are invalid).
        return _is_number_token(token) and not _has_units(token)
    # Pure string element: only a leading article is forbidden here.
    return not _starts_with_article(token)


def _process_uploaded_file(file_name: str, file_path: str) -> str:
    """Process a single local file or file URL and return context for the question.

    Images are passed through by reference; text/code files are inlined;
    CSV/Excel files are loaded into a pandas DataFrame preview. Errors
    never propagate — a diagnostic string is returned instead.
    """
    try:
        file_ext = os.path.splitext(file_name)[1].lower()

        if _is_url(file_path):
            # Remote file: fetch once, with a timeout so a dead link
            # cannot hang the whole agent turn.
            response = requests.get(file_path, timeout=30)
            response.raise_for_status()
            content_bytes = response.content
        else:
            # Local file: read from disk. Previously this branch was
            # missing entirely, so local paths fell through and the
            # function implicitly returned None.
            with open(file_path, "rb") as fh:
                content_bytes = fh.read()

        if file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
            # Image content is not inlined; downstream tools get the reference.
            return f"[UPLOADED IMAGE: {file_name}] - URL: {file_path}"

        elif file_ext in ['.txt', '.md', '.py', '.js', '.html', '.css', '.json', '.xml']:
            content_text = content_bytes.decode('utf-8')
            return f"[Code Content:\n{content_text}"

        elif file_ext == '.csv':
            df = pd.read_csv(StringIO(content_bytes.decode('utf-8')))
            return f"[UPLOADED CSV FILE: {file_name}] : {df}"

        elif file_ext in ['.xlsx', '.xls']:
            df = pd.read_excel(BytesIO(content_bytes))
            return f"[EXCEL FILE DATAFRAME: {df}"

        else:
            return f"[UPLOADED FILE: {file_name}] - URL: {file_path}"

    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return f"[ERROR PROCESSING FILE: {os.path.basename(file_path)}] - {str(e)}"

def build_and_compile():
    """Build and compile the LangGraph agent graph.

    Topology: START -> chatbot; chatbot either requests tool calls
    (-> tools -> chatbot, repeatedly) or finishes (-> validator -> END).
    The validator rewrites the last assistant message into a
    rules-compliant one-line final answer before the graph ends.
    """
    graph_builder = StateGraph(State)
    # Tool belt exposed to the model; all names come from `from tools import *`.
    tools = [
    web_search,
    wiki_search,
    academic_search,
    python_code,
    image_info,
    read_mp3_transcript,
    ocr_image,
    math_solver,
    plot_data_tool,
    unit_converter,
    date_time_calculator,
    api_request_tool,
    html_table_extractor,
    multiply,
    add,
    subtract,
    divide,
    modulus,
    power,
    square_root
    ]


    # Deterministic-leaning settings: temperature 0 plus a fixed seed.
    llm = init_chat_model("openai:gpt-4.1-mini",temperature=0, seed=42)
    llm_with_tools = llm.bind_tools(tools)
    # Separate binding used only by the validator's fixer pass: JSON mode, no tools.
    final_llm = llm.bind(response_format={"type": "json_object"})

    def chatbot(state: State):
        # Main LLM node: answers (or requests tools) using the system prompt
        # plus optional uploaded-file context.
        # NOTE(review): the uploaded file is re-fetched/re-processed on every
        # chatbot turn, including after each tool round-trip — confirm that
        # repeated downloads are acceptable.
        file_context = ""
        if "uploaded_file" in state and state["uploaded_file"]:
            file_context = "\n\nAdditional file context:\n" + _process_uploaded_file(file_name=state["uploaded_filename"],file_path=state["uploaded_file"])
        final_prompt = system_prompt + file_context
        return {"messages": [llm_with_tools.invoke([SystemMessage(final_prompt)] + state["messages"])]}

    def validator(state: State):
        """
        Ensure the last assistant message is a valid final answer per system rules.
        If invalid, rewrite once with final_llm (JSON) and output only final_answer.
        """
        # Get last assistant message text
        last = state["messages"][-1]
        text = getattr(last, "content", "") or str(last)

        # 1) sanitize
        clean = _sanitize_visible_answer(text)

        # 2) validate
        if _is_valid_final_answer(clean):
            # Replace the last message with the sanitized one-line answer
            return {"messages": [{"role": "assistant", "content": clean}]}

        # 3) one-shot fixer pass (no tools, JSON enforced)
        # NOTE(review): this instruction demands digits, while system_prompt
        # says to write digits as plain words — confirm which is intended.
        fix_instruction = (
            "Rewrite the final answer to comply with these rules:\n"
            "- Output only the final answer (single line), no extra words.\n"
            "- Numbers should always be expressed as digits.\n"
            "- If number: no commas, no units.\n"
            "- If string: no leading articles ('a','an','the'); no abbreviations.\n"
            "- If list: comma-separated; apply the same rules to each element.\n\n"
            "Return JSON: {\"final_answer\": \"...\"}."
        )
        msgs = [
            SystemMessage(system_prompt),
            {"role": "user", "content": fix_instruction + f"\n\nOriginal answer:\n{clean}"}
        ]
        fixed = final_llm.invoke(msgs)
        fixed_text = str(getattr(fixed, "content", "") or "").strip()
        try:
            obj = json.loads(fixed_text)
            fa = (obj.get("final_answer") or "").strip()
        except Exception:
            # fallback: keep sanitized original if JSON parsing fails
            fa = clean

        # Re-sanitize the fixer's output; it may still violate the rules.
        fa = _sanitize_visible_answer(fa)
        if not _is_valid_final_answer(fa):
            # last resort: keep last line of whatever we have
            fa = (fa or clean).splitlines()[-1].strip()

        return {"messages": [{"role": "assistant", "content": fa}]}

    graph_builder.add_node("chatbot", chatbot)
    tool_node = ToolNode(tools=tools)
    graph_builder.add_node("tools", tool_node)
    graph_builder.add_node("validator", validator)

    # If the model wants to call tools → go to tools; else → go to validator
    graph_builder.add_conditional_edges(
        "chatbot",
        tools_condition,
        {"tools": "tools", "__end__": "validator"},
    )

    # After tools run, go back to chatbot
    graph_builder.add_edge("tools", "chatbot")

    # After validator, we are done
    graph_builder.add_edge("validator", END)

    graph_builder.add_edge(START, "chatbot")

    graph = graph_builder.compile()
    return graph