Spaces:
Sleeping
Sleeping
File size: 9,107 Bytes
c1b893b 5555a89 c1b893b 5555a89 c1b893b 5555a89 c1b893b 5555a89 c1b893b 5555a89 c1b893b 5555a89 c1b893b 5555a89 c1b893b 5555a89 c1b893b 5555a89 c1b893b 5555a89 c1b893b 5555a89 c1b893b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 |
import base64
import json
import os
import re
from io import BytesIO, StringIO
from typing import Annotated
from urllib.parse import urlparse

import pandas as pd
import requests
from langchain.chat_models import init_chat_model
from langchain_core.messages import SystemMessage
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition
from typing_extensions import TypedDict

from tools import *
# System prompt prepended to every model call; it pins the strict
# "final answer only" output format expected from the assistant.
system_prompt = """You are a general AI assistant. I will ask you a question.
You must:
1. Think step-by-step (invisibly to the user).
2. End your visible answer with the final answer only — nothing else.
Rules for the final answer:
- If the answer is a number:
• No commas in the number.
• No units (e.g., $, %, km) unless the question explicitly asks for them.
- If the answer is a string:
• No articles ("a", "an", "the").
• No abbreviations (e.g., for city names).
• Write digits as plain words unless instructed otherwise.
- If the answer is a comma-separated list:
• Apply the above rules individually to each element.
IMPORTANT:
- Do not add any extra words before or after the final answer.
- Do not explain your reasoning to the user — keep it hidden.
- The output must be exactly the final answer following the above rules.
Examples:
Q: Who wrote the novel 1984?
A: George Orwell
Q: How many plays did Shakespeare write?
A: 38
"""
class State(TypedDict):
    """Shared state passed between the nodes of the graph."""

    # Conversation messages; ``add_messages`` is attached as the
    # annotation metadata for this key.
    messages: Annotated[list, add_messages]

    # Name of the uploaded file; the chatbot node passes it on as the
    # ``file_name`` used to pick a handler by extension.
    uploaded_filename: str

    # Location of the uploaded file; the chatbot node passes it on as
    # the ``file_path`` to load.
    uploaded_file: str
def _is_url(path_or_url: str) -> bool:
try:
result = urlparse(path_or_url)
return result.scheme in ("http", "https")
except:
return False
_ARTICLES = {"a", "an", "the"}
def _sanitize_visible_answer(text: str) -> str:
"""Keep a single-line final answer; strip quotes and leftover tags."""
if not text:
return ""
t = text.strip()
if (t.startswith('"') and t.endswith('"')) or (t.startswith("'") and t.endswith("'")):
t = t[1:-1].strip()
lines = [ln.strip() for ln in t.splitlines() if ln.strip()]
if lines:
t = lines[-1]
t = t.replace("[YOUR FINAL ANSWER]", "").strip()
t = t.replace("Final answer: ", "").strip()
t = re.sub(r"\s+", " ", t)
t = re.sub(r"<[^>]*>", "", t)
return t
def _is_number_token(s: str) -> bool:
return bool(re.fullmatch(r"-?\d+(\.\d+)?", s))
def _has_units(s: str) -> bool:
return bool(re.search(r"\d\s*[A-Za-z%$]", s))
def _has_commas_in_number(s: str) -> bool:
return bool(re.search(r"\d,\d", s))
def _starts_with_article(s: str) -> bool:
    """Tell whether the first word of *s* is one of ``_ARTICLES``.

    Words are separated on runs of commas and whitespace; the comparison
    is case-insensitive.
    """
    # re.split always yields at least one element, so index 0 is safe.
    first_word = re.split(r"[,\s]+", s.strip())[0]
    return first_word.lower() in _ARTICLES
def _is_valid_answer_element(item: str) -> bool:
    """Validate one answer element (a scalar answer or one list entry)."""
    if re.fullmatch(r".*\d.*", item):
        # Anything containing a digit is held to the strict number rules.
        return (
            _is_number_token(item)
            and not _has_commas_in_number(item)
            and not _has_units(item)
        )
    # Pure text: the only rule checked is "no leading article".
    return not _starts_with_article(item)


def _is_valid_final_answer(ans: str) -> bool:
    """Check a candidate final answer against the formatting rules.

    Rules:
    - the answer is non-empty and fits on a single line;
    - an answer containing commas is treated as a list: no element may be
      empty and every element must be valid on its own;
    - an element containing a digit must be a plain number with no
      thousands separators and no units;
    - any other element must not start with an article.
    """
    if not ans or "\n" in ans:
        return False

    if "," not in ans:
        return _is_valid_answer_element(ans)

    elements = [piece.strip() for piece in ans.split(",")]
    return all(bool(elem) and _is_valid_answer_element(elem) for elem in elements)
# Upper bound (seconds) for downloading an attachment so that a stalled
# server cannot block the graph forever.
_DOWNLOAD_TIMEOUT_SECONDS = 30


def _process_uploaded_file(file_name: str, file_path: str) -> str:
    """Load an uploaded file and render it as prompt context.

    Args:
        file_name: Name of the file; its extension selects the handler.
            Falls back to *file_path* when empty.
        file_path: Either an http(s) URL or a path on the local disk.

    Returns:
        A text snippet describing the file: decoded text for text/code
        files, a rendered DataFrame for CSV/Excel files, or a reference to
        the location for images and unknown types. On any failure an
        ``[ERROR PROCESSING FILE: ...]`` string is returned instead of
        raising, so the caller can always concatenate the result.
    """
    try:
        is_remote = _is_url(file_path)
        if is_remote:
            response = requests.get(file_path, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
            response.raise_for_status()
            content_bytes = response.content
        else:
            # Local files previously fell through and produced None.
            with open(file_path, "rb") as handle:
                content_bytes = handle.read()

        location = f"{'URL' if is_remote else 'PATH'}: {file_path}"
        file_ext = os.path.splitext(file_name or file_path)[1].lower()

        if file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
            return f"[UPLOADED IMAGE: {file_name}] - {location}"
        if file_ext in ['.txt', '.md', '.py', '.js', '.html', '.css', '.json', '.xml']:
            content_text = content_bytes.decode('utf-8')
            return f"[Code Content:\n{content_text}]"
        if file_ext == '.csv':
            df = pd.read_csv(StringIO(content_bytes.decode('utf-8')))
            return f"[UPLOADED CSV FILE: {file_name}] : {df}"
        if file_ext in ['.xlsx', '.xls']:
            df = pd.read_excel(BytesIO(content_bytes))
            return f"[EXCEL FILE DATAFRAME: {df}]"
        return f"[UPLOADED FILE: {file_name}] - {location}"
    except Exception as e:
        # Deliberate best-effort boundary: report the problem in the
        # prompt context rather than aborting the whole run.
        print(f"Error processing file {file_path}: {e}")
        return f"[ERROR PROCESSING FILE: {os.path.basename(file_path)}] - {str(e)}"
def build_and_compile():
    """Build the agent graph and return it compiled.

    The graph has three nodes:
    - ``chatbot``: calls the tool-enabled model with the system prompt
      (plus uploaded-file context, when present) and the message history;
    - ``tools``: executes the tool calls requested by the model and hands
      control back to ``chatbot``;
    - ``validator``: runs once the model stops calling tools; it
      sanitizes the reply and, when it breaks the answer rules, asks a
      JSON-mode model for a single rewrite.

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.
    """
    graph_builder = StateGraph(State)
    tools = [
        web_search,
        wiki_search,
        academic_search,
        python_code,
        image_info,
        read_mp3_transcript,
        ocr_image,
        math_solver,
        plot_data_tool,
        unit_converter,
        date_time_calculator,
        api_request_tool,
        html_table_extractor,
        multiply,
        add,
        subtract,
        divide,
        modulus,
        power,
        square_root,
    ]
    llm = init_chat_model("openai:gpt-4.1-mini", temperature=0, seed=42)
    llm_with_tools = llm.bind_tools(tools)
    final_llm = llm.bind(response_format={"type": "json_object"})

    def chatbot(state: State):
        """Invoke the tool-enabled model on the conversation so far."""
        file_context = ""
        uploaded_file = state.get("uploaded_file")
        if uploaded_file:
            # Tolerate a missing filename by deriving one from the path.
            file_name = state.get("uploaded_filename") or os.path.basename(uploaded_file)
            processed = _process_uploaded_file(file_name=file_name, file_path=uploaded_file)
            if processed:
                file_context = "\n\nAdditional file context:\n" + processed
        final_prompt = system_prompt + file_context
        reply = llm_with_tools.invoke([SystemMessage(final_prompt)] + state["messages"])
        return {"messages": [reply]}

    def validator(state: State):
        """Ensure the last message is a valid final answer.

        The reply is sanitized first. If it still violates the rules, it
        is rewritten once by ``final_llm`` (JSON output) and only the
        ``final_answer`` field is kept.
        """
        last = state["messages"][-1]
        text = getattr(last, "content", "") or str(last)
        if not isinstance(text, str):
            # Content may be structured (not a plain string); the
            # sanitizer only understands text.
            text = str(text)

        # 1) sanitize
        clean = _sanitize_visible_answer(text)

        # 2) validate
        if _is_valid_final_answer(clean):
            # Emit the sanitized one-line answer as the assistant message.
            return {"messages": [{"role": "assistant", "content": clean}]}

        # 3) one-shot fixer pass (no tools, JSON enforced)
        fix_instruction = (
            "Rewrite the final answer to comply with these rules:\n"
            "- Output only the final answer (single line), no extra words.\n"
            "- Numbers should always be expressed as digits.\n"
            "- If number: no commas, no units.\n"
            "- If string: no leading articles ('a','an','the'); no abbreviations.\n"
            "- If list: comma-separated; apply the same rules to each element.\n\n"
            "Return JSON: {\"final_answer\": \"...\"}."
        )
        msgs = [
            SystemMessage(system_prompt),
            {"role": "user", "content": fix_instruction + f"\n\nOriginal answer:\n{clean}"},
        ]
        fixed = final_llm.invoke(msgs)
        fixed_text = str(getattr(fixed, "content", "") or "").strip()

        try:
            parsed = json.loads(fixed_text)
        except ValueError:
            # Not valid JSON: fall back to the sanitized original below.
            parsed = None

        if isinstance(parsed, dict) and parsed.get("final_answer") is not None:
            # str() keeps numeric JSON values such as {"final_answer": 42}.
            fa = str(parsed["final_answer"]).strip()
        else:
            fa = clean

        fa = _sanitize_visible_answer(fa)
        if not _is_valid_final_answer(fa):
            # Last resort: keep the last line of whatever we have; guard
            # against both candidates being empty.
            remaining_lines = (fa or clean).splitlines()
            fa = remaining_lines[-1].strip() if remaining_lines else ""
        return {"messages": [{"role": "assistant", "content": fa}]}

    graph_builder.add_node("chatbot", chatbot)
    tool_node = ToolNode(tools=tools)
    graph_builder.add_node("tools", tool_node)
    graph_builder.add_node("validator", validator)

    # If the model wants to call tools, go to tools; otherwise go to the
    # validator instead of ending directly.
    graph_builder.add_conditional_edges(
        "chatbot",
        tools_condition,
        {"tools": "tools", "__end__": "validator"},
    )
    # After tools run, go back to chatbot
    graph_builder.add_edge("tools", "chatbot")
    # After validator, we are done
    graph_builder.add_edge("validator", END)
    graph_builder.add_edge(START, "chatbot")

    graph = graph_builder.compile()
    return graph
|