Update app.py
Browse files
app.py
CHANGED
|
@@ -75,7 +75,8 @@ class AgentState(TypedDict):
|
|
| 75 |
label: str
|
| 76 |
context: str
|
| 77 |
answer: str
|
| 78 |
-
task_id: str | None
|
|
|
|
| 79 |
|
| 80 |
|
| 81 |
|
|
@@ -161,61 +162,43 @@ def route_question(state: AgentState) -> AgentState:
|
|
| 161 |
|
| 162 |
def call_tools(state: AgentState) -> AgentState:
|
| 163 |
question, label, task_id = state["question"], state["label"], state["task_id"]
|
|
|
|
| 164 |
|
| 165 |
-
|
| 166 |
-
matched_obj = re.search(matched_pattern, question)
|
| 167 |
|
| 168 |
-
# ---- attachment
|
| 169 |
-
|
|
|
|
| 170 |
blob, ctype = _download_task_file(api_url=DEFAULT_API_URL, task_id=task_id)
|
| 171 |
-
|
| 172 |
if any([blob, ctype]):
|
| 173 |
-
|
| 174 |
-
|
| 175 |
if "python" in ctype:
|
| 176 |
print("[DEBUG] Working with a Python attachment file")
|
| 177 |
state["answer"] = run_python_file.invoke({"code": blob.decode("utf-8")})
|
| 178 |
state["label"] = "python_script"
|
| 179 |
return state
|
| 180 |
-
|
| 181 |
-
# ── Audio --------------------------------------------------------
|
| 182 |
if "audio" in ctype:
|
| 183 |
print("[DEBUG] Working with an audio attachment file")
|
| 184 |
state["context"] = transcribe_audio.invoke({"audio_bytes": blob})
|
| 185 |
state["label"] = "audio"
|
| 186 |
return state
|
| 187 |
-
|
| 188 |
-
# ── Image --------------------------------------------------------
|
| 189 |
if "image" in ctype:
|
| 190 |
print("[DEBUG] Working with an image attachment file")
|
| 191 |
-
state["answer"] = describe_image.invoke(
|
| 192 |
-
{"img_bytes": blob, "question": question}
|
| 193 |
-
)
|
| 194 |
state["label"] = "image"
|
| 195 |
return state
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
# if header_says_sheet or blob_says_sheet:
|
| 205 |
-
# if blob_says_sheet:
|
| 206 |
-
# print(f"[DEBUG] octet-stream sniffed as {sniff_excel_type(blob)}")
|
| 207 |
-
|
| 208 |
-
print("[DEBUG] Working with a Excel/CSV attachment file")
|
| 209 |
-
state["answer"] = read_task_file.invoke(
|
| 210 |
-
{"xls_bytes": blob}
|
| 211 |
-
)
|
| 212 |
-
state["label"] = "other_ext"
|
| 213 |
-
return state
|
| 214 |
-
|
| 215 |
-
elif label == "youtube":
|
| 216 |
print("[TOOL] youtube_transcript")
|
| 217 |
if matched_obj:
|
| 218 |
-
url = re.sub(r'[.,;:!?")]+$', '', matched_obj.group(0))
|
| 219 |
print(f"[TOOL] fetching transcript for: {url}")
|
| 220 |
state["context"] = get_youtube_transcript.invoke({"video_url": url})
|
| 221 |
else:
|
|
@@ -223,8 +206,10 @@ def call_tools(state: AgentState) -> AgentState:
|
|
| 223 |
state["context"] = web_search.invoke({"query": question})
|
| 224 |
elif label == "research":
|
| 225 |
print("[TOOL] web search")
|
| 226 |
-
|
| 227 |
-
|
|
|
|
|
|
|
| 228 |
focused_query = _llm_router.invoke(search_query_prompt).content.strip().strip('"')
|
| 229 |
print(f"[TOOL] search query: {focused_query}")
|
| 230 |
search_json = web_search.invoke({"query": focused_query})
|
|
@@ -321,14 +306,15 @@ class LGAgent:
|
|
| 321 |
_llm_answer = _llm_router
|
| 322 |
self.graph = build_graph()
|
| 323 |
|
| 324 |
-
def __call__(self, question: str, task_id: str | None = None) -> str:
|
| 325 |
-
try:
|
| 326 |
state: AgentState = {
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
|
|
|
| 332 |
}
|
| 333 |
final = self.graph.invoke(state)
|
| 334 |
|
|
@@ -398,7 +384,7 @@ def _answer_question(item: dict) -> str:
|
|
| 398 |
continue
|
| 399 |
for attempt in range(2):
|
| 400 |
try:
|
| 401 |
-
result = LGAgent(model_id=model_id)(augmented_question, task_id=task_id)
|
| 402 |
# Pause between questions to respect Groq's tokens/min limit
|
| 403 |
time.sleep(5)
|
| 404 |
return result
|
|
|
|
| 75 |
label: str
|
| 76 |
context: str
|
| 77 |
answer: str
|
| 78 |
+
task_id: str | None
|
| 79 |
+
file_name: str | None
|
| 80 |
|
| 81 |
|
| 82 |
|
|
|
|
| 162 |
|
| 163 |
def call_tools(state: AgentState) -> AgentState:
|
| 164 |
question, label, task_id = state["question"], state["label"], state["task_id"]
|
| 165 |
+
file_name = state.get("file_name") or ""
|
| 166 |
|
| 167 |
+
matched_obj = re.search(r"https?://\S+", question)
|
|
|
|
| 168 |
|
| 169 |
+
# ---- attachment (only when a file is actually attached to this task) -----
|
| 170 |
+
file_fetched = False
|
| 171 |
+
if task_id and file_name:
|
| 172 |
blob, ctype = _download_task_file(api_url=DEFAULT_API_URL, task_id=task_id)
|
|
|
|
| 173 |
if any([blob, ctype]):
|
| 174 |
+
file_fetched = True
|
| 175 |
+
print(f"[DEBUG] attachment type={ctype}")
|
| 176 |
if "python" in ctype:
|
| 177 |
print("[DEBUG] Working with a Python attachment file")
|
| 178 |
state["answer"] = run_python_file.invoke({"code": blob.decode("utf-8")})
|
| 179 |
state["label"] = "python_script"
|
| 180 |
return state
|
|
|
|
|
|
|
| 181 |
if "audio" in ctype:
|
| 182 |
print("[DEBUG] Working with an audio attachment file")
|
| 183 |
state["context"] = transcribe_audio.invoke({"audio_bytes": blob})
|
| 184 |
state["label"] = "audio"
|
| 185 |
return state
|
|
|
|
|
|
|
| 186 |
if "image" in ctype:
|
| 187 |
print("[DEBUG] Working with an image attachment file")
|
| 188 |
+
state["answer"] = describe_image.invoke({"img_bytes": blob, "question": question})
|
|
|
|
|
|
|
| 189 |
state["label"] = "image"
|
| 190 |
return state
|
| 191 |
+
# Excel / CSV / other binary
|
| 192 |
+
print("[DEBUG] Working with an Excel/CSV attachment file")
|
| 193 |
+
state["answer"] = read_task_file.invoke({"xls_bytes": blob})
|
| 194 |
+
state["label"] = "other_ext"
|
| 195 |
+
return state
|
| 196 |
+
|
| 197 |
+
# ---- label-based routing (always runs when no file was fetched) ----------
|
| 198 |
+
if label == "youtube":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
print("[TOOL] youtube_transcript")
|
| 200 |
if matched_obj:
|
| 201 |
+
url = re.sub(r'[.,;:!?")\]]+$', '', matched_obj.group(0))
|
| 202 |
print(f"[TOOL] fetching transcript for: {url}")
|
| 203 |
state["context"] = get_youtube_transcript.invoke({"video_url": url})
|
| 204 |
else:
|
|
|
|
| 206 |
state["context"] = web_search.invoke({"query": question})
|
| 207 |
elif label == "research":
|
| 208 |
print("[TOOL] web search")
|
| 209 |
+
search_query_prompt = (
|
| 210 |
+
"Write a short Google search query (max 10 words) to answer this question. "
|
| 211 |
+
"Output ONLY the query, nothing else.\n\nQuestion: " + question
|
| 212 |
+
)
|
| 213 |
focused_query = _llm_router.invoke(search_query_prompt).content.strip().strip('"')
|
| 214 |
print(f"[TOOL] search query: {focused_query}")
|
| 215 |
search_json = web_search.invoke({"query": focused_query})
|
|
|
|
| 306 |
_llm_answer = _llm_router
|
| 307 |
self.graph = build_graph()
|
| 308 |
|
| 309 |
+
def __call__(self, question: str, task_id: str | None = None, file_name: str | None = None) -> str:
|
| 310 |
+
try:
|
| 311 |
state: AgentState = {
|
| 312 |
+
"question": question,
|
| 313 |
+
"label": "general",
|
| 314 |
+
"context": "",
|
| 315 |
+
"answer": "",
|
| 316 |
+
"task_id": task_id,
|
| 317 |
+
"file_name": file_name,
|
| 318 |
}
|
| 319 |
final = self.graph.invoke(state)
|
| 320 |
|
|
|
|
| 384 |
continue
|
| 385 |
for attempt in range(2):
|
| 386 |
try:
|
| 387 |
+
result = LGAgent(model_id=model_id)(augmented_question, task_id=task_id, file_name=file_name)
|
| 388 |
# Pause between questions to respect Groq's tokens/min limit
|
| 389 |
time.sleep(5)
|
| 390 |
return result
|