Ghisalbertifederico committed on
Commit
747c5d8
·
verified ·
1 Parent(s): e193ac9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -46
app.py CHANGED
@@ -75,7 +75,8 @@ class AgentState(TypedDict):
75
  label: str
76
  context: str
77
  answer: str
78
- task_id: str | None = None
 
79
 
80
 
81
 
@@ -161,61 +162,43 @@ def route_question(state: AgentState) -> AgentState:
161
 
162
  def call_tools(state: AgentState) -> AgentState:
163
  question, label, task_id = state["question"], state["label"], state["task_id"]
 
164
 
165
- matched_pattern = r"https?://\S+"
166
- matched_obj = re.search(matched_pattern, question)
167
 
168
- # ---- attachment detection ------------------------------------------------
169
- if task_id:
 
170
  blob, ctype = _download_task_file(api_url=DEFAULT_API_URL, task_id=task_id)
171
-
172
  if any([blob, ctype]):
173
- print(f"[DEBUG] attachment type={ctype} ")
174
- # ── Python code ------------------------------------------------------
175
  if "python" in ctype:
176
  print("[DEBUG] Working with a Python attachment file")
177
  state["answer"] = run_python_file.invoke({"code": blob.decode("utf-8")})
178
  state["label"] = "python_script"
179
  return state
180
-
181
- # ── Audio --------------------------------------------------------
182
  if "audio" in ctype:
183
  print("[DEBUG] Working with an audio attachment file")
184
  state["context"] = transcribe_audio.invoke({"audio_bytes": blob})
185
  state["label"] = "audio"
186
  return state
187
-
188
- # ── Image --------------------------------------------------------
189
  if "image" in ctype:
190
  print("[DEBUG] Working with an image attachment file")
191
- state["answer"] = describe_image.invoke(
192
- {"img_bytes": blob, "question": question}
193
- )
194
  state["label"] = "image"
195
  return state
196
-
197
- # ── Excel / CSV ------------------------------------------------------
198
- # header_says_sheet = any(key in ctype for key in ("excel", "sheet", "csv"))
199
- # blob_says_sheet = sniff_excel_type(blob) in {"xlsx", "xls", "csv"}
200
-
201
- parsed = ("image", "audio", "python")
202
- if not any(word in ctype for word in parsed):
203
-
204
- # if header_says_sheet or blob_says_sheet:
205
- # if blob_says_sheet:
206
- # print(f"[DEBUG] octet-stream sniffed as {sniff_excel_type(blob)}")
207
-
208
- print("[DEBUG] Working with a Excel/CSV attachment file")
209
- state["answer"] = read_task_file.invoke(
210
- {"xls_bytes": blob}
211
- )
212
- state["label"] = "other_ext"
213
- return state
214
-
215
- elif label == "youtube":
216
  print("[TOOL] youtube_transcript")
217
  if matched_obj:
218
- url = re.sub(r'[.,;:!?")]+$', '', matched_obj.group(0))
219
  print(f"[TOOL] fetching transcript for: {url}")
220
  state["context"] = get_youtube_transcript.invoke({"video_url": url})
221
  else:
@@ -223,8 +206,10 @@ def call_tools(state: AgentState) -> AgentState:
223
  state["context"] = web_search.invoke({"query": question})
224
  elif label == "research":
225
  print("[TOOL] web search")
226
- # Generate a focused search query (not the full question text)
227
- search_query_prompt = f"Write a short Google search query (max 10 words) to answer this question. Output ONLY the query, nothing else.\n\nQuestion: {question}"
 
 
228
  focused_query = _llm_router.invoke(search_query_prompt).content.strip().strip('"')
229
  print(f"[TOOL] search query: {focused_query}")
230
  search_json = web_search.invoke({"query": focused_query})
@@ -321,14 +306,15 @@ class LGAgent:
321
  _llm_answer = _llm_router
322
  self.graph = build_graph()
323
 
324
- def __call__(self, question: str, task_id: str | None = None) -> str:
325
- try:
326
  state: AgentState = {
327
- "question": question,
328
- "label": "general",
329
- "context": "",
330
- "answer": "",
331
- "task_id": task_id,
 
332
  }
333
  final = self.graph.invoke(state)
334
 
@@ -398,7 +384,7 @@ def _answer_question(item: dict) -> str:
398
  continue
399
  for attempt in range(2):
400
  try:
401
- result = LGAgent(model_id=model_id)(augmented_question, task_id=task_id)
402
  # Pause between questions to respect Groq's tokens/min limit
403
  time.sleep(5)
404
  return result
 
75
  label: str
76
  context: str
77
  answer: str
78
+ task_id: str | None
79
+ file_name: str | None
80
 
81
 
82
 
 
162
 
163
  def call_tools(state: AgentState) -> AgentState:
164
  question, label, task_id = state["question"], state["label"], state["task_id"]
165
+ file_name = state.get("file_name") or ""
166
 
167
+ matched_obj = re.search(r"https?://\S+", question)
 
168
 
169
+ # ---- attachment (only when a file is actually attached to this task) -----
170
+ file_fetched = False
171
+ if task_id and file_name:
172
  blob, ctype = _download_task_file(api_url=DEFAULT_API_URL, task_id=task_id)
 
173
  if any([blob, ctype]):
174
+ file_fetched = True
175
+ print(f"[DEBUG] attachment type={ctype}")
176
  if "python" in ctype:
177
  print("[DEBUG] Working with a Python attachment file")
178
  state["answer"] = run_python_file.invoke({"code": blob.decode("utf-8")})
179
  state["label"] = "python_script"
180
  return state
 
 
181
  if "audio" in ctype:
182
  print("[DEBUG] Working with an audio attachment file")
183
  state["context"] = transcribe_audio.invoke({"audio_bytes": blob})
184
  state["label"] = "audio"
185
  return state
 
 
186
  if "image" in ctype:
187
  print("[DEBUG] Working with an image attachment file")
188
+ state["answer"] = describe_image.invoke({"img_bytes": blob, "question": question})
 
 
189
  state["label"] = "image"
190
  return state
191
+ # Excel / CSV / other binary
192
+ print("[DEBUG] Working with an Excel/CSV attachment file")
193
+ state["answer"] = read_task_file.invoke({"xls_bytes": blob})
194
+ state["label"] = "other_ext"
195
+ return state
196
+
197
+ # ---- label-based routing (always runs when no file was fetched) ----------
198
+ if label == "youtube":
 
 
 
 
 
 
 
 
 
 
 
 
199
  print("[TOOL] youtube_transcript")
200
  if matched_obj:
201
+ url = re.sub(r'[.,;:!?")\]]+$', '', matched_obj.group(0))
202
  print(f"[TOOL] fetching transcript for: {url}")
203
  state["context"] = get_youtube_transcript.invoke({"video_url": url})
204
  else:
 
206
  state["context"] = web_search.invoke({"query": question})
207
  elif label == "research":
208
  print("[TOOL] web search")
209
+ search_query_prompt = (
210
+ "Write a short Google search query (max 10 words) to answer this question. "
211
+ "Output ONLY the query, nothing else.\n\nQuestion: " + question
212
+ )
213
  focused_query = _llm_router.invoke(search_query_prompt).content.strip().strip('"')
214
  print(f"[TOOL] search query: {focused_query}")
215
  search_json = web_search.invoke({"query": focused_query})
 
306
  _llm_answer = _llm_router
307
  self.graph = build_graph()
308
 
309
+ def __call__(self, question: str, task_id: str | None = None, file_name: str | None = None) -> str:
310
+ try:
311
  state: AgentState = {
312
+ "question": question,
313
+ "label": "general",
314
+ "context": "",
315
+ "answer": "",
316
+ "task_id": task_id,
317
+ "file_name": file_name,
318
  }
319
  final = self.graph.invoke(state)
320
 
 
384
  continue
385
  for attempt in range(2):
386
  try:
387
+ result = LGAgent(model_id=model_id)(augmented_question, task_id=task_id, file_name=file_name)
388
  # Pause between questions to respect Groq's tokens/min limit
389
  time.sleep(5)
390
  return result