Commit 2caa35b · Parent(s): a46409c

qwen

Files changed:
- agents/agent.py (+13, -75)
- app.py (+16, -26)
agents/agent.py (CHANGED)

@@ -16,29 +16,6 @@ from dotenv import load_dotenv
 load_dotenv()
 
 
-# Question type analyzer function
-def analyze_question_type(question: str):
-    """Analyze question type and determine appropriate tool strategy"""
-    question_lower = question.lower()
-
-    # Questions requiring web search
-    if any(word in question_lower for word in ["wikipedia", "arxiv", "article", "paper", "website"]):
-        return "web_search_required"
-
-    # Math questions
-    if any(word in question_lower for word in ["calculate", "how many", "percentage", "multiply", "divide"]):
-        return "math_required"
-
-    # File processing questions
-    if any(word in question_lower for word in ["file", "spreadsheet", "csv", "excel", "attached"]):
-        return "file_processing_required"
-
-    # Image analysis questions
-    if any(word in question_lower for word in ["image", "picture", "photo", "visual"]):
-        return "image_analysis_required"
-
-    return "general"
-
 
 # load the system prompt from the file
 with open("system_prompt.txt", "r", encoding="utf-8") as f:

@@ -84,71 +61,31 @@ def build_graph():
         max_tokens=8192,
         timeout=120
     )
-
     # Bind tools to LLM
     llm_with_tools = llm.bind_tools(tools)
 
-
+
+
+    # Nodes
     def assistant(state: MessagesState):
         """Assistant node with GAIA format compliance"""
         response = llm_with_tools.invoke(state["messages"])
         return {"messages": [response]}
-
-        # Extract pure answer for GAIA format
-        content = response.content
-
-        # Look for final answer in various formats
-        if "Final Answer:" in content or "Final answer:" in content:
-            parts = content.split("Final Answer:" if "Final Answer:" in content else "Final answer:")
-            final_answer = parts[-1].strip()
-        elif "The answer is:" in content:
-            final_answer = content.split("The answer is:")[-1].strip()
-        elif "Therefore:" in content:
-            final_answer = content.split("Therefore:")[-1].strip()
-        else:
-            # If no explicit format, try to extract the last line/sentence
-            lines = content.strip().split('\n')
-            final_answer = lines[-1].strip()
-
-        # Clean the answer (remove explanations)
-        if '.' in final_answer:
-            # Take only first sentence if multiple sentences
-            final_answer = final_answer.split('.')[0].strip() + '.'
-
-        # Return clean answer
-        response.content = final_answer
-        return {"messages": [response]}
 
 
 
     def retriever(state: MessagesState):
-        """Retriever node
-
-
-
-        similar_question = vector_store.similarity_search(question)
-
-        # Add specific instructions based on question type
-        type_instructions = {
-            "web_search_required": "Use web_search tool to find specific information. Be precise with search queries.",
-            "math_required": "Use math tools (add, subtract, multiply, divide) for ALL calculations. Show your work.",
-            "file_processing_required": "Use file reading tools to process attached files. Extract exact data.",
-            "image_analysis_required": "Use image analysis tools to examine visual content carefully.",
-            "general": "Think step by step and use appropriate tools."
-        }
-
-        instruction_msg = HumanMessage(
-            content=f"Question type: {question_type}. {type_instructions[question_type]}"
-        )
-
-        if similar_question:
+        """Retriever node"""
+        similar_question = vector_store.similarity_search(state["messages"][0].content)
+
+        if similar_question:  # Check if the list is not empty
             example_msg = HumanMessage(
-                content=f"
+                content=f"Here I provide a similar question and answer for reference: \n\n{similar_question[0].page_content}",
             )
-            return {"messages": [sys_msg] + state["messages"] + [
+            return {"messages": [sys_msg] + state["messages"] + [example_msg]}
         else:
-
-
+            # Handle the case when no similar questions are found
+            return {"messages": [sys_msg] + state["messages"]}
 
 
 

@@ -164,8 +101,9 @@ def build_graph():
     )
     builder.add_edge("tools", "assistant")
 
+
     # Compile graph
     return builder.compile()
 
     # Add recursion limit
-    return builder.compile(checkpointer=None, recursion_limit=
+    return builder.compile() #checkpointer=None, recursion_limit=20
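Taken together, the agents/agent.py changes delete the question-type heuristics, the post-hoc answer cleanup in assistant, and the instruction_msg machinery in retriever. What remains is a retriever node that prepends the system prompt plus, when available, one similar question/answer from the vector store, and an assistant node that returns the raw tool-calling LLM response. The following is a minimal, self-contained sketch of that simplified structure: the stub retriever, assistant, and system message stand in for the file's vector-store lookup, tool-bound LLM, and system_prompt.txt, and the tools node with its conditional edges is left out, so it illustrates the wiring rather than reproducing the repository's build_graph().

# Sketch only: stubs replace the real LLM, tools, and vector store.
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langgraph.graph import END, START, MessagesState, StateGraph

sys_msg = SystemMessage(content="You are a helpful assistant.")  # stand-in for system_prompt.txt

def retriever(state: MessagesState):
    # The real node also appends a similar Q/A example found via vector_store.similarity_search().
    return {"messages": [sys_msg] + state["messages"]}

def assistant(state: MessagesState):
    # The real node calls llm_with_tools.invoke(state["messages"]).
    return {"messages": [AIMessage(content="stub answer")]}

builder = StateGraph(MessagesState)
builder.add_node("retriever", retriever)
builder.add_node("assistant", assistant)
builder.add_edge(START, "retriever")
builder.add_edge("retriever", "assistant")
builder.add_edge("assistant", END)  # the real graph routes assistant -> tools -> assistant instead
graph = builder.compile()

result = graph.invoke({"messages": [HumanMessage(content="What is 2 + 2?")]})
print(result["messages"][-1].content)  # -> "stub answer"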
app.py (CHANGED)

@@ -67,42 +67,32 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
 
-
+
+
+    # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
-
-    # Retry
-    session = requests.Session()
-    retry_strategy = Retry(
-        total=5,
-        status_forcelist=[429, 500, 502, 503, 504],
-        backoff_factor=1,
-        respect_retry_after_header=True
-    )
-    adapter = HTTPAdapter(max_retries=retry_strategy)
-    session.mount("http://", adapter)
-    session.mount("https://", adapter)
-
     try:
-
-        time.sleep(2)
-        response = session.get(questions_url, timeout=30)
+        response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-
-
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-
-
-
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
 
+
+
+
     # 3. Run your Agent
     results_log = []
     answers_payload = []

@@ -126,18 +116,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
 
+    # --- Submission Process ---
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
 
 
-    # 5. Submit
+    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
-
-        time.sleep(2)
-        response = session.post(submit_url, json=submission_data, timeout=120)
+        response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (

@@ -179,6 +168,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
+
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
     gr.Markdown("# Basic Agent Evaluation Runner")
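On the app.py side, the commit drops the retrying requests.Session (Retry, HTTPAdapter, and the time.sleep calls) in favour of plain requests.get/requests.post with tighter timeouts, and adds explicit handling for an empty questions list and for JSON decode failures. Below is a small sketch of that fetch/submit pattern, assuming nothing beyond the requests library; the endpoint URLs and helper names are placeholders, not identifiers from app.py.

# Sketch of the simplified fetch/submit flow; URLs and function names are illustrative.
import requests

QUESTIONS_URL = "https://example.com/questions"  # placeholder endpoint
SUBMIT_URL = "https://example.com/submit"        # placeholder endpoint

def fetch_questions():
    try:
        response = requests.get(QUESTIONS_URL, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return None
        print(f"Fetched {len(questions_data)} questions.")
        return questions_data
    except requests.exceptions.JSONDecodeError as e:
        # Checked before RequestException because JSONDecodeError subclasses it in requests.
        print(f"Error decoding JSON response: {e}")
        print(f"Response text: {response.text[:500]}")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
    return None

def submit_answers(submission_data: dict):
    # Single POST with a fixed timeout; no retry adapter, matching the diff above.
    response = requests.post(SUBMIT_URL, json=submission_data, timeout=60)
    response.raise_for_status()
    return response.json()

if __name__ == "__main__":
    questions = fetch_questions()
    print(questions)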