i-dhilip committed on
Commit
80b9c59
·
verified ·
1 Parent(s): 21be9fb

Upload 4 files

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -35
  2. .gitignore +116 -116
  3. app.py +302 -302
  4. main.py +185 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1,116 +1,116 @@
1
- # Byte-compiled / optimized / DLL files
2
- __pycache__/
3
- *.py[cod]
4
- *$py.class
5
- *.so
6
-
7
- # Distribution / packaging
8
- .Python
9
- build/
10
- develop-eggs/
11
- dist/
12
- downloads/
13
- eggs/
14
- .eggs/
15
- lib/
16
- lib64/
17
- parts/
18
- sdist/
19
- var/
20
- wheels/
21
- *.egg-info/
22
- .installed.cfg
23
- *.egg
24
-
25
- # Virtual environments
26
- venv/
27
- ENV/
28
- env/
29
- .env
30
- .venv
31
- env.bak/
32
- venv.bak/
33
- .python-version
34
-
35
- # Unit test / coverage reports
36
- htmlcov/
37
- .tox/
38
- .nox/
39
- .coverage
40
- .coverage.*
41
- .cache
42
- nosetests.xml
43
- coverage.xml
44
- *.cover
45
- .hypothesis/
46
- .pytest_cache/
47
- pytest-*.xml
48
-
49
- # Jupyter Notebook
50
- .ipynb_checkpoints
51
-
52
- # IPython
53
- profile_default/
54
- ipython_config.py
55
-
56
- # Logs
57
- *.log
58
- logs/
59
- log/
60
-
61
- # IDE specific files
62
- .idea/
63
- .vscode/
64
- *.swp
65
- *.swo
66
- *~
67
- .DS_Store
68
- .project
69
- .pydevproject
70
- .settings/
71
- .vs/
72
- *.sublime-project
73
- *.sublime-workspace
74
-
75
- # Database
76
- *.db
77
- *.rdb
78
- *.sqlite
79
- *.sqlite3
80
-
81
- # Environment variables
82
- .env
83
- .env.local
84
- .env.development.local
85
- .env.test.local
86
- .env.production.local
87
-
88
- # macOS specific
89
- .DS_Store
90
- .AppleDouble
91
- .LSOverride
92
- Icon
93
- ._*
94
- .DocumentRevisions-V100
95
- .fseventsd
96
- .Spotlight-V100
97
- .TemporaryItems
98
- .Trashes
99
- .VolumeIcon.icns
100
- .com.apple.timemachine.donotpresent
101
-
102
- # AI/model files
103
- *.h5
104
- *.pb
105
- *.onnx
106
- *.tflite
107
- *.pt
108
- *.pth
109
- *.weights
110
-
111
- # Temporary files
112
- tmp/
113
- temp/
114
- .tmp
115
- *.tmp
116
-
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+
7
+ # Distribution / packaging
8
+ .Python
9
+ build/
10
+ develop-eggs/
11
+ dist/
12
+ downloads/
13
+ eggs/
14
+ .eggs/
15
+ lib/
16
+ lib64/
17
+ parts/
18
+ sdist/
19
+ var/
20
+ wheels/
21
+ *.egg-info/
22
+ .installed.cfg
23
+ *.egg
24
+
25
+ # Virtual environments
26
+ venv/
27
+ ENV/
28
+ env/
29
+ .env
30
+ .venv
31
+ env.bak/
32
+ venv.bak/
33
+ .python-version
34
+
35
+ # Unit test / coverage reports
36
+ htmlcov/
37
+ .tox/
38
+ .nox/
39
+ .coverage
40
+ .coverage.*
41
+ .cache
42
+ nosetests.xml
43
+ coverage.xml
44
+ *.cover
45
+ .hypothesis/
46
+ .pytest_cache/
47
+ pytest-*.xml
48
+
49
+ # Jupyter Notebook
50
+ .ipynb_checkpoints
51
+
52
+ # IPython
53
+ profile_default/
54
+ ipython_config.py
55
+
56
+ # Logs
57
+ *.log
58
+ logs/
59
+ log/
60
+
61
+ # IDE specific files
62
+ .idea/
63
+ .vscode/
64
+ *.swp
65
+ *.swo
66
+ *~
67
+ .DS_Store
68
+ .project
69
+ .pydevproject
70
+ .settings/
71
+ .vs/
72
+ *.sublime-project
73
+ *.sublime-workspace
74
+
75
+ # Database
76
+ *.db
77
+ *.rdb
78
+ *.sqlite
79
+ *.sqlite3
80
+
81
+ # Environment variables
82
+ .env
83
+ .env.local
84
+ .env.development.local
85
+ .env.test.local
86
+ .env.production.local
87
+
88
+ # macOS specific
89
+ .DS_Store
90
+ .AppleDouble
91
+ .LSOverride
92
+ Icon
93
+ ._*
94
+ .DocumentRevisions-V100
95
+ .fseventsd
96
+ .Spotlight-V100
97
+ .TemporaryItems
98
+ .Trashes
99
+ .VolumeIcon.icns
100
+ .com.apple.timemachine.donotpresent
101
+
102
+ # AI/model files
103
+ *.h5
104
+ *.pb
105
+ *.onnx
106
+ *.tflite
107
+ *.pt
108
+ *.pth
109
+ *.weights
110
+
111
+ # Temporary files
112
+ tmp/
113
+ temp/
114
+ .tmp
115
+ *.tmp
116
+
app.py CHANGED
@@ -1,302 +1,302 @@
1
- import os
2
- import gradio as gr
3
- import requests
4
- import pandas as pd
5
- from datetime import datetime
6
- from transformers import pipeline
7
- from langchain_community.llms import HuggingFaceTextGenInference
8
- from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
9
- from langchain.chains import LLMChain
10
- from langchain.agents import Tool
11
- from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
12
- from langchain_community.utilities import TextRequestsWrapper
13
- from langchain_community.embeddings import HuggingFaceEmbeddings
14
- from langchain_community.vectorstores import Chroma
15
-
16
- # --- Constants ---
17
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
- MAX_ANSWER_LENGTH = 50
19
-
20
- # --- LLM Setup ---
21
- # Using Hugging Face Text Generation Inference API instead of loading model locally
22
- # This connects to a more powerful open source model through HF's inference API
23
- llm = HuggingFaceTextGenInference(
24
- inference_server_url="https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
25
- max_new_tokens=256,
26
- temperature=0.1,
27
- repetition_penalty=1.03,
28
- top_k=10,
29
- top_p=0.95,
30
- timeout=120,
31
- streaming=False,
32
- huggingface_api_key=os.getenv("HF_API_TOKEN", None), # Set your HF API token in environment variables
33
- )
34
-
35
- # --- System Message ---
36
- system_prompt = """You are a helpful assistant tasked with answering questions using a set of tools.
37
- Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
38
- FINAL ANSWER: [YOUR FINAL ANSWER].
39
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations, and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
40
- system_message_prompt = SystemMessagePromptTemplate.from_template(system_prompt)
41
-
42
- # --- Tools ---
43
- ddg = DuckDuckGoSearchAPIWrapper()
44
- requests_wrapper = TextRequestsWrapper()
45
-
46
- def wiki_search(query):
47
- """Search Wikipedia for a query and return maximum 2 results."""
48
- search_results = ddg.run(query)
49
- return f"Wikipedia search results for '{query}': {search_results}"
50
-
51
- def web_search(query):
52
- """Search DuckDuckGo for a query and return maximum 3 results."""
53
- search_results = ddg.run(query)
54
- return f"Web search results for '{query}': {search_results}"
55
-
56
- def arxiv_search(query):
57
- """Search Arxiv for a query and return maximum 3 results."""
58
- try:
59
- url = f"https://export.arxiv.org/api/query?search_query=all:{query}&start=0&max_results=3"
60
- response = requests_wrapper.get(url)
61
- return f"Arxiv search results for '{query}': {response.text[:500]}..." # Truncate for readability
62
- except Exception as e:
63
- return f"Error searching Arxiv: {str(e)}"
64
-
65
- # --- Fallback for Chroma DB if not initialized ---
66
- try:
67
- # --- Chroma DB Setup ---
68
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
69
- vector_store = Chroma(
70
- embedding_function=embeddings,
71
- persist_directory="./chroma_db"
72
- )
73
-
74
- def create_retriever_tool(query):
75
- """A tool to retrieve similar questions from a vector store."""
76
- try:
77
- similar_question = vector_store.similarity_search(query)
78
- if similar_question and len(similar_question) > 0:
79
- return f"Similar question found: {similar_question[0].page_content}"
80
- return "No similar questions found in the database."
81
- except Exception as e:
82
- return f"Error using retriever: {str(e)}"
83
- except Exception as e:
84
- print(f"Warning: Could not initialize Chroma DB: {e}")
85
- def create_retriever_tool(query):
86
- return "Retriever tool is not available."
87
-
88
- # Define the tools
89
- tools = [
90
- Tool(
91
- name="Wikipedia Search",
92
- func=wiki_search,
93
- description="Search Wikipedia for a query and return maximum 2 results."
94
- ),
95
- Tool(
96
- name="Web Search",
97
- func=web_search,
98
- description="Search DuckDuckGo for a query and return maximum 3 results."
99
- ),
100
- Tool(
101
- name="Arxiv Search",
102
- func=arxiv_search,
103
- description="Search Arxiv for a query and return maximum 3 results."
104
- ),
105
- Tool(
106
- name="Retriever",
107
- func=create_retriever_tool,
108
- description="A tool to retrieve similar questions from a vector store."
109
- )
110
- ]
111
-
112
- def create_agent(llm, tools):
113
- """Create an agent with the specified tools."""
114
- prompt = ChatPromptTemplate.from_messages([
115
- system_message_prompt,
116
- HumanMessagePromptTemplate.from_template("{input}")
117
- ])
118
- llm_chain = LLMChain(llm=llm, prompt=prompt)
119
- return llm_chain
120
-
121
- def extract_final_answer(full_response):
122
- """Extract only the final answer from the agent's response."""
123
- if "FINAL ANSWER:" in full_response:
124
- return full_response.split("FINAL ANSWER:")[1].strip()
125
- return full_response.strip()
126
-
127
- def run_and_submit_all(profile: gr.OAuthProfile | None):
128
- """
129
- Fetches all questions, runs the EnhancedAgent on them, submits all answers,
130
- and displays the results.
131
- """
132
- # --- Determine HF Space Runtime URL and Repo URL ---
133
- space_id = os.getenv("SPACE_ID")
134
-
135
- if profile:
136
- username = f"{profile.username}"
137
- print(f"User logged in: {username}")
138
- else:
139
- print("User not logged in.")
140
- return "Please Login to Hugging Face with the button.", None
141
-
142
- api_url = DEFAULT_API_URL
143
- questions_url = f"{api_url}/questions"
144
- submit_url = f"{api_url}/submit"
145
-
146
- # 1. Instantiate Agent
147
- try:
148
- agent = create_agent(llm, tools)
149
- except Exception as e:
150
- print(f"Error instantiating agent: {e}")
151
- return f"Error initializing agent: {e}", None
152
-
153
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
154
- print(agent_code)
155
-
156
- # 2. Fetch Questions
157
- print(f"Fetching questions from: {questions_url}")
158
- try:
159
- response = requests.get(questions_url, timeout=15)
160
- response.raise_for_status()
161
- questions_data = response.json()
162
- if not questions_data:
163
- print("Fetched questions list is empty.")
164
- return "Fetched questions list is empty or invalid format.", None
165
- print(f"Fetched {len(questions_data)} questions.")
166
- except requests.exceptions.RequestException as e:
167
- print(f"Error fetching questions: {e}")
168
- return f"Error fetching questions: {e}", None
169
- except Exception as e:
170
- print(f"An unexpected error occurred fetching questions: {e}")
171
- return f"An unexpected error occurred fetching questions: {e}", None
172
-
173
- # 3. Run your Agent
174
- results_log = []
175
- answers_payload = []
176
- print(f"Running agent on {len(questions_data)} questions...")
177
-
178
- # Define a fallback answer function in case the main agent fails
179
- def get_simple_answer(question):
180
- """Provide a simple answer when the main agent fails"""
181
- # Very basic responses for common question types
182
- if "capital" in question.lower():
183
- return "Unknown"
184
- elif "population" in question.lower() or "how many" in question.lower():
185
- return "0"
186
- elif "when" in question.lower():
187
- return "Unknown"
188
- elif "where" in question.lower():
189
- return "Unknown"
190
- elif "who" in question.lower():
191
- return "Unknown"
192
- elif "true or false" in question.lower():
193
- return "True"
194
- else:
195
- return "Unknown"
196
-
197
- for item in questions_data:
198
- task_id = item.get("task_id")
199
- question_text = item.get("question")
200
- if not task_id or question_text is None:
201
- print(f"Skipping item with missing task_id or question: {item}")
202
- continue
203
-
204
- try:
205
- print(f"Processing question: {question_text}")
206
- # Get the response from the agent
207
- agent_response = agent.run(question_text)
208
- print(f"Agent response: {agent_response}")
209
-
210
- # Extract just the final answer part
211
- final_answer = extract_final_answer(agent_response)
212
-
213
- # Make sure the answer isn't too long - truncate if needed
214
- if len(final_answer) > MAX_ANSWER_LENGTH:
215
- final_answer = final_answer[:MAX_ANSWER_LENGTH]
216
- print(f"Warning: Answer truncated to {MAX_ANSWER_LENGTH} characters")
217
-
218
- # Add to payload for submission
219
- answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})
220
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": final_answer})
221
- print(f"Task {task_id}: Processed answer: {final_answer}")
222
-
223
- except Exception as e:
224
- print(f"Error running agent on task {task_id}: {e}")
225
-
226
- # Use fallback strategy
227
- fallback_answer = get_simple_answer(question_text)
228
- answers_payload.append({"task_id": task_id, "submitted_answer": fallback_answer})
229
- results_log.append({
230
- "Task ID": task_id,
231
- "Question": question_text,
232
- "Submitted Answer": f"{fallback_answer} (FALLBACK)"
233
- })
234
- print(f"Task {task_id}: Used fallback answer: {fallback_answer}")
235
-
236
- if not answers_payload:
237
- print("Agent did not produce any answers to submit.")
238
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
239
-
240
- # 4. Prepare Submission
241
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
242
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
243
- print(status_update)
244
-
245
- # 5. Submit
246
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
247
- try:
248
- response = requests.post(submit_url, json=submission_data, timeout=60)
249
- response.raise_for_status()
250
- result_data = response.json()
251
- final_status = (
252
- f"Submission Successful!\n"
253
- f"User: {result_data.get('username')}\n"
254
- f"Overall Score: {result_data.get('score', 'N/A')}% "
255
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
256
- f"Message: {result_data.get('message', 'No message received.')}"
257
- )
258
- print("Submission successful.")
259
- results_df = pd.DataFrame(results_log)
260
- return final_status, results_df
261
- except Exception as e:
262
- status_message = f"Submission Failed: {e}"
263
- print(status_message)
264
- results_df = pd.DataFrame(results_log)
265
- return status_message, results_df
266
-
267
- # --- Build Gradio Interface using Blocks ---
268
- with gr.Blocks() as demo:
269
- gr.Markdown("# GAIA Evaluation Agent using Multiple Search Tools")
270
- gr.Markdown(
271
- """
272
- **Instructions:**
273
- 1. Clone this space and modify the agent's logic and tools as needed.
274
- 2. Log in with your Hugging Face account.
275
- 3. Click 'Run Evaluation & Submit All Answers' to test your agent.
276
- """
277
- )
278
-
279
- gr.LoginButton()
280
-
281
- run_button = gr.Button("Run Evaluation & Submit All Answers")
282
-
283
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
284
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
285
-
286
- run_button.click(
287
- fn=run_and_submit_all,
288
- outputs=[status_output, results_table]
289
- )
290
-
291
- if __name__ == "__main__":
292
- print("\n" + "-"*30 + " App Starting " + "-"*30)
293
- space_id_startup = os.getenv("SPACE_ID")
294
-
295
- if space_id_startup:
296
- print(f"✅ SPACE_ID found: {space_id_startup}")
297
- else:
298
- print("ℹ️ SPACE_ID environment variable not found (running locally?).")
299
-
300
- print("-"*(60 + len(" App Starting ")) + "\n")
301
- print("Launching Gradio Interface...")
302
- demo.launch(debug=True, share=False)
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import pandas as pd
5
+ from datetime import datetime
6
+ from transformers import pipeline
7
+ from langchain_community.llms import HuggingFaceTextGenInference
8
+ from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
9
+ from langchain.chains import LLMChain
10
+ from langchain.agents import Tool
11
+ from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
12
+ from langchain_community.utilities import TextRequestsWrapper
13
+ from langchain_community.embeddings import HuggingFaceEmbeddings
14
+ from langchain_community.vectorstores import Chroma
15
+
16
+ # --- Constants ---
17
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
+ MAX_ANSWER_LENGTH = 50
19
+
20
+ # --- LLM Setup ---
21
+ # Using Hugging Face Text Generation Inference API instead of loading model locally
22
+ # This connects to a more powerful open source model through HF's inference API
23
+ llm = HuggingFaceTextGenInference(
24
+ inference_server_url="https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
25
+ max_new_tokens=256,
26
+ temperature=0.1,
27
+ repetition_penalty=1.03,
28
+ top_k=10,
29
+ top_p=0.95,
30
+ timeout=120,
31
+ streaming=False,
32
+ huggingface_api_key=os.getenv("HF_API_TOKEN", None), # Set your HF API token in environment variables
33
+ )
34
+
35
+ # --- System Message ---
36
+ system_prompt = """You are a helpful assistant tasked with answering questions using a set of tools.
37
+ Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
38
+ FINAL ANSWER: [YOUR FINAL ANSWER].
39
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations, and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
40
+ system_message_prompt = SystemMessagePromptTemplate.from_template(system_prompt)
41
+
42
+ # --- Tools ---
43
+ ddg = DuckDuckGoSearchAPIWrapper()
44
+ requests_wrapper = TextRequestsWrapper()
45
+
46
+ def wiki_search(query):
47
+ """Search Wikipedia for a query and return maximum 2 results."""
48
+ search_results = ddg.run(query)
49
+ return f"Wikipedia search results for '{query}': {search_results}"
50
+
51
+ def web_search(query):
52
+ """Search DuckDuckGo for a query and return maximum 3 results."""
53
+ search_results = ddg.run(query)
54
+ return f"Web search results for '{query}': {search_results}"
55
+
56
+ def arxiv_search(query):
57
+ """Search Arxiv for a query and return maximum 3 results."""
58
+ try:
59
+ url = f"https://export.arxiv.org/api/query?search_query=all:{query}&start=0&max_results=3"
60
+ response = requests_wrapper.get(url)
61
+ return f"Arxiv search results for '{query}': {response.text[:500]}..." # Truncate for readability
62
+ except Exception as e:
63
+ return f"Error searching Arxiv: {str(e)}"
64
+
65
+ # --- Fallback for Chroma DB if not initialized ---
66
+ try:
67
+ # --- Chroma DB Setup ---
68
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
69
+ vector_store = Chroma(
70
+ embedding_function=embeddings,
71
+ persist_directory="./chroma_db"
72
+ )
73
+
74
+ def create_retriever_tool(query):
75
+ """A tool to retrieve similar questions from a vector store."""
76
+ try:
77
+ similar_question = vector_store.similarity_search(query)
78
+ if similar_question and len(similar_question) > 0:
79
+ return f"Similar question found: {similar_question[0].page_content}"
80
+ return "No similar questions found in the database."
81
+ except Exception as e:
82
+ return f"Error using retriever: {str(e)}"
83
+ except Exception as e:
84
+ print(f"Warning: Could not initialize Chroma DB: {e}")
85
+ def create_retriever_tool(query):
86
+ return "Retriever tool is not available."
87
+
88
+ # Define the tools
89
+ tools = [
90
+ Tool(
91
+ name="Wikipedia Search",
92
+ func=wiki_search,
93
+ description="Search Wikipedia for a query and return maximum 2 results."
94
+ ),
95
+ Tool(
96
+ name="Web Search",
97
+ func=web_search,
98
+ description="Search DuckDuckGo for a query and return maximum 3 results."
99
+ ),
100
+ Tool(
101
+ name="Arxiv Search",
102
+ func=arxiv_search,
103
+ description="Search Arxiv for a query and return maximum 3 results."
104
+ ),
105
+ Tool(
106
+ name="Retriever",
107
+ func=create_retriever_tool,
108
+ description="A tool to retrieve similar questions from a vector store."
109
+ )
110
+ ]
111
+
112
+ def create_agent(llm, tools):
113
+ """Create an agent with the specified tools."""
114
+ prompt = ChatPromptTemplate.from_messages([
115
+ system_message_prompt,
116
+ HumanMessagePromptTemplate.from_template("{input}")
117
+ ])
118
+ llm_chain = LLMChain(llm=llm, prompt=prompt)
119
+ return llm_chain
120
+
121
+ def extract_final_answer(full_response):
122
+ """Extract only the final answer from the agent's response."""
123
+ if "FINAL ANSWER:" in full_response:
124
+ return full_response.split("FINAL ANSWER:")[1].strip()
125
+ return full_response.strip()
126
+
127
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
128
+ """
129
+ Fetches all questions, runs the EnhancedAgent on them, submits all answers,
130
+ and displays the results.
131
+ """
132
+ # --- Determine HF Space Runtime URL and Repo URL ---
133
+ space_id = os.getenv("SPACE_ID")
134
+
135
+ if profile:
136
+ username = f"{profile.username}"
137
+ print(f"User logged in: {username}")
138
+ else:
139
+ print("User not logged in.")
140
+ return "Please Login to Hugging Face with the button.", None
141
+
142
+ api_url = DEFAULT_API_URL
143
+ questions_url = f"{api_url}/questions"
144
+ submit_url = f"{api_url}/submit"
145
+
146
+ # 1. Instantiate Agent
147
+ try:
148
+ agent = create_agent(llm, tools)
149
+ except Exception as e:
150
+ print(f"Error instantiating agent: {e}")
151
+ return f"Error initializing agent: {e}", None
152
+
153
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
154
+ print(agent_code)
155
+
156
+ # 2. Fetch Questions
157
+ print(f"Fetching questions from: {questions_url}")
158
+ try:
159
+ response = requests.get(questions_url, timeout=15)
160
+ response.raise_for_status()
161
+ questions_data = response.json()
162
+ if not questions_data:
163
+ print("Fetched questions list is empty.")
164
+ return "Fetched questions list is empty or invalid format.", None
165
+ print(f"Fetched {len(questions_data)} questions.")
166
+ except requests.exceptions.RequestException as e:
167
+ print(f"Error fetching questions: {e}")
168
+ return f"Error fetching questions: {e}", None
169
+ except Exception as e:
170
+ print(f"An unexpected error occurred fetching questions: {e}")
171
+ return f"An unexpected error occurred fetching questions: {e}", None
172
+
173
+ # 3. Run your Agent
174
+ results_log = []
175
+ answers_payload = []
176
+ print(f"Running agent on {len(questions_data)} questions...")
177
+
178
+ # Define a fallback answer function in case the main agent fails
179
+ def get_simple_answer(question):
180
+ """Provide a simple answer when the main agent fails"""
181
+ # Very basic responses for common question types
182
+ if "capital" in question.lower():
183
+ return "Unknown"
184
+ elif "population" in question.lower() or "how many" in question.lower():
185
+ return "0"
186
+ elif "when" in question.lower():
187
+ return "Unknown"
188
+ elif "where" in question.lower():
189
+ return "Unknown"
190
+ elif "who" in question.lower():
191
+ return "Unknown"
192
+ elif "true or false" in question.lower():
193
+ return "True"
194
+ else:
195
+ return "Unknown"
196
+
197
+ for item in questions_data:
198
+ task_id = item.get("task_id")
199
+ question_text = item.get("question")
200
+ if not task_id or question_text is None:
201
+ print(f"Skipping item with missing task_id or question: {item}")
202
+ continue
203
+
204
+ try:
205
+ print(f"Processing question: {question_text}")
206
+ # Get the response from the agent
207
+ agent_response = agent.run(question_text)
208
+ print(f"Agent response: {agent_response}")
209
+
210
+ # Extract just the final answer part
211
+ final_answer = extract_final_answer(agent_response)
212
+
213
+ # Make sure the answer isn't too long - truncate if needed
214
+ if len(final_answer) > MAX_ANSWER_LENGTH:
215
+ final_answer = final_answer[:MAX_ANSWER_LENGTH]
216
+ print(f"Warning: Answer truncated to {MAX_ANSWER_LENGTH} characters")
217
+
218
+ # Add to payload for submission
219
+ answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})
220
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": final_answer})
221
+ print(f"Task {task_id}: Processed answer: {final_answer}")
222
+
223
+ except Exception as e:
224
+ print(f"Error running agent on task {task_id}: {e}")
225
+
226
+ # Use fallback strategy
227
+ fallback_answer = get_simple_answer(question_text)
228
+ answers_payload.append({"task_id": task_id, "submitted_answer": fallback_answer})
229
+ results_log.append({
230
+ "Task ID": task_id,
231
+ "Question": question_text,
232
+ "Submitted Answer": f"{fallback_answer} (FALLBACK)"
233
+ })
234
+ print(f"Task {task_id}: Used fallback answer: {fallback_answer}")
235
+
236
+ if not answers_payload:
237
+ print("Agent did not produce any answers to submit.")
238
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
239
+
240
+ # 4. Prepare Submission
241
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
242
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
243
+ print(status_update)
244
+
245
+ # 5. Submit
246
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
247
+ try:
248
+ response = requests.post(submit_url, json=submission_data, timeout=60)
249
+ response.raise_for_status()
250
+ result_data = response.json()
251
+ final_status = (
252
+ f"Submission Successful!\n"
253
+ f"User: {result_data.get('username')}\n"
254
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
255
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
256
+ f"Message: {result_data.get('message', 'No message received.')}"
257
+ )
258
+ print("Submission successful.")
259
+ results_df = pd.DataFrame(results_log)
260
+ return final_status, results_df
261
+ except Exception as e:
262
+ status_message = f"Submission Failed: {e}"
263
+ print(status_message)
264
+ results_df = pd.DataFrame(results_log)
265
+ return status_message, results_df
266
+
267
+ # --- Build Gradio Interface using Blocks ---
268
+ with gr.Blocks() as demo:
269
+ gr.Markdown("# GAIA Evaluation Agent using Multiple Search Tools")
270
+ gr.Markdown(
271
+ """
272
+ **Instructions:**
273
+ 1. Clone this space and modify the agent's logic and tools as needed.
274
+ 2. Log in with your Hugging Face account.
275
+ 3. Click 'Run Evaluation & Submit All Answers' to test your agent.
276
+ """
277
+ )
278
+
279
+ gr.LoginButton()
280
+
281
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
282
+
283
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
284
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
285
+
286
+ run_button.click(
287
+ fn=run_and_submit_all,
288
+ outputs=[status_output, results_table]
289
+ )
290
+
291
+ if __name__ == "__main__":
292
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
293
+ space_id_startup = os.getenv("SPACE_ID")
294
+
295
+ if space_id_startup:
296
+ print(f"✅ SPACE_ID found: {space_id_startup}")
297
+ else:
298
+ print("ℹ️ SPACE_ID environment variable not found (running locally?).")
299
+
300
+ print("-"*(60 + len(" App Starting ")) + "\n")
301
+ print("Launching Gradio Interface...")
302
+ demo.launch(debug=True, share=True)
main.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import pandas as pd
5
+ from datetime import datetime
6
+ from transformers import pipeline
7
+ from langchain_community.llms import HuggingFaceTextGenInference
8
+ from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
9
+ from langchain.chains import LLMChain
10
+ from langchain.agents import Tool, initialize_agent, AgentType
11
+ from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
12
+ from langchain_community.utilities import TextRequestsWrapper
13
+ from langchain_community.embeddings import HuggingFaceEmbeddings
14
+ from langchain_community.vectorstores import Chroma
15
+
16
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
MAX_ANSWER_LENGTH = 50  # submitted answers are cut to this many characters

# --- LLM Setup ---
# The access token is sent as a Bearer header via ``server_kwargs``, which the
# wrapper hands to the underlying text-generation client. It used to be passed
# as ``huggingface_api_key``, which is not a declared field of this class.
_hf_api_token = os.getenv("HF_API_TOKEN")  # Required environment variable
llm = HuggingFaceTextGenInference(
    inference_server_url="https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
    max_new_tokens=256,
    temperature=0.1,
    repetition_penalty=1.03,
    top_k=10,
    top_p=0.95,
    timeout=120,
    streaming=False,
    # Without a token no auth header is added, so requests go out anonymously.
    server_kwargs=(
        {"headers": {"Authorization": f"Bearer {_hf_api_token}"}}
        if _hf_api_token
        else {}
    ),
)

# --- System Message ---
# NOTE(review): system_message_prompt is built here but nothing in the visible
# part of this file passes it to the agent - confirm whether that is intended.
system_prompt = """You are a helpful assistant tasked with answering questions using a set of tools.
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations, and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
system_message_prompt = SystemMessagePromptTemplate.from_template(system_prompt)

# --- Tools ---
# Shared clients used by the search helper functions below.
ddg = DuckDuckGoSearchAPIWrapper()
requests_wrapper = TextRequestsWrapper()
43
+
44
def wiki_search(query):
    """Search Wikipedia through DuckDuckGo and return at most two hits as text.

    Args:
        query: Free-text search query.

    Returns:
        A string starting with "Wikipedia search results: " followed by one
        line per hit, or "Wikipedia search error: ..." if the search raised.
    """
    try:
        # results() yields one dict per hit. The previous ddg.run(query)[:2]
        # sliced a *string*, keeping two characters instead of two results.
        hits = ddg.results(f"site:wikipedia.org {query}", max_results=2)
    except Exception as e:
        # Report the failure as text (same as arxiv_search) so the agent can
        # carry on with another tool instead of aborting the whole question.
        return f"Wikipedia search error: {str(e)}"

    # Render every field of each hit without assuming specific dict keys.
    rendered = "\n".join(
        "; ".join(f"{key}: {value}" for key, value in hit.items())
        for hit in hits
    )
    return f"Wikipedia search results: {rendered}"
47
+
48
def web_search(query):
    """Run a general DuckDuckGo web search and return at most three hits as text.

    Args:
        query: Free-text search query.

    Returns:
        A string starting with "Web search results: " followed by one line
        per hit, or "Web search error: ..." if the search raised.
    """
    try:
        # results() yields one dict per hit. The previous ddg.run(query)[:3]
        # sliced a *string*, keeping three characters instead of three results.
        hits = ddg.results(query, max_results=3)
    except Exception as e:
        # Report the failure as text (same as arxiv_search) so the agent can
        # carry on with another tool instead of aborting the whole question.
        return f"Web search error: {str(e)}"

    # Render every field of each hit without assuming specific dict keys.
    rendered = "\n".join(
        "; ".join(f"{key}: {value}" for key, value in hit.items())
        for hit in hits
    )
    return f"Web search results: {rendered}"
51
+
52
def arxiv_search(query):
    """Query the arXiv export API and return the start of the raw response.

    Args:
        query: Free-text search terms, matched against all arXiv fields.

    Returns:
        "Arxiv search results: " plus the first 500 characters of the response
        body and a trailing "...", or "Arxiv search error: ..." on failure.
    """
    from urllib.parse import quote_plus

    try:
        # Encode the query so spaces and reserved characters cannot break
        # the URL.
        url = (
            "https://export.arxiv.org/api/query"
            f"?search_query=all:{quote_plus(str(query))}&start=0&max_results=3"
        )
        response = requests_wrapper.get(url)
        # The text wrapper returns the body itself, so reading ``.text`` on it
        # raised AttributeError every time. Accept a response-like object too.
        if isinstance(response, str):
            body = response
        else:
            body = getattr(response, "text", str(response))
        return f"Arxiv search results: {body[:500]}..."  # Truncate for readability
    except Exception as e:
        return f"Arxiv search error: {str(e)}"
59
+
60
# --- Chroma DB Fallback ---
# Try to open the persisted vector store. If anything in the setup fails, a
# stub with the same name is defined so the tool list can still be built.
try:
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    vector_store = Chroma(
        embedding_function=embeddings,
        persist_directory="./chroma_db"
    )

    def create_retriever_tool(query):
        """Return the stored text most similar to *query*.

        Returns "Similar question: <text>" for the best match, "No similar
        questions" when the store returns nothing, and "Retriever unavailable"
        when the lookup raises.
        """
        try:
            # Only the top hit is used, so ask for a single document.
            similar_question = vector_store.similarity_search(query, k=1)
            return f"Similar question: {similar_question[0].page_content}" if similar_question else "No similar questions"
        except Exception:
            return "Retriever unavailable"
except Exception as e:
    # ``except Exception`` (not a bare ``except``) lets Ctrl-C / SystemExit
    # through, and the cause is printed instead of being silently dropped.
    print(f"Retriever disabled (vector store setup failed): {e}")

    def create_retriever_tool(query):
        """Stub used when the vector store could not be initialised."""
        return "Retriever unavailable"
77
+
78
# Tools offered to the agent; each wraps one of the helper functions above.
tools = [
    Tool(
        name="Wikipedia Search",
        func=wiki_search,
        description="Search Wikipedia (max 2 results)",
    ),
    Tool(
        name="Web Search",
        func=web_search,
        description="General web search (max 3 results)",
    ),
    Tool(
        name="Arxiv Search",
        func=arxiv_search,
        description="Academic paper search",
    ),
    Tool(
        name="Retriever",
        func=create_retriever_tool,
        description="Find similar previous questions",
    ),
]
84
+
85
def create_agent(llm, tools):
    """Create an agent that can use tools.

    Args:
        llm: Language model handed to ``initialize_agent``.
        tools: Tools the agent is allowed to call.

    Returns:
        Whatever ``initialize_agent`` builds for a zero-shot ReAct agent with
        verbose output, at most 3 iterations, and parsing-error handling on.
    """
    agent_options = {
        "agent": AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        "verbose": True,
        "max_iterations": 3,
        "handle_parsing_errors": True,
    }
    return initialize_agent(tools, llm, **agent_options)
95
+
96
def extract_final_answer(response):
    """Return the text that follows the last "FINAL ANSWER:" marker.

    The marker is matched case-insensitively, so "Final Answer:" works too.
    If there is no marker, the whole response is returned stripped.

    Args:
        response: Raw agent output. ``None`` gives an empty string and
            non-string values are converted with ``str``.

    Returns:
        The extracted answer with surrounding whitespace removed.
    """
    import re

    if response is None:
        return ""
    # With no marker present, split() yields the whole text as its only piece,
    # so taking the last piece covers both cases.
    pieces = re.split(r"final answer:", str(response), flags=re.IGNORECASE)
    return pieces[-1].strip()
98
+
99
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer them with the agent, and submit the answers.

    Args:
        profile: Profile of the logged-in Hugging Face user, or ``None`` when
            nobody is logged in. The button's click handler passes no inputs,
            so the annotation is what allows Gradio to supply this value.

    Returns:
        A ``(status, table)`` pair: a status string and a DataFrame with one
        row per answered question. ``table`` is ``None`` when the run stops
        before any question is answered.
    """
    if not profile:
        return "Please login to Hugging Face", None

    username = profile.username
    space_id = os.getenv("SPACE_ID", "local-test")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        agent = create_agent(llm, tools)
    except Exception as e:
        return f"Agent initialization failed: {str(e)}", None

    # Fetch the question list; report the cause instead of hiding it.
    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        response.raise_for_status()
        questions = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Failed to fetch questions: {e}")
        return f"Failed to fetch questions: {e}", None

    if not questions:
        return "Failed to fetch questions: the server returned an empty list", None

    results = []
    answers = []

    for q in questions:
        task_id = q.get("task_id")
        question = q.get("question", "")
        if not task_id or not question:
            # Nothing meaningful can be answered or submitted for this item.
            print(f"Skipping malformed question item: {q}")
            continue

        try:
            answer = agent.run(question)
            final_answer = extract_final_answer(answer)[:MAX_ANSWER_LENGTH]
        except Exception as e:
            # One failing question must not abort the run; log why it failed.
            print(f"Agent failed on task {task_id}: {e}")
            final_answer = "ERROR"

        results.append({"Task ID": task_id, "Question": question, "Answer": final_answer})
        answers.append({"task_id": task_id, "submitted_answer": final_answer})

    if not answers:
        return "No answers were produced, nothing to submit", pd.DataFrame(results)

    try:
        submission = {
            "username": username,
            "agent_code": agent_code,
            "answers": answers
        }
        response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status = f"Score: {result_data.get('score', 'N/A')}% ({result_data.get('correct_count', 0)}/{len(answers)})"
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Submission failed: {e}")
        status = f"Submission failed: {e}"

    return status, pd.DataFrame(results)
149
+
150
# --- Build Gradio Interface using Blocks ---
# Layout, top to bottom: title, instructions, login button, run button,
# status box, results table. The run button is wired with outputs only.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Evaluation Agent using Multiple Search Tools")
    gr.Markdown(
        """
        **Instructions:**
        1. Clone this space and modify the agent's logic and tools as needed.
        2. Log in with your Hugging Face account.
        3. Click 'Run Evaluation & Submit All Answers' to test your agent.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Read-only status text plus a table with one row per answered question.
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
173
+
174
if __name__ == "__main__":
    # Startup banner: 30 dashes on each side of the title.
    rule = "-" * 30
    title = " App Starting "
    print("\n" + rule + title + rule)

    # SPACE_ID is only reported here; the launch below does not depend on it.
    space_id_startup = os.getenv("SPACE_ID")
    print(
        f"✅ SPACE_ID found: {space_id_startup}"
        if space_id_startup
        else "ℹ️ SPACE_ID environment variable not found (running locally?)."
    )

    # Closing rule spans the same width as the banner above.
    print("-" * (60 + len(title)) + "\n")
    print("Launching Gradio Interface...")
    demo.launch(debug=True, share=True)