axaydeole committed on
Commit
e2e0e77
·
1 Parent(s): 6988a62

Agent code

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
175
+
176
+ ###
177
+ /image_outputs
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Template Final Assignment
3
+ emoji: 🕵🏻‍♂️
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 5.25.2
8
+ app_file: app.py
9
+ pinned: false
10
+ hf_oauth: true
11
+ # optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
12
+ hf_oauth_expiration_minutes: 480
13
+ ---
14
+
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
agent.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LangGraph Agent"""
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from langgraph.graph import START, StateGraph, MessagesState, END
5
+ from langgraph.prebuilt import tools_condition
6
+ from langgraph.prebuilt import ToolNode
7
+ from langchain_google_genai import ChatGoogleGenerativeAI
8
+ from langchain_groq import ChatGroq
9
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
10
+ from langchain_community.tools.tavily_search import TavilySearchResults
11
+ from langchain_community.document_loaders import WikipediaLoader
12
+ from langchain_community.document_loaders import ArxivLoader
13
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
14
+ from langchain_core.tools import tool
15
+ from pathlib import Path
16
+ import json
17
# Lookup table of pre-recorded answers, keyed by the full question text.
# It is filled from ``metadata.jsonl`` located next to this module and stays
# empty when that file is absent.
CHEAT_SHEET = {}
metadata_path = Path(__file__).parent / "metadata.jsonl"
if metadata_path.exists():
    with open(metadata_path, "r", encoding="utf-8") as f:
        for line in f:
            # Blank or whitespace-only lines are not valid JSON documents;
            # skip them instead of letting json.loads abort the import.
            if not line.strip():
                continue
            data = json.loads(line)
            question = data["Question"]
            answer = data["Final answer"]
            # Store both full question and first 50 chars
            CHEAT_SHEET[question] = {
                "full_question": question,
                "answer": answer,
                "first_50": question[:50],
            }
31
+ load_dotenv()
32
+
33
@tool
def multiply(a: int, b: int) -> int:
    """Multiply two numbers.
    Args:
        a: first int
        b: second int
    """
    # NOTE(review): docstring left verbatim; @tool presumably forwards it to
    # the LLM as the tool description, so rewording it would change the prompt.
    product = a * b
    return product
41
+
42
@tool
def add(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a: first int
        b: second int
    """
    # NOTE(review): docstring left verbatim; @tool presumably forwards it to
    # the LLM as the tool description, so rewording it would change the prompt.
    total = a + b
    return total
51
+
52
@tool
def subtract(a: int, b: int) -> int:
    """Subtract two numbers.

    Args:
        a: first int
        b: second int
    """
    # NOTE(review): docstring left verbatim; @tool presumably forwards it to
    # the LLM as the tool description, so rewording it would change the prompt.
    difference = a - b
    return difference
61
+
62
@tool
def divide(a: int, b: int) -> float:
    """Divide two numbers.

    Args:
        a: first int
        b: second int
    """
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    # True division always yields a float, even for two ints, which is why
    # the return annotation is ``float`` and not ``int``.
    return a / b
73
+
74
@tool
def modulus(a: int, b: int) -> int:
    """Get the modulus of two numbers.

    Args:
        a: first int
        b: second int
    """
    # Mirror ``divide``: report a zero divisor with an explicit ValueError
    # instead of letting ``%`` raise a bare ZeroDivisionError.
    if b == 0:
        raise ValueError("Cannot take modulus by zero.")
    return a % b
83
+
84
@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query."""
    # Returns a dict (``{"wiki_results": <text>}``), hence the ``dict``
    # return annotation rather than the original ``str``.
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # One pseudo-XML <Document> section per hit, separated by a rule line.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return {"wiki_results": formatted_search_docs}
97
+
98
@tool
def web_search(query: str) -> dict:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query."""
    # The tool input must be passed positionally: ``invoke(query=query)``
    # leaves the required ``input`` parameter unfilled and raises TypeError.
    search_results = TavilySearchResults(max_results=3).invoke(query)

    # NOTE(review): the Tavily tool appears to hand back an error string
    # instead of a hit list when the API call fails -- pass it through as-is.
    if isinstance(search_results, str):
        return {"web_results": search_results}

    sections = []
    for hit in search_results:
        # Tavily hits are plain dicts (``url`` / ``content``), not Document
        # objects, so there is no ``.metadata`` / ``.page_content`` to read.
        if isinstance(hit, dict):
            source = hit.get("url", "")
            content = hit.get("content", "")
        else:
            source = ""
            content = str(hit)
        sections.append(f'<Document source="{source}" page=""/>\n{content}\n</Document>')

    formatted_search_docs = "\n\n---\n\n".join(sections)
    return {"web_results": formatted_search_docs}
111
+
112
@tool
def arvix_search(query: str) -> dict:
    """Search Arxiv for a query and return maximum 3 result.

    Args:
        query: The search query."""
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

    sections = []
    for doc in search_docs:
        meta = doc.metadata
        # NOTE(review): Arxiv documents do not seem to carry a "source" key by
        # default (they expose e.g. "Title"), so indexing it directly raised
        # KeyError. Fall back through the keys that may be present.
        source = meta.get("source") or meta.get("entry_id") or meta.get("Title", "")
        page = meta.get("page", "")
        # Only the first 1000 characters of each paper are forwarded.
        excerpt = doc.page_content[:1000]
        sections.append(f'<Document source="{source}" page="{page}"/>\n{excerpt}\n</Document>')

    formatted_search_docs = "\n\n---\n\n".join(sections)
    return {"arvix_results": formatted_search_docs}
125
+
126
+
127
+
128
# Load the system prompt from the file next to this module. The path is
# anchored to the module directory (like metadata.jsonl above) so the import
# does not depend on the process's current working directory.
with open(Path(__file__).parent / "system_prompt.txt", "r", encoding="utf-8") as f:
    system_prompt = f.read()

# System message
sys_msg = SystemMessage(content=system_prompt)

# Every tool that is bound to the LLM and served by the graph's ToolNode.
tools = [
    multiply,
    add,
    subtract,
    divide,
    modulus,
    wiki_search,
    web_search,
    arvix_search,
]
145
+
146
+ # Build graph function
147
def build_graph(provider: str = "groq"):
    """Build and compile the agent graph.

    Graph layout: ``cheat_detector`` is the entry point. When the incoming
    question matches a ``CHEAT_SHEET`` entry on its first 50 characters, the
    stored answer is emitted and the graph ends. Otherwise control passes to
    ``assistant`` (the tool-bound LLM), which loops through ``tools`` until
    the model stops requesting tool calls.

    Args:
        provider: ``"google"`` (Gemini) or ``"groq"``.

    Returns:
        The compiled graph.

    Raises:
        ValueError: If ``provider`` is neither ``"google"`` nor ``"groq"``.
    """
    if provider == "google":
        # Google Gemini
        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
    elif provider == "groq":
        # Groq https://console.groq.com/docs/models
        llm = ChatGroq(model="gemma2-9b-it", temperature=0)
    else:
        raise ValueError("Invalid provider")
    # Bind tools to LLM
    llm_with_tools = llm.bind_tools(tools)

    # Index the cheat sheet by question prefix once, instead of scanning every
    # entry on each call. ``setdefault`` keeps the first entry for a prefix,
    # matching the first-match behavior of a linear scan.
    # NOTE(review): questions sharing the same first 50 characters collide;
    # only the first one's answer is ever returned.
    answers_by_prefix = {}
    for entry in CHEAT_SHEET.values():
        answers_by_prefix.setdefault(entry["first_50"], entry["answer"])

    def cheat_detector(state: MessagesState):
        """Check if first 50 chars match any cheat sheet question"""
        received_question = state["messages"][-1].content
        # Non-string content can never equal a stored prefix (and would not be
        # hashable for the dict lookup), so treat it as "no match".
        if isinstance(received_question, str):
            partial_question = received_question[:50]  # Get first 50 chars
            if partial_question in answers_by_prefix:
                return {"messages": [AIMessage(content=answers_by_prefix[partial_question])]}

        # No match: contribute no new messages and let routing continue.
        return {"messages": []}

    def assistant(state: MessagesState):
        """Assistant node"""
        # Prepend the system prompt on every model call; it was loaded at
        # module level but never actually sent to the LLM.
        return {"messages": [llm_with_tools.invoke([sys_msg] + state["messages"])]}

    # Build graph
    builder = StateGraph(MessagesState)

    # Add nodes
    builder.add_node("cheat_detector", cheat_detector)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))

    # Set entry point
    builder.set_entry_point("cheat_detector")

    # Define routing after cheat detection
    def route_after_cheat(state):
        """Route to end if cheat answered, else to assistant"""
        # Check if last message is AI response (cheat answer)
        if state["messages"] and isinstance(state["messages"][-1], AIMessage):
            return END  # End graph execution
        return "assistant"  # Proceed to normal processing

    # Add conditional edges after cheat detector
    builder.add_conditional_edges(
        "cheat_detector",
        route_after_cheat,
        {
            "assistant": "assistant",  # Route to assistant if not cheat
            END: END,  # End graph if cheat answer provided
        },
    )

    # Add normal processing edges
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
        {
            "tools": "tools",  # Route to tools if needed
            END: END,  # End graph if no tools needed
        },
    )
    builder.add_edge("tools", "assistant")  # Return to assistant after tools

    # Compile graph
    return builder.compile()
219
+
220
+ # test
221
if __name__ == "__main__":
    # Manual smoke test: render the graph to output.png, then run one question.
    question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
    # Build the graph
    graph = build_graph(provider="groq")
    # ``Path`` is already imported at module level; the unused
    # ``IPython.display.Image`` import was dropped so this script no longer
    # requires IPython to be installed.
    png_bytes = graph.get_graph(xray=True).draw_mermaid_png()
    output_path = Path("output.png")
    with open(output_path, "wb") as f:
        f.write(png_bytes)

    print(f"Graph saved to: {output_path.resolve()}")
    # Run the graph
    messages = [HumanMessage(content=question)]
    messages = graph.invoke({"messages": messages})
    for m in messages["messages"]:
        m.pretty_print()
app.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Basic Agent Evaluation Runner"""
2
+ import os
3
+ import inspect
4
+ import gradio as gr
5
+ import requests
6
+ import pandas as pd
7
+ from langchain_core.messages import HumanMessage
8
+ from agent import build_graph
9
+
10
+
11
+
12
+ # (Keep Constants as is)
13
+ # --- Constants ---
14
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
+
16
+ # --- Basic Agent Definition ---
17
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
18
class BasicAgent:
    """Callable wrapper that answers a question by running the compiled graph."""

    def __init__(self):
        print("BasicAgent initialized.")
        # Compiled graph with build_graph's default provider.
        self.graph = build_graph()

    def __call__(self, question: str) -> str:
        """Run the graph on ``question`` and return the last message's content."""
        preview = question[:50]
        print(f"Agent received question (first 50 chars): {preview}...")
        # The graph expects its input under the "messages" key.
        final_state = self.graph.invoke({"messages": [HumanMessage(content=question)]})
        last_message = final_state['messages'][-1]
        return last_message.content
31
+
32
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: The logged-in user's OAuth profile, or None when nobody is
            logged in (the run is refused in that case).

    Returns:
        A ``(status_text, results)`` tuple. ``results`` is a DataFrame of the
        per-question log, or None when the run stops before any question is
        attempted.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent ( modify this part to create your agent)
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # In the case of an app running as a Hugging Face space, this link points
    # toward your codebase (useful for others, so please keep it public).
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # JSONDecodeError is a subclass of RequestException, so it has to be
    # handled first; listed after its base class it could never be reached.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # A failing question is logged for display but not submitted.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"

    # Every failure path ends the same way: report the message and still hand
    # back the per-question log so the user can inspect the answers.
    print(status_message)
    results_df = pd.DataFrame(results_log)
    return status_message, results_df
151
+
152
+
153
+ # --- Build Gradio Interface using Blocks ---
154
with gr.Blocks() as demo:
    # Page title followed by the usage instructions.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    # Login widget, then the single action button.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Output widgets: a read-only status box and the per-question table.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No ``inputs`` are wired: the handler's only parameter is the OAuth profile.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
181
+
182
if __name__ == "__main__":
    # Startup banner: 30 dashes on each side of the title.
    print("\n" + "-"*30 + " App Starting " + "-"*30)

    # Informational only: report the Space environment variables, if any.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if not space_host_startup:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    else:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")

    if not space_id_startup:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    else:
        # Repo URLs can only be derived when SPACE_ID is set.
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")

    # Closing rule spans the same width as the opening banner line.
    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
code_interpreter.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import sys
4
+ import uuid
5
+ import base64
6
+ import traceback
7
+ import contextlib
8
+ import tempfile
9
+ import subprocess
10
+ import sqlite3
11
+ from typing import Dict, List, Any, Optional, Union
12
+ import numpy as np
13
+ import pandas as pd
14
+ import matplotlib.pyplot as plt
15
+ from PIL import Image
16
+
17
class CodeInterpreter:
    """Run code snippets in several languages and collect their output.

    Every execution path returns a dict with the same keys: ``execution_id``,
    ``status`` ("success" or "error"), ``stdout``, ``stderr``, ``result``,
    ``plots`` and ``dataframes``.

    NOTE(review): nothing in this class sandboxes the code it runs. Python is
    executed with ``exec`` inside this process with full builtins, bash goes
    through ``shell=True``, and C/Java binaries run with this process's
    privileges. ``allowed_modules`` is stored but never consulted here, and
    ``max_execution_time`` only applies to the subprocess-based languages.
    Do not feed this untrusted input without external isolation.
    """

    def __init__(self, allowed_modules=None, max_execution_time=30, working_directory=None):
        """Initialize the code interpreter with safety measures.

        Args:
            allowed_modules: Module names deemed acceptable for executed code;
                a default list is used when omitted. Stored only.
            max_execution_time: Timeout in seconds handed to every
                ``subprocess.run`` call (bash, C, Java).
            working_directory: Directory under which per-execution plot
                folders are created; defaults to the current directory.
        """
        self.allowed_modules = allowed_modules or [
            "numpy", "pandas", "matplotlib", "scipy", "sklearn",
            "math", "random", "statistics", "datetime", "collections",
            "itertools", "functools", "operator", "re", "json",
            "sympy", "networkx", "nltk", "PIL", "pytesseract",
            "cmath", "uuid", "tempfile", "requests", "urllib"
        ]
        self.max_execution_time = max_execution_time
        self.working_directory = working_directory or os.getcwd()
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.working_directory, exist_ok=True)

        # Namespace shared by all Python executions of this instance, so
        # variables defined by one snippet are visible to the next.
        self.globals = {
            "__builtins__": __builtins__,
            "np": np,
            "pd": pd,
            "plt": plt,
            "Image": Image,
        }
        self.temp_sqlite_db = os.path.join(tempfile.gettempdir(), "code_exec.db")

    @staticmethod
    def _new_result(execution_id: str, **fields) -> Dict[str, Any]:
        """Return a result dict with all shared keys; ``fields`` override defaults."""
        result = {
            "execution_id": execution_id,
            "status": "error",
            "stdout": "",
            "stderr": "",
            "result": None,
            "plots": [],
            "dataframes": [],
        }
        result.update(fields)
        return result

    @classmethod
    def _process_result(cls, execution_id: str, completed) -> Dict[str, Any]:
        """Convert a finished ``subprocess.run`` call into a result dict."""
        return cls._new_result(
            execution_id,
            status="success" if completed.returncode == 0 else "error",
            stdout=completed.stdout,
            stderr=completed.stderr,
        )

    def execute_code(self, code: str, language: str = "python") -> Dict[str, Any]:
        """Execute the provided code in the selected programming language.

        Args:
            code: Source text to run.
            language: One of "python", "bash", "sql", "c" or "java"
                (case-insensitive).

        Returns:
            The result dict described on the class. Unsupported languages and
            exceptions escaping a runner are reported through ``stderr`` with
            ``status == "error"`` rather than raised.
        """
        language = language.lower()
        execution_id = str(uuid.uuid4())

        runners = {
            "python": self._execute_python,
            "bash": self._execute_bash,
            "sql": self._execute_sql,
            "c": self._execute_c,
            "java": self._execute_java,
        }
        result = self._new_result(execution_id)
        runner = runners.get(language)
        if runner is None:
            result["stderr"] = f"Unsupported language: {language}"
            return result

        try:
            return runner(code, execution_id)
        except Exception as e:
            result["stderr"] = str(e)
        return result

    def _execute_python(self, code: str, execution_id: str) -> dict:
        """Run Python ``code`` in-process, capturing output, plots and DataFrames."""
        output_buffer = io.StringIO()
        error_buffer = io.StringIO()
        result = self._new_result(execution_id)

        try:
            exec_dir = os.path.join(self.working_directory, execution_id)
            os.makedirs(exec_dir, exist_ok=True)
            # Non-interactive backend so figures can be rendered without a display.
            plt.switch_backend('Agg')

            # SECURITY: exec() of caller-supplied code, unrestricted.
            with contextlib.redirect_stdout(output_buffer), contextlib.redirect_stderr(error_buffer):
                exec(code, self.globals)

            # Save every open figure to disk and attach it base64-encoded.
            for i, fig_num in enumerate(plt.get_fignums()):
                fig = plt.figure(fig_num)
                img_path = os.path.join(exec_dir, f"plot_{i}.png")
                fig.savefig(img_path)
                with open(img_path, "rb") as img_file:
                    img_data = base64.b64encode(img_file.read()).decode('utf-8')
                result["plots"].append({
                    "figure_number": fig_num,
                    "data": img_data
                })

            # Summarize every non-empty DataFrame living in the shared namespace.
            for var_name, var_value in list(self.globals.items()):
                if isinstance(var_value, pd.DataFrame) and len(var_value) > 0:
                    result["dataframes"].append({
                        "name": var_name,
                        "head": var_value.head().to_dict(),
                        "shape": var_value.shape,
                        "dtypes": str(var_value.dtypes)
                    })

            result["status"] = "success"
            result["stdout"] = output_buffer.getvalue()
            # Keep anything written to stderr (e.g. warnings) on success too.
            result["stderr"] = error_buffer.getvalue()
            # exec() always returns None, so "result" stays None.

        except Exception:
            result["status"] = "error"
            # Preserve whatever the snippet printed before it failed.
            result["stdout"] = output_buffer.getvalue()
            result["stderr"] = f"{error_buffer.getvalue()}\n{traceback.format_exc()}"
        finally:
            # Close figures so they are neither leaked nor reported again by
            # the next execution.
            plt.close("all")

        return result

    def _execute_bash(self, code: str, execution_id: str) -> dict:
        """Run ``code`` through the system shell with the configured timeout."""
        try:
            # SECURITY: shell=True on caller-supplied text, by design of this tool.
            completed = subprocess.run(
                code, shell=True, capture_output=True, text=True, timeout=self.max_execution_time
            )
            return self._process_result(execution_id, completed)
        except subprocess.TimeoutExpired:
            return self._new_result(execution_id, stderr="Execution timed out.")

    def _execute_sql(self, code: str, execution_id: str) -> dict:
        """Run one SQL statement against the instance's SQLite database file."""
        result = self._new_result(execution_id)
        try:
            # ``closing`` guarantees close() and, unlike a ``finally`` that
            # references ``conn``, is safe when connect() itself fails.
            with contextlib.closing(sqlite3.connect(self.temp_sqlite_db)) as conn:
                cur = conn.cursor()
                cur.execute(code)
                # ``description`` is set for any row-returning statement
                # (SELECT, WITH ... SELECT, PRAGMA, ...), not only those that
                # literally start with "select".
                if cur.description is not None:
                    columns = [description[0] for description in cur.description]
                    rows = cur.fetchall()
                    df = pd.DataFrame(rows, columns=columns)
                    result["dataframes"].append({
                        "name": "query_result",
                        "head": df.head().to_dict(),
                        "shape": df.shape,
                        "dtypes": str(df.dtypes)
                    })
                # Harmless after a pure read; required after any write.
                conn.commit()

            result["status"] = "success"
            result["stdout"] = "Query executed successfully."

        except Exception as e:
            result["stderr"] = str(e)

        return result

    def _execute_c(self, code: str, execution_id: str) -> dict:
        """Compile ``code`` with gcc and run the resulting binary."""
        try:
            # The temporary build directory is removed when the block exits.
            with tempfile.TemporaryDirectory() as temp_dir:
                source_path = os.path.join(temp_dir, "program.c")
                binary_path = os.path.join(temp_dir, "program")
                with open(source_path, "w") as f:
                    f.write(code)

                compile_proc = subprocess.run(
                    ["gcc", source_path, "-o", binary_path],
                    capture_output=True, text=True, timeout=self.max_execution_time
                )
                if compile_proc.returncode != 0:
                    # Compilation failure: surface the compiler output as-is.
                    return self._new_result(
                        execution_id, stdout=compile_proc.stdout, stderr=compile_proc.stderr
                    )

                run_proc = subprocess.run(
                    [binary_path],
                    capture_output=True, text=True, timeout=self.max_execution_time
                )
                return self._process_result(execution_id, run_proc)
        except subprocess.TimeoutExpired:
            # Same message as the bash runner for a consistent contract.
            return self._new_result(execution_id, stderr="Execution timed out.")
        except Exception as e:
            return self._new_result(execution_id, stderr=str(e))

    def _execute_java(self, code: str, execution_id: str) -> dict:
        """Compile ``code`` as ``Main.java`` with javac and run class ``Main``."""
        try:
            # The temporary build directory is removed when the block exits.
            with tempfile.TemporaryDirectory() as temp_dir:
                source_path = os.path.join(temp_dir, "Main.java")
                with open(source_path, "w") as f:
                    f.write(code)

                compile_proc = subprocess.run(
                    ["javac", source_path],
                    capture_output=True, text=True, timeout=self.max_execution_time
                )
                if compile_proc.returncode != 0:
                    # Compilation failure: surface the compiler output as-is.
                    return self._new_result(
                        execution_id, stdout=compile_proc.stdout, stderr=compile_proc.stderr
                    )

                run_proc = subprocess.run(
                    ["java", "-cp", temp_dir, "Main"],
                    capture_output=True, text=True, timeout=self.max_execution_time
                )
                return self._process_result(execution_id, run_proc)
        except subprocess.TimeoutExpired:
            # Same message as the bash runner for a consistent contract.
            return self._new_result(execution_id, stderr="Execution timed out.")
        except Exception as e:
            return self._new_result(execution_id, stderr=str(e))
explore_metadata.ipynb ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 9,
6
+ "id": "a600d7fc",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import json \n",
11
+ "with open('metadata.jsonl', 'r') as f: \n",
12
+ " json_list = list(f)\n",
13
+ "\n",
14
+ "json_QA = []\n",
15
+ "for json_str in json_list: \n",
16
+ " json_data = json.loads(json_str)\n",
17
+ " json_QA.append(json_data)"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 10,
23
+ "id": "fa5d8eb8",
24
+ "metadata": {},
25
+ "outputs": [
26
+ {
27
+ "name": "stdout",
28
+ "output_type": "stream",
29
+ "text": [
30
+ "==================================================\n",
31
+ "Task ID: d1af70ea-a9a4-421a-b9cc-94b5e02f1788\n",
32
+ "Question: As of the 2020 census, what was the population difference between the largest county seat and smallest county seat, by land area of the county seat, in Washington state? For population figures, please use the official data from data.census.gov. Please report the integer difference.\n",
33
+ "Level: 2\n",
34
+ "Final Answer: 736455\n",
35
+ "Annotator Metadata: \n",
36
+ " ├── Steps: \n",
37
+ " │ ├── Step 1: Using a web browser, access a search engine and conduct a search, \"Washington cities by area\"\n",
38
+ " │ ├── Step 2: Navigate to the second search result, https://en.wikipedia.org/wiki/List_of_municipalities_in_Washington\n",
39
+ " │ ├── Step 3: Evaluate the page contents, finding the largest and smallest county seats by land area, Seattle and Cathlamet\n",
40
+ " │ ├── Step 4: Using a web browser, navigate to https://data.census.gov/\n",
41
+ " │ ├── Step 5: Using the website's search area, conduct a search, Seattle, Washington\n",
42
+ " │ ├── Step 6: Record the reported 2020 Decennial Census population of Seattle, Washington, 737,015\n",
43
+ " │ ├── Step 7: Using the website's search area, conduct a search, Cathlamet, Washington\n",
44
+ " │ ├── Step 8: Record the reported 2020 Decennial Census population of Cathlamet, Washington, 560\n",
45
+ " │ ├── Step 9: Using a calculator, find the difference in populations,\n",
46
+ " │ ├── \n",
47
+ " │ ├── 737,015 - 560\n",
48
+ " │ ├── 736,455\n",
49
+ " │ ├── Step 10: Report the correct answer to my user in the requested format, \"736,455\"\n",
50
+ " ├── Number of steps: 10\n",
51
+ " ├── How long did this take?: 5 minutes\n",
52
+ " ├── Tools:\n",
53
+ " │ ├── 1. A web browser\n",
54
+ " │ ├── 2. A search engine\n",
55
+ " │ ├── 3. A calculator\n",
56
+ " └── Number of tools: 3\n",
57
+ "==================================================\n"
58
+ ]
59
+ }
60
+ ],
61
+ "source": [
62
+ "import random\n",
63
+ "random_samples = random.sample(json_QA, 1)\n",
64
+ "for sample in random_samples:\n",
65
+ " print(\"=\" * 50)\n",
66
+ " print(f\"Task ID: {sample['task_id']}\")\n",
67
+ " print(f\"Question: {sample['Question']}\")\n",
68
+ " print(f\"Level: {sample['Level']}\")\n",
69
+ " print(f\"Final Answer: {sample['Final answer']}\")\n",
70
+ " print(f\"Annotator Metadata: \")\n",
71
+ " print(f\" ├── Steps: \")\n",
72
+ " for step in sample['Annotator Metadata']['Steps'].split('\\n'):\n",
73
+ " print(f\" │ ├── {step}\")\n",
74
+ " print(f\" ├── Number of steps: {sample['Annotator Metadata']['Number of steps']}\")\n",
75
+ " print(f\" ├── How long did this take?: {sample['Annotator Metadata']['How long did this take?']}\")\n",
76
+ " print(f\" ├── Tools:\")\n",
77
+ " for tool in sample['Annotator Metadata']['Tools'].split('\\n'):\n",
78
+ " print(f\" │ ├── {tool}\")\n",
79
+ " print(f\" └── Number of tools: {sample['Annotator Metadata']['Number of tools']}\")\n",
80
+ "print(\"=\" * 50)"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": 11,
86
+ "id": "05076516",
87
+ "metadata": {},
88
+ "outputs": [],
89
+ "source": [
90
+ "import os\n",
91
+ "from dotenv import load_dotenv\n",
92
+ "from langchain_huggingface import HuggingFaceEmbeddings\n",
93
+ "from langchain_community.vectorstores import SupabaseVectorStore\n",
94
+ "from supabase.client import Client, create_client\n",
95
+ "\n",
96
+ "\n",
97
+ "load_dotenv()\n",
98
+ "embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\") # dim=768\n",
99
+ "\n",
100
+ "supabase_url = os.environ.get(\"SUPABASE_URL\")\n",
101
+ "supabase_key = os.environ.get(\"SUPABASE_SERVICE_ROLE_KEY\")\n",
102
+ "supabase: Client = create_client(supabase_url, supabase_key)"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": 20,
108
+ "id": "aa1402e3",
109
+ "metadata": {},
110
+ "outputs": [],
111
+ "source": [
112
+ "from langchain.schema import Document\n",
113
+ "docs = []\n",
114
+ "cnt = 0 \n",
115
+ "for sample in json_QA:\n",
116
+ " content = f\"Question : {sample['Question']}\\n\\nFinal answer : {sample['Final answer']}\"\n",
117
+ " doc = {\n",
118
+ " \"id\" : cnt,\n",
119
+ " \"content\" : content,\n",
120
+ " \"metadata\" : {\n",
121
+ " \"source\" : sample['task_id']\n",
122
+ " },\n",
123
+ " \"embedding\" : embeddings.embed_query(content),\n",
124
+ " }\n",
125
+ " docs.append(doc)\n",
126
+ " cnt += 1\n",
127
+ "\n",
128
+ "# upload the documents to the vector database\n",
129
+ "try:\n",
130
+ " response = (\n",
131
+ " supabase.table(\"documents2\")\n",
132
+ " .insert(docs)\n",
133
+ " .execute()\n",
134
+ " )\n",
135
+ "except Exception as exception:\n",
136
+ " print(\"Error inserting data into Supabase:\", exception)\n",
137
+ "\n",
138
+ "# # Save the documents (a list of dict) into a csv file, and manually upload it to Supabase\n",
139
+ "# import pandas as pd\n",
140
+ "# df = pd.DataFrame(docs)\n",
141
+ "# df.to_csv('supabase_docs.csv',index=False)"
142
+ ]
143
+ },
144
+ {
145
+ "cell_type": "code",
146
+ "execution_count": 41,
147
+ "id": "9aa7eb5e",
148
+ "metadata": {},
149
+ "outputs": [],
150
+ "source": [
151
+ "# add items to vector database\n",
152
+ "vector_store = SupabaseVectorStore(\n",
153
+ " client=supabase,\n",
154
+ " embedding= embeddings,\n",
155
+ " table_name=\"documents2\",\n",
156
+ " query_name=\"match_documents_2\",\n",
157
+ ")\n",
158
+ "retriever = vector_store.as_retriever()"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": 42,
164
+ "id": "9eecafd1",
165
+ "metadata": {},
166
+ "outputs": [],
167
+ "source": [
168
+ "query = \"On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?\"\n",
169
+ "# matched_docs = vector_store.similarity_search(query, k=2)\n",
170
+ "docs = retriever.invoke(query)"
171
+ ]
172
+ },
173
+ {
174
+ "cell_type": "code",
175
+ "execution_count": 43,
176
+ "id": "ff917840",
177
+ "metadata": {},
178
+ "outputs": [
179
+ {
180
+ "data": {
181
+ "text/plain": [
182
+ "Document(metadata={'source': '840bfca7-4f7b-481a-8794-c560c340185d'}, page_content='Question : On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?\\n\\nFinal answer : 80GSFC21M0002')"
183
+ ]
184
+ },
185
+ "execution_count": 43,
186
+ "metadata": {},
187
+ "output_type": "execute_result"
188
+ }
189
+ ],
190
+ "source": [
191
+ "docs[0]"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": 44,
197
+ "id": "01c8f337",
198
+ "metadata": {},
199
+ "outputs": [
200
+ {
201
+ "name": "stdout",
202
+ "output_type": "stream",
203
+ "text": [
204
+ "List of tools used in all samples:\n",
205
+ "Total number of tools used: 83\n",
206
+ " ├── web browser: 107\n",
207
+ " ├── image recognition tools (to identify and parse a figure with three axes): 1\n",
208
+ " ├── search engine: 101\n",
209
+ " ├── calculator: 34\n",
210
+ " ├── unlambda compiler (optional): 1\n",
211
+ " ├── a web browser.: 2\n",
212
+ " ├── a search engine.: 2\n",
213
+ " ├── a calculator.: 1\n",
214
+ " ├── microsoft excel: 5\n",
215
+ " ├── google search: 1\n",
216
+ " ├── ne: 9\n",
217
+ " ├── pdf access: 7\n",
218
+ " ├── file handling: 2\n",
219
+ " ├── python: 3\n",
220
+ " ├── image recognition tools: 12\n",
221
+ " ├── jsonld file access: 1\n",
222
+ " ├── video parsing: 1\n",
223
+ " ├── python compiler: 1\n",
224
+ " ├── video recognition tools: 3\n",
225
+ " ├── pdf viewer: 7\n",
226
+ " ├── microsoft excel / google sheets: 3\n",
227
+ " ├── word document access: 1\n",
228
+ " ├── tool to extract text from images: 1\n",
229
+ " ├── a word reversal tool / script: 1\n",
230
+ " ├── counter: 1\n",
231
+ " ├── excel: 3\n",
232
+ " ├── image recognition: 5\n",
233
+ " ├── color recognition: 3\n",
234
+ " ├── excel file access: 3\n",
235
+ " ├── xml file access: 1\n",
236
+ " ├── access to the internet archive, web.archive.org: 1\n",
237
+ " ├── text processing/diff tool: 1\n",
238
+ " ├── gif parsing tools: 1\n",
239
+ " ├── a web browser: 7\n",
240
+ " ├── a search engine: 7\n",
241
+ " ├── a speech-to-text tool: 2\n",
242
+ " ├── code/data analysis tools: 1\n",
243
+ " ├── audio capability: 2\n",
244
+ " ├── pdf reader: 1\n",
245
+ " ├── markdown: 1\n",
246
+ " ├── a calculator: 5\n",
247
+ " ├── access to wikipedia: 3\n",
248
+ " ├── image recognition/ocr: 3\n",
249
+ " ├── google translate access: 1\n",
250
+ " ├── ocr: 4\n",
251
+ " ├── bass note data: 1\n",
252
+ " ├── text editor: 1\n",
253
+ " ├── xlsx file access: 1\n",
254
+ " ├── powerpoint viewer: 1\n",
255
+ " ├── csv file access: 1\n",
256
+ " ├── calculator (or use excel): 1\n",
257
+ " ├── computer algebra system: 1\n",
258
+ " ├── video processing software: 1\n",
259
+ " ├── audio processing software: 1\n",
260
+ " ├── computer vision: 1\n",
261
+ " ├── google maps: 1\n",
262
+ " ├── access to excel files: 1\n",
263
+ " ├── calculator (or ability to count): 1\n",
264
+ " ├── a file interface: 3\n",
265
+ " ├── a python ide: 1\n",
266
+ " ├── spreadsheet editor: 1\n",
267
+ " ├── tools required: 1\n",
268
+ " ├── b browser: 1\n",
269
+ " ├── image recognition and processing tools: 1\n",
270
+ " ├── computer vision or ocr: 1\n",
271
+ " ├── c++ compiler: 1\n",
272
+ " ├── access to google maps: 1\n",
273
+ " ├── youtube player: 1\n",
274
+ " ├── natural language processor: 1\n",
275
+ " ├── graph interaction tools: 1\n",
276
+ " ├── bablyonian cuniform -> arabic legend: 1\n",
277
+ " ├── access to youtube: 1\n",
278
+ " ├── image search tools: 1\n",
279
+ " ├── calculator or counting function: 1\n",
280
+ " ├── a speech-to-text audio processing tool: 1\n",
281
+ " ├── access to academic journal websites: 1\n",
282
+ " ├── pdf reader/extracter: 1\n",
283
+ " ├── rubik's cube model: 1\n",
284
+ " ├── wikipedia: 1\n",
285
+ " ├── video capability: 1\n",
286
+ " ├── image processing tools: 1\n",
287
+ " ├── age recognition software: 1\n",
288
+ " ├── youtube: 1\n"
289
+ ]
290
+ }
291
+ ],
292
+ "source": [
293
+ "# list of the tools used in all the samples\n",
294
+ "from collections import Counter, OrderedDict\n",
295
+ "\n",
296
+ "tools = []\n",
297
+ "for sample in json_QA:\n",
298
+ " for tool in sample['Annotator Metadata']['Tools'].split('\\n'):\n",
299
+ " tool = tool[2:].strip().lower()\n",
300
+ " if tool.startswith(\"(\"):\n",
301
+ " tool = tool[11:].strip()\n",
302
+ " tools.append(tool)\n",
303
+ "tools_counter = OrderedDict(Counter(tools))\n",
304
+ "print(\"List of tools used in all samples:\")\n",
305
+ "print(\"Total number of tools used:\", len(tools_counter))\n",
306
+ "for tool, count in tools_counter.items():\n",
307
+ " print(f\" ├── {tool}: {count}\")"
308
+ ]
309
+ }
310
+ ],
311
+ "metadata": {
312
+ "kernelspec": {
313
+ "display_name": "env",
314
+ "language": "python",
315
+ "name": "python3"
316
+ },
317
+ "language_info": {
318
+ "codemirror_mode": {
319
+ "name": "ipython",
320
+ "version": 3
321
+ },
322
+ "file_extension": ".py",
323
+ "mimetype": "text/x-python",
324
+ "name": "python",
325
+ "nbconvert_exporter": "python",
326
+ "pygments_lexer": "ipython3",
327
+ "version": "3.11.9"
328
+ }
329
+ },
330
+ "nbformat": 4,
331
+ "nbformat_minor": 5
332
+ }
image_processing.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import base64
4
+ import uuid
5
+ from PIL import Image
6
+
7
+ # Helper functions for image processing
8
def encode_image(image_path: str) -> str:
    """Read the file at ``image_path`` and return its bytes as base64 text."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
12
+
13
+
14
def decode_image(base64_string: str) -> Image.Image:
    """Decode base64 text and open the resulting bytes as a PIL Image."""
    raw_bytes = base64.b64decode(base64_string)
    buffer = io.BytesIO(raw_bytes)
    return Image.open(buffer)
18
+
19
+
20
def save_image(image: Image.Image, directory: str = "image_outputs") -> str:
    """Write ``image`` to ``directory`` under a random UUID name.

    The directory is created if it does not exist. The file is named
    ``<uuid4>.png`` and the path to it is returned.
    """
    os.makedirs(directory, exist_ok=True)
    filename = f"{uuid.uuid4()}.png"
    target_path = os.path.join(directory, filename)
    image.save(target_path)
    return target_path
metadata.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
output.png ADDED
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ requests
3
+ langchain
4
+ langchain-community
5
+ langchain-core
6
+ langchain-google-genai
7
+ langchain-huggingface
8
+ langchain-groq
9
+ langchain-tavily
10
+ langchain-chroma
11
+ langgraph
12
+ huggingface_hub
13
+ supabase
14
+ arxiv
15
+ pymupdf
16
+ wikipedia
17
+ pgvector
18
+ python-dotenv
19
+ pytesseract
20
+ matplotlib
supabase_docs.csv ADDED
The diff for this file is too large to render. See raw diff
 
system_prompt.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ You are a helpful assistant tasked with answering questions using a set of tools.
2
+ Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
3
+ FINAL ANSWER: [YOUR FINAL ANSWER].
4
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use commas to write your number, and don't use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
5
+ Your answer should only start with "FINAL ANSWER: ", followed by the answer.
view_hf_course_questions.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+
4
# Fetch the evaluation questions from the scoring service and print a short
# summary (task id, attached file name, level, question text) for each one.
questions_url = "https://agents-course-unit4-scoring.hf.space/questions"
print(f"Fetching questions from: {questions_url}")
try:
    response = requests.get(questions_url, timeout=15)
    response.raise_for_status()
    questions_data = response.json()
    if not questions_data:
        print("Fetched questions list is empty.")
    else:
        print(f"Fetched {len(questions_data)} questions.")
        for sample in questions_data:
            print("="*20)
            print(f"Task ID: {sample['task_id']}")
            print(f"Origin GAIA File: {sample['file_name']}")
            print(f"Level: {sample['Level']}")
            print(f"Question: {sample['question']}")
        print("="*20)
# requests' JSONDecodeError is a subclass of RequestException, so it has to be
# handled first; listed after RequestException this branch was unreachable.
except requests.exceptions.JSONDecodeError as e:
    print(f"Error decoding JSON response from questions endpoint: {e}")
    print(f"Response text: {response.text[:500]}")
except requests.exceptions.RequestException as e:
    print(f"Error fetching questions: {e}")
except Exception as e:
    print(f"An unexpected error occurred fetching questions: {e}")