CUMANI Paolo committed
Commit 17e605d · 1 Parent(s): 81917a3

[CHG] Working agent implementation

Files changed (6):
  1. .gitignore +203 -0
  2. agent.py +155 -0
  3. app.py +78 -54
  4. requirements.txt +19 -2
  5. system_prompt.yaml +7 -0
  6. tools.py +158 -0
.gitignore ADDED
@@ -0,0 +1,203 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[codz]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py.cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ #uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+ #poetry.toml
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+ #pdm.lock
+ #pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # pixi
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+ #pixi.lock
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+ # in the .venv directory. It is recommended not to include this directory in version control.
+ .pixi
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .envrc
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ # Abstra
+ # Abstra is an AI-powered process automation framework.
+ # Ignore directories containing user credentials, local state, and settings.
+ # Learn more at https://abstra.io/docs
+ .abstra/
+
+ # Visual Studio Code
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
+ # you could uncomment the following to ignore the entire vscode folder
+ # .vscode/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
+
+ # Marimo
+ marimo/_static/
+ marimo/_lsp/
+ __marimo__/
+
+ # Streamlit
+ .streamlit/secrets.toml
agent.py ADDED
@@ -0,0 +1,155 @@
+ import mimetypes
+ import base64
+ import yaml
+ from typing import TypedDict, Annotated
+ from dotenv import load_dotenv
+ from langgraph.checkpoint.memory import InMemorySaver
+ from langgraph.graph.message import add_messages
+ from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
+ from langgraph.prebuilt import ToolNode
+ from langgraph.graph import START, StateGraph
+ from langgraph.prebuilt import tools_condition
+ from langchain_core.messages.utils import (
+     trim_messages,
+     count_tokens_approximately
+ )
+
+ # Import our custom tools from their modules
+ from tools import webpage_reader_tool, python_repl_tool, transcribe_youtube_video_tool, wikipedia_query_tool, web_search_tool, read_excel_csv, arxiv_query_tool
+
+ load_dotenv()
+
+ class FinalAgent:
+
+     def __init__(self, model_type="GOOGLE", system_prompt_path="system_prompt.yaml", use_memory=False):
+         """
+         Args: model_type: "GOOGLE", "HUGGINGFACE", or "OLLAMA"
+         """
+         with open(system_prompt_path, 'r') as stream:
+             prompt_templates = yaml.safe_load(stream)
+
+         self.model_type = model_type
+
+         if model_type == "HUGGINGFACE":
+             from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
+             # Initialize the Hugging Face model and generate the
+             # chat interface, including the tools
+             llm = HuggingFaceEndpoint(
+                 repo_id="Qwen/Qwen2.5-Coder-32B-Instruct"
+             )
+
+             chat = ChatHuggingFace(llm=llm, verbose=True)
+         elif model_type == "OLLAMA":
+             from langchain_ollama import ChatOllama
+             #chat = ChatOllama(model="qwen2.5:14b-instruct")
+             chat = ChatOllama(model="qwen3:8b")
+             #chat = ChatOllama(model="gpt-oss:20b")
+         elif model_type == "GOOGLE":
+             from langchain_google_genai import ChatGoogleGenerativeAI
+             from langchain_core.rate_limiters import InMemoryRateLimiter
+             rate_limiter = InMemoryRateLimiter(
+                 # Max allowed rate on the free API: 10 requests per minute, but we use 6 to avoid hitting the limit on subsequent answers.
+                 requests_per_second=6/60,
+                 # Wake up every 100 ms to check whether a request is allowed.
+                 check_every_n_seconds=0.1,
+                 max_bucket_size=10,  # Controls the maximum burst size.
+             )
+             chat = ChatGoogleGenerativeAI(model="gemini-2.5-flash", rate_limiter=rate_limiter)
+
+
+         tools = [webpage_reader_tool,
+                  transcribe_youtube_video_tool,
+                  web_search_tool,
+                  wikipedia_query_tool,
+                  arxiv_query_tool,
+                  read_excel_csv,
+                  python_repl_tool]
+         chat_with_tools = chat.bind_tools(tools)
+
+         class AgentState(TypedDict):
+             messages: Annotated[list[AnyMessage], add_messages]
+
+         def assistant(state: AgentState):
+             messages = trim_messages(
+                 state["messages"],
+                 strategy="last",
+                 token_counter=count_tokens_approximately,
+                 max_tokens=1e6 if self.model_type == "GOOGLE" else 126000,
+                 start_on="human",
+                 end_on=("human", "tool"),
+             )
+             return {
+                 "messages": [chat_with_tools.invoke([SystemMessage(content=prompt_templates['system_prompt'])] + messages)],
+             }
+
+         builder = StateGraph(AgentState)
+
+         builder.add_node("assistant", assistant)
+         builder.add_node("tools", ToolNode(tools))
+
+         builder.add_edge(START, "assistant")
+         builder.add_conditional_edges("assistant", tools_condition)
+         builder.add_edge("tools", "assistant")
+
+         if use_memory:
+             checkpointer = InMemorySaver()
+             self.agent = builder.compile(checkpointer=checkpointer)
+         else:
+             self.agent = builder.compile()
+         print("FinalAgent initialized.")
+
+     def clear_memory(self, thread_id: str) -> None:
+         """ Clear the memory for a given thread_id. """
+         memory = self.agent.checkpointer
+         if memory is None:
+             return
+         try:
+             # If it's an InMemorySaver (which MemorySaver is an alias for),
+             # we can directly clear the storage and writes
+             if hasattr(memory, 'storage') and hasattr(memory, 'writes'):
+                 # Clear all checkpoints for this thread_id (all namespaces)
+                 memory.storage.pop(thread_id, None)
+
+                 # Clear all writes for this thread_id (for all namespaces)
+                 keys_to_remove = [key for key in memory.writes.keys() if key[0] == thread_id]
+                 for key in keys_to_remove:
+                     memory.writes.pop(key, None)
+
+                 print(f"Memory cleared for thread_id: {thread_id}")
+                 return
+
+         except Exception as e:
+             print(f"Error clearing InMemorySaver storage for thread_id {thread_id}: {e}")
+
+     def __call__(self, question: str, attached_file: dict, recursion_limit=9) -> str:
+         print(f"Agent received question (first 100 chars): {question[:100]}...")
+
+         if attached_file['name'] != "" and attached_file['content'] is not None:
+             mime_type, _ = mimetypes.guess_type(attached_file['name'])
+             # guess_type can return None, so guard before dispatching on the prefix
+             if mime_type and (mime_type.startswith("image/") or mime_type.startswith("audio/") or mime_type.startswith("video/")):
+                 # Image/audio/video file - convert to base64
+                 encoded_file = base64.b64encode(attached_file['content']).decode('utf-8')
+
+                 if self.model_type == "GOOGLE":
+                     question = [{"type": "text", "text": question},
+                                 {"type": "image" if mime_type.startswith("image/") else "media",
+                                  "source_type": "base64",
+                                  "data": encoded_file,
+                                  "mime_type": mime_type},
+                                 ]
+                 else:
+                     question = f"{question}\n\nAttached file extension: {attached_file['name'].split('.')[-1]} - Attached file base64 encoded: \n{encoded_file}"
+             elif mime_type and mime_type.startswith("text/"):
+                 # Text-based file (like .py, .txt, .json)
+                 question = f"{question}\n\nAttached file extension: {attached_file['name'].split('.')[-1]} - Attached file content: \n{attached_file['content'].decode('utf-8')}"
+             else:
+                 encoded_file = base64.b64encode(attached_file['content']).decode('utf-8')
+                 print(f"Unsupported file {attached_file['name']} type: {mime_type}. Only images, audio, video, and text files are supported.")
+                 question = f"{question}\n\nAttached file extension: {attached_file['name'].split('.')[-1]}. File path: {attached_file['path']} - Attached file base64 encoded:\n{encoded_file}"
+
+         if recursion_limit > 0:
+             agent_reply = self.agent.invoke({"messages": [HumanMessage(content=question)]}, {"recursion_limit": recursion_limit})
+         else:
+             agent_reply = self.agent.invoke({"messages": [HumanMessage(content=question)]})
+         return str(agent_reply['messages'][-1].content)
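
A minimal usage sketch of the new class (not part of the commit): the attached_file dict with "name"/"path"/"content" keys matches the contract that __call__ reads above, the question text is illustrative, and the GOOGLE backend assumes a Gemini API key is available in the environment (loaded via load_dotenv()).

    from agent import FinalAgent

    agent = FinalAgent(model_type="GOOGLE")

    # No attachment: an empty name and None content skip the file-handling branch.
    answer = agent(
        "What is the capital of France?",
        attached_file={"name": "", "path": None, "content": None},
        recursion_limit=9,
    )
    print(answer)
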
app.py CHANGED
@@ -1,23 +1,14 @@
  import os
  import gradio as gr
  import requests
- import inspect
  import pandas as pd
 
+ from agent import FinalAgent
+
  # (Keep Constants as is)
  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
- # --- Basic Agent Definition ---
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
- class BasicAgent:
-     def __init__(self):
-         print("BasicAgent initialized.")
-     def __call__(self, question: str) -> str:
-         print(f"Agent received question (first 50 chars): {question[:50]}...")
-         fixed_answer = "This is a default answer."
-         print(f"Agent returning fixed answer: {fixed_answer}")
-         return fixed_answer
 
  def run_and_submit_all( profile: gr.OAuthProfile | None):
      """
@@ -40,7 +31,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
      # 1. Instantiate Agent ( modify this part to create your agent)
      try:
-         agent = BasicAgent()
+         agent = FinalAgent(model_type="GOOGLE")
      except Exception as e:
          print(f"Error instantiating agent: {e}")
          return f"Error initializing agent: {e}", None
@@ -73,18 +64,37 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
      results_log = []
      answers_payload = []
      print(f"Running agent on {len(questions_data)} questions...")
-     for item in questions_data:
+     for number, item in enumerate(questions_data):
          task_id = item.get("task_id")
          question_text = item.get("question")
+         file_name = item.get("file_name")
+
+         if file_name != '':
+             file_url = f"{api_url}/files/{task_id}"
+             try:
+                 response = requests.get(file_url, timeout=15)
+                 response.raise_for_status()
+                 content = response.content
+                 print(f"Fetched file {file_url}.")
+             except requests.exceptions.RequestException as e:
+                 print(f"Error fetching file: {e}")
+                 return f"Error fetching file: {e}", None
+             except Exception as e:
+                 print(f"An unexpected error occurred fetching file: {e}")
+                 return f"An unexpected error occurred fetching file: {e}", None
+         else:
+             file_url = None
+             content = None
          if not task_id or question_text is None:
              print(f"Skipping item with missing task_id or question: {item}")
              continue
          try:
-             submitted_answer = agent(question_text)
+             submitted_answer = agent(question_text, attached_file={"name": file_name, "path": file_url, "content": content}, recursion_limit=-1)
+             print(f"Agent submitted {number} answer: {submitted_answer}\n")
              answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
          except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
+             print(f"Error running agent on task {task_id}: {e}\n")
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
      if not answers_payload:
@@ -98,46 +108,60 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
      # 5. Submit
      print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
-     try:
-         response = requests.post(submit_url, json=submission_data, timeout=60)
-         response.raise_for_status()
-         result_data = response.json()
-         final_status = (
-             f"Submission Successful!\n"
-             f"User: {result_data.get('username')}\n"
-             f"Overall Score: {result_data.get('score', 'N/A')}% "
-             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-             f"Message: {result_data.get('message', 'No message received.')}"
-         )
-         print("Submission successful.")
-         results_df = pd.DataFrame(results_log)
-         return final_status, results_df
-     except requests.exceptions.HTTPError as e:
-         error_detail = f"Server responded with status {e.response.status_code}."
-         try:
-             error_json = e.response.json()
-             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-         except requests.exceptions.JSONDecodeError:
-             error_detail += f" Response: {e.response.text[:500]}"
-         status_message = f"Submission Failed: {error_detail}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except requests.exceptions.Timeout:
-         status_message = "Submission Failed: The request timed out."
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except requests.exceptions.RequestException as e:
-         status_message = f"Submission Failed: Network error - {e}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except Exception as e:
-         status_message = f"An unexpected error occurred during submission: {e}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
+     n = 0
+     while n < 5:
+         try:
+             response = requests.post(submit_url, json=submission_data, timeout=60)
+             response.raise_for_status()
+             result_data = response.json()
+             final_status = (
+                 f"Submission Successful!\n"
+                 f"User: {result_data.get('username')}\n"
+                 f"Overall Score: {result_data.get('score', 'N/A')}% "
+                 f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+                 f"Message: {result_data.get('message', 'No message received.')}"
+             )
+             print("Submission successful.")
+             results_df = pd.DataFrame(results_log)
+             return final_status, results_df
+         except requests.exceptions.HTTPError as e:
+             n += 1
+             if n < 5:
+                 print(f"Server responded with status {e.response.status_code}. Retrying ({n})...")
+                 continue
+             error_detail = f"Server responded with status {e.response.status_code}."
+             try:
+                 error_json = e.response.json()
+                 error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+             except requests.exceptions.JSONDecodeError:
+                 error_detail += f" Response: {e.response.text[:500]}"
+             status_message = f"Submission Failed: {error_detail}"
+             print(status_message)
+             results_df = pd.DataFrame(results_log)
+             return status_message, results_df
+         except requests.exceptions.Timeout:
+             n += 1
+             if n < 5:
+                 print(f"Submission Failed: The request timed out. Retrying ({n})...")
+                 continue
+             status_message = "Submission Failed: The request timed out."
+             print(status_message)
+             results_df = pd.DataFrame(results_log)
+             return status_message, results_df
+         except requests.exceptions.RequestException as e:
+             n += 1
+             if n < 5:
+                 print(f"Submission Failed: Network error - {e}. Retrying ({n})...")
+                 continue
+             status_message = f"Submission Failed: Network error - {e}"
+             print(status_message)
+             results_df = pd.DataFrame(results_log)
+             return status_message, results_df
+         except Exception as e:
+             status_message = f"An unexpected error occurred during submission: {e}"
+             print(status_message)
+             results_df = pd.DataFrame(results_log)
+             return status_message, results_df
 
 
  # --- Build Gradio Interface using Blocks ---
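
For local debugging, the new per-task file fetch can be exercised on its own; a sketch assuming only what the loop above shows (the /files/{task_id} route and a raw-bytes response), with task_id left as a placeholder:

    import requests

    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    task_id = "..."  # placeholder: a real task_id comes from the questions payload

    # Mirrors the fetch in run_and_submit_all: raw bytes, 15 s timeout.
    response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
    response.raise_for_status()
    content = response.content  # handed to the agent as attached_file["content"]
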
requirements.txt CHANGED
@@ -1,2 +1,19 @@
- gradio
- requests
+ arxiv
+ datasets
+ ddgs
+ duckduckgo-search
+ gradio[oauth]
+ langchain-community
+ langchain_experimental
+ langchain_google_genai
+ langchain_huggingface
+ langchain-tavily
+ langchain_ollama
+ langgraph
+ python-dotenv
+ openpyxl
+ rank_bm25
+ requests
+ smolagents
+ wikipedia
+ youtube-transcript-api
system_prompt.yaml ADDED
@@ -0,0 +1,7 @@
+ "system_prompt": |-
+   You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
+   [YOUR FINAL ANSWER].
+   YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma-separated list of numbers and/or strings.
+   If you are asked for a number, don't write it with commas and don't use units such as $ or percent signs unless specified otherwise.
+   If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise.
+   If you are asked for a comma-separated list, apply the above rules depending on whether each element of the list is a number or a string.
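
agent.py loads this file with yaml.safe_load and injects the string as a SystemMessage on every assistant turn; a quick parse check (a sketch, not part of the commit):

    import yaml

    with open("system_prompt.yaml", "r") as stream:
        prompt_templates = yaml.safe_load(stream)

    # The |- block scalar should yield a single multi-line string with no trailing newline.
    print(prompt_templates["system_prompt"])
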
tools.py ADDED
@@ -0,0 +1,158 @@
+ import base64
+ import io
+ import pandas as pd
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
+ from langchain_core.tools import tool
+ from langchain_experimental.utilities import PythonREPL
+ from langchain_community.document_loaders import WebBaseLoader
+ from langchain_community.utilities import WikipediaAPIWrapper
+ from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun, ArxivQueryRun
+ from langchain_tavily.tavily_search import TavilySearch
+
+ @tool
+ def python_repl_tool(command: str) -> str:
+     """A tool to execute Python commands. If you want to see the output of a value, you should print it out with `print(...)`.
+     Args:
+         command (str): A valid Python command to execute.
+     Returns:
+         str: The output of the command."""
+     print('Python shell tool called')
+     # PythonREPL.run is an instance method, so instantiate first
+     result = PythonREPL().run(command)
+     return str(result)
+
+ @tool
+ def read_excel_csv(input_str: str, file_type: str = 'csv') -> str:
+     """
+     Extracts information from a base64-encoded file or a path to a csv or excel file.
+
+     Args:
+         input_str (str): String containing a base64-encoded file or its path.
+         file_type (str): Type of the file encoded in base64 ('csv' or 'excel').
+
+     Returns:
+         str: Content of input file.
+     """
+     print(f'Read excel/csv tool called {file_type} ({input_str[:20]})')
+     try:
+         # Decode the base64 string
+         byte_path = io.BytesIO(base64.b64decode(input_str))
+     except Exception:
+         # Assume it's a file path if decoding fails
+         byte_path = input_str
+
+     # Load into a DataFrame based on file type
+     if file_type == 'csv':
+         df = pd.read_csv(byte_path)
+     elif file_type in ['xlsx', 'excel']:
+         df = pd.read_excel(byte_path)
+     else:
+         raise ValueError("Unsupported file_type. Use 'csv' or 'excel'.")
+
+     result = f"{file_type.upper()} file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
+     result += f"Columns: {', '.join(df.columns)}\n\n"
+
+     # Add summary statistics
+     result += "Summary statistics:\n"
+     result += str(df.describe())
+     return result
+
+ @tool
+ def wikipedia_query_tool(query: str) -> str:
+     """A tool to query Wikipedia. It returns a summary of the page, not the full content. To get the full content, you can use another tool.
+     Args:
+         query (str): A search query for Wikipedia.
+     Returns:
+         str: A summary of the related Wikipedia page."""
+     print('Wikipedia query tool called:', query)
+     wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(top_k_results=2))
+     result = wiki.run(query)
+     print(f"Wikipedia query {query} result (limited to 10 chars): {result[:10]}")
+     return result.strip()
+
+ @tool
+ def arxiv_query_tool(query: str) -> str:
+     """A tool to query arXiv.org.
+     Useful when you need to answer questions on physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering and systems science, or economics from scientific articles on arXiv.
+     Args:
+         query (str): A search query for arXiv.
+     Returns:
+         str: The text content of the arXiv results.
+     """
+     print('ArXiv query tool called', query)
+     arxiv = ArxivQueryRun()
+     result = arxiv.run(query)
+     print(f"ArXiv query {query} result (limited to 50 chars): {result[:50]}")
+     return result.strip()
+
+ @tool
+ def webpage_reader_tool(page_url: str) -> str:
+     """A tool to read the full content of a webpage.
+     Args:
+         page_url (str): A valid URL of the webpage to read.
+     Returns:
+         str: The text content of the webpage.
+     """
+     print('Web page reader tool called', page_url)
+     loader = WebBaseLoader(web_paths=[page_url])
+     docs = []
+     for doc in loader.lazy_load():
+         docs.append(doc)
+
+     assert len(docs) == 1
+     doc = docs[0]
+
+     # The page title lives in the Document metadata, not on the Document itself
+     return f'<Document source="{page_url}" title="{doc.metadata.get("title", "")}"/>\n{doc.page_content.strip()}\n</Document>'
+
+ @tool
+ def web_search_tool(query: str) -> str:
+     """Search internet for a query and return maximum 3 results.
+     Args:
+         query: The search query.
+     Returns:
+         str: The formatted search results.
+     """
+
+     print('Web search tool called', query)
+
+     try:
+         search_docs = TavilySearch(max_results=3).invoke(query)
+         formatted_search_docs = "\n\n---\n\n".join(
+             [
+                 f'<Document source="{doc.get("url", "")}" title="{doc.get("title", "")}"/>\n{doc.get("content", "")}\n</Document>'
+                 for doc in search_docs['results']
+             ]
+         )
+     except Exception as e:
+         print(f'\tError {e}, falling back to DuckDuckGo')
+         # DuckDuckGoSearchRun returns a single pre-formatted string,
+         # not a {'results': [...]} dict like TavilySearch, so use it as-is
+         formatted_search_docs = DuckDuckGoSearchRun().invoke(query)
+     return formatted_search_docs
+
+ @tool
+ def transcribe_youtube_video_tool(video_id: str) -> str:
+     """A tool to transcribe the audio of a YouTube video.
+     Args:
+         video_id (str): A valid YouTube video ID or URL.
+     Returns:
+         str: The transcribed text of the video.
+     """
+     print(f"Transcribing YouTube video with ID: {video_id}")
+     if 'youtube' in video_id or 'watch' in video_id:
+         # Extract the video ID from the URL
+         video_id = video_id.split('v=')[-1].split('&')[0]
+
+     transcript_api = YouTubeTranscriptApi()
+     try:
+         transcript = transcript_api.fetch(video_id)
+         transcript_text = ' '.join([entry.text for entry in transcript])
+         print(f"\t {transcript_text}")
+         return transcript_text.strip()
+     except TranscriptsDisabled as e:
+         return f"Transcription is disabled for this video: {e}"
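
Since each function above is wrapped with @tool, it becomes a LangChain BaseTool and can be smoke-tested directly with .invoke and a dict keyed by argument name before being bound to the model in agent.py; a sketch with illustrative inputs:

    from tools import python_repl_tool, wikipedia_query_tool

    # Tool inputs are passed as a dict matching the function's signature.
    print(python_repl_tool.invoke({"command": "print(2 + 2)"}))
    print(wikipedia_query_tool.invoke({"query": "Alan Turing"}))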