GAIA_Agent_Rendel

Sleeping

App Files Files Community

Markus Schramm committed on Jun 4, 2025

Commit

4565986

1 Parent(s): 81917a3

Add updated project files

Browse files

Files changed (5) hide show

agents.py +160 -0
app.py +42 -10
requirements.txt +15 -2
tests.ipynb +0 -0
tools.py +398 -0

agents.py ADDED Viewed

	@@ -0,0 +1,160 @@

+# agents.py
+# agents.py
+import shutil
+import os
+import json
+from smolagents import CodeAgent, LiteLLMModel, DuckDuckGoSearchTool, HfApiModel,VisitWebpageTool, FinalAnswerTool
+# import datetime
+from smolagents.tools import Tool
+from typing import Any
+from tools import get_text_from_ascii_file, get_wikipedia_markdown, transcribe_mp3, describe_image_file, read_xls_File, get_youtube_video_transcript
+# class FinalAnswerTool(Tool):
+#     name = "final_answer"
+#     description = "Provides a final answer to the given problem."
+#     inputs = {"answer": {"type": "any", "description": "The final answer to the problem"}}
+#     output_type = "any"
+#     def forward(self, answer: Any) -> Any:
+#         if "FINAL ANSWER:" in answer:
+#             return answer.split("FINAL ANSWER:")[-1].strip()
+#         else:
+#             return answer.strip()
+# helper function based on recommendations:
+#    "For the sake of this course, make sure you don’t include the text “FINAL ANSWER” in your submission,
+#    just make your agent reply with the answer and nothing else."
def extract_final_answer(answer: object) -> str:
    """Strip model reasoning and "FINAL ANSWER:" markers from an agent reply.

    Args:
        answer: The raw agent result. Usually a string, but any value is
            accepted and converted with ``str()``; ``None`` yields ``""``.

    Returns:
        The text after the last ``</think>`` tag (if any), with every
        case-insensitive occurrence of ``final answer:`` removed and
        surrounding whitespace trimmed.
    """
    import re  # function-scope import: this module's import block has no `re`

    if answer is None:
        return ""
    text = answer if isinstance(answer, str) else str(answer)

    # Reasoning models wrap their scratchpad in <think>...</think>; only the
    # text following the last closing tag is the actual reply.
    if "</think>" in text:
        text = text.split("</think>")[-1].strip()

    # Match every casing (ASCII only). The previous fixed list of four
    # spellings never removed e.g. "FINAL Answer:", so its loop never ended.
    marker = re.compile(r"final answer:", re.IGNORECASE | re.ASCII)
    # Repeat until stable: deleting one marker can splice a new one together.
    while marker.search(text):
        text = marker.sub("", text)
    return text.strip()
+# Define GAIAAgent
+class GAIAAgent:
+    def __init__(self, use_model: str = "ollamaSRV") -> None:
+        # Initialize LiteLLMModel with Gemini Flash
+        # check for ollama
+        if use_model == "ollamaSRV":
+            print("Using LiteLLMModel with Ollama reverse proxy server.")
+            self.model = LiteLLMModel(
+                # model_id='ollama/devstral:24b',
+                # model_id="ollama/cogito:14b",
+                model_id='ollama/qwen3:32b',
+                # model_id='ollama/gemma3:27b',
+                # model_id='ollama/qwen2.5-coder:32b-instruct-q4_K_M',
+                api_base="https://192.168.5.217:8000",  # replace with remote open-ai compatible server if necessary
+                api_key=os.getenv("OLLAMA_REVPROXY_SRVML"),
+                num_ctx=16384,  # ollama default is 2048 which will often fail horribly. 8192 works for easy tasks, more is better. Check https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator to calculate how much VRAM this will need for the selected model
+                ssl_verify=False,  # Explicitly disable SSL verification
+                extra_headers={
+                    "Authorization": f"Bearer {os.getenv('OLLAMA_REVPROXY_SRVML')}",  # Explicitly set auth header
+                },
+                flatten_messages_as_text = False,
+                timeout=900  # seconds, default is 600 seconds, set to 15 minutes to allow for longer tasks
+            )
+        elif (use_model == "ollama") and shutil.which("ollama"):
+            print("Using LiteLLMModel with local Ollama CLI.")
+            self.model = LiteLLMModel(
+                    model_id="ollama/devstral:24b",
+                    # model_id="ollama/cogito:14b",
+                    max_tokens=16384
+                )
+        elif (use_model == "gemini") and "GEMINI_API_KEY" in os.environ:
+            print("Using Gemini Flash model with API key.")
+            self.model = LiteLLMModel(
+                model_id="gemini/gemini-2.5-flash-preview-05-20",
+                api_key=os.getenv("GEMINI_API_KEY"),
+            )
+        else:
+            print("Using HfApiModel with Qwen2.5-Coder-32B-Instruct.")
+            self.model = HfApiModel(
+                    max_tokens=2096,
+                    temperature=0.5,
+                    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
+                    custom_role_conversions=None
+                    )
+        # Define the tools
+        self.tools = [
+            get_wikipedia_markdown, get_text_from_ascii_file, transcribe_mp3,
+            describe_image_file, get_youtube_video_transcript, read_xls_File,
+            DuckDuckGoSearchTool(),   # Web search (main retrieval)
+            VisitWebpageTool(),       # Optional: visit page if needed (sometimes helps)
+            FinalAnswerTool(),        # Needed for FINAL ANSWER output
+        ]
+        prompt_templates = {'system_prompt': 'You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.\nTo do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.\nTo solve the task, you must plan forward to proceed in a series of steps, in a cycle of \'Thought:\', \'Code:\', and \'Observation:\' sequences.\n\nAt each step, in the \'Thought:\' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.\nThen in the \'Code:\' sequence, you should write the code in simple Python. The code sequence must end with \'<end_code>\' sequence.\nDuring each intermediate step, you can use \'print()\' to save whatever important information you will then need.\nThese print outputs will then appear in the \'Observation:\' field, which will be available as input for the next step.\nIn the end you have to return a final answer using the `final_answer` tool with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don\'t use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don\'t use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. 
\n\nHere are a few examples using notional tools:\n---\nTask: "What is the result of the following operation: 5 + 3 + 1294.678?"\n\nThought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool\nCode:\n```py\nresult = 5 + 3 + 1294.678\nfinal_answer(print(f"FINAL ANSWER: {result}"))\n```<end_code>\n\n---\nTask:\n"Answer the question in the variable `question` about the image stored in the variable `image`. The question is in French.\nYou have been provided with these additional arguments, that you can access using the keys as variables in your python code:\n{\'question\': \'Quel est l\'animal sur l\'image?\', \'image\': \'path/to/image.jpg\'}"\n\nThought: I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image.\nCode:\n```py\ntranslated_question = translator(question=question, src_lang="French", tgt_lang="English")\nprint(f"The translated question is {translated_question}.")\nanswer = image_qa(image=image, question=translated_question)\nfinal_answer(f"FINAL ANSWER {answer}")\n```<end_code>\n\n---\nTask:\nIn a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer.\nWhat does he say was the consequence of Einstein learning too much math on his creativity, in one word?\n\nThought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin.\nCode:\n```py\npages = search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein")\nprint(pages)\n```<end_code>\nObservation:\nNo result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein".\n\nThought: The query was maybe too restrictive and did not find any results. 
Let\'s try again with a broader query.\nCode:\n```py\npages = search(query="1979 interview Stanislaus Ulam")\nprint(pages)\n```<end_code>\nObservation:\nFound 6 pages:\n[Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/)\n\n[Ulam discusses Manhattan Project](https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/)\n\n(truncated)\n\nThought: I will read the first 2 pages to know more.\nCode:\n```py\nfor url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]:\n    whole_page = visit_webpage(url)\n    print(whole_page)\n    print("\\n" + "="*80 + "\\n")  # Print separator between pages\n```<end_code>\nObservation:\nManhattan Project Locations:\nLos Alamos, NM\nStanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. In this interview, he discusses his work at\n(truncated)\n\nThought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." 
Let\'s answer in one word.\nCode:\n```py\nfinal_answer("FINAL ANSWER diminished")\n```<end_code>\n\n---\nTask: "Which city has the highest population: Guangzhou or Shanghai?"\n\nThought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities.\nCode:\n```py\nfor city in ["Guangzhou", "Shanghai"]:\n    print(f"Population {city}:", search(f"{city} population")\n```<end_code>\nObservation:\nPopulation Guangzhou: [\'Guangzhou has a population of 15 million inhabitants as of 2021.\']\nPopulation Shanghai: \'26 million (2019)\'\n\nThought: Now I know that Shanghai has the highest population.\nCode:\n```py\nfinal_answer("FINAL ANSWER Shanghai")\n```<end_code>\n\n---\nTask: "What is the current age of the pope, raised to the power 0.36?"\n\nThought: I will use the tool `wiki` to get the age of the pope, and confirm that with a web search.\nCode:\n```py\npope_age_wiki = wiki(query="current pope age")\nprint("Pope age as per wikipedia:", pope_age_wiki)\npope_age_search = web_search(query="current pope age")\nprint("Pope age as per google search:", pope_age_search)\n```<end_code>\nObservation:\nPope age: "The pope Francis is currently 88 years old."\n\nThought: I know that the pope is 88 years old. Let\'s compute the result using python code.\nCode:\n```py\npope_current_age = 88 ** 0.36\nfinal_answer(f"FINAL ANSWER {pope_current_age}")\n```<end_code>\n\nAbove example were using notional tools that might not exist for you. 
On top of performing computations in the Python code snippets that you create, you only have access to these tools, behaving like regular python functions:\n```python\n{%- for tool in tools.values() %}\ndef {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:\n    """{{ tool.description }}\n\n    Args:\n    {%- for arg_name, arg_info in tool.inputs.items() %}\n        {{ arg_name }}: {{ arg_info.description }}\n    {%- endfor %}\n    """\n{% endfor %}\n```\n\n{%- if managed_agents and managed_agents.values() | list %}\nYou can also give tasks to team members.\nCalling a team member works the same as for calling a tool: simply, the only argument you can give in the call is \'task\'.\nGiven that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.\nHere is a list of the team members that you can call:\n```python\n{%- for agent in managed_agents.values() %}\ndef {{ agent.name }}("Your query goes here.") -> str:\n    """{{ agent.description }}"""\n{% endfor %}\n```\n{%- endif %}\n\nHere are the rules you should always follow to solve your task:\n1. Always provide a \'Thought:\' sequence, and a \'Code:\\n```py\' sequence ending with \'```<end_code>\' sequence, else you will fail.\n2. Use only variables that you have defined!\n3. Always use the right arguments for the tools. DO NOT pass the arguments as a dict as in \'answer = wiki({\'query\': "What is the place where James Bond lives?"})\', but use the arguments directly as in \'answer = wiki(query="What is the place where James Bond lives?")\'.\n4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. 
For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.\n5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters.\n6. Don\'t name any new variable with the same name as a tool: for instance don\'t name a variable \'final_answer\'.\n7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables.\n8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}\n9. The state persists between code executions: so if in one step you\'ve created variables or imported modules, these will all persist.\n10. Don\'t give up! You\'re in charge of solving the task, not providing directions to solve it.\n\nNow Begin!',
+                        'planning': {'initial_plan': 'You are a world expert at analyzing a situation to derive facts, and plan accordingly towards solving a task.\nBelow I will present you a task. You will need to 1. build a survey of facts known or needed to solve the task, then 2. make a plan of action to solve the task.\n\n## 1. Facts survey\nYou will build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need.\nThese "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings:\n### 1.1. Facts given in the task\nList here the specific facts given in the task that could help you (there might be nothing here).\n\n### 1.2. Facts to look up\nList here any facts that we may need to look up.\nAlso list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here.\n\n### 1.3. Facts to derive\nList here anything that we want to derive from the above by logical reasoning, for instance computation or simulation.\n\nDon\'t make any assumptions. For each item, provide a thorough reasoning. Do not add anything else on top of three headings above.\n\n## 2. Plan\nThen for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.\nThis plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.\nDo not skip steps, do not add any superfluous steps. 
Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.\nAfter writing the final step of the plan, write the \'\\n<end_plan>\' tag and stop there.\n\nYou can leverage these tools, behaving like regular python functions:\n```python\n{%- for tool in tools.values() %}\ndef {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:\n    """{{ tool.description }}\n\n    Args:\n    {%- for arg_name, arg_info in tool.inputs.items() %}\n        {{ arg_name }}: {{ arg_info.description }}\n    {%- endfor %}\n    """\n{% endfor %}\n```\n\n{%- if managed_agents and managed_agents.values() | list %}\nYou can also give tasks to team members.\nCalling a team member works the same as for calling a tool: simply, the only argument you can give in the call is \'task\'.\nGiven that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.\nHere is a list of the team members that you can call:\n```python\n{%- for agent in managed_agents.values() %}\ndef {{ agent.name }}("Your query goes here.") -> str:\n    """{{ agent.description }}"""\n{% endfor %}\n```\n{%- endif %}\n\n---\nNow begin! Here is your task:\n```\n{{task}}\n```\nFirst in part 1, write the facts survey, then in part 2, write your plan.',
+                        'update_plan_pre_messages': 'You are a world expert at analyzing a situation, and plan accordingly towards solving a task.\nYou have been given the following task:\n```\n{{task}}\n```\n\nBelow you will find a history of attempts made to solve this task.\nYou will first have to produce a survey of known and unknown facts, then propose a step-by-step high-level plan to solve the task.\nIf the previous tries so far have met some success, your updated plan can build on these results.\nIf you are stalled, you can make a completely new plan starting from scratch.\n\nFind the task and history below:',
+                        'update_plan_post_messages': 'Now write your updated facts below, taking into account the above history:\n## 1. Updated facts survey\n### 1.1. Facts given in the task\n### 1.2. Facts that we have learned\n### 1.3. Facts still to look up\n### 1.4. Facts still to derive\n\nThen write a step-by-step high-level plan to solve the task above.\n## 2. Plan\n### 2. 1. ...\nEtc.\nThis plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.\nBeware that you have {remaining_steps} steps remaining.\nDo not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.\nAfter writing the final step of the plan, write the \'\\n<end_plan>\' tag and stop there.\n\nYou can leverage these tools, behaving like regular python functions:\n```python\n{%- for tool in tools.values() %}\ndef {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:\n    """{{ tool.description }}\n\n    Args:\n    {%- for arg_name, arg_info in tool.inputs.items() %}\n        {{ arg_name }}: {{ arg_info.description }}\n    {%- endfor %}"""\n{% endfor %}\n```\n\n{%- if managed_agents and managed_agents.values() | list %}\nYou can also give tasks to team members.\nCalling a team member works the same as for calling a tool: simply, the only argument you can give in the call is \'task\'.\nGiven that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.\nHere is a list of the team members that you can call:\n```python\n{%- for agent in managed_agents.values() %}\ndef {{ agent.name }}("Your query goes here.") -> str:\n    """{{ agent.description }}"""\n{% endfor %}\n```\n{%- endif %}\n\nNow write your updated facts survey below, then your new plan.'},
+                        'managed_agent': {'task':(
+                        "You are a highly capable and autonomous agent named {{name}}, designed to solve complex tasks efficiently.\n"
+                        "A valued client has assigned you the following task:\n"
+                        "---\n"
+                        "Task:\n"
+                        "{{task}}\n"
+                        "---\n"
+                        "To complete this task successfully, follow these steps carefully:\n"
+                        "    1. Comprehend the task and identify the intended goal.\n"
+                        "    2. Break the task into clear, logical steps.\n"
+                        "    3. Select and prepare the tools or resources you need.\n"
+                        "    4. Set up the required environment or context.\n"
+                        "    5. Execute each step methodically.\n"
+                        "    6. Monitor outcomes and identify any deviations.\n"
+                        "    7. Revise your plan if necessary based on feedback.\n"
+                        "    8. Maintain internal state and track progress.\n"
+                        "    9. Verify that the goal has been fully achieved.\n"
+                        "   10. Present the final result clearly and concisely.\n"
+                        "If you succeed, you will be rewarded with a significant bonus.\n\n"
+                        "Your final_answer MUST be:\n"
+                        "- a number (retain its original type; do not include units),\n"
+                        "- a concise phrase,\n"
+                        "- or a comma-separated list of numbers or strings (no articles, no abbreviations).\n\n"
+                        "Only the content passed to the final_answer tool will be preserved. Any other content will be discarded."),
+                        'report': "{{final_answer}}"},
+                        'final_answer': {
+                            'pre_messages': "",
+                            'post_messages': ""
+                        }}
+        # Create the CodeAgent
+        self.agent = CodeAgent(
+            tools=self.tools,
+            model=self.model,
+            prompt_templates = prompt_templates,
+            max_steps=10,                 # should be enough, first guess --> to check
+            planning_interval=3,          # should be enough, first guess --> to check
+            verbosity_level=2,      # 0: no output, 1: only errors, 2: all outputs
+            additional_authorized_imports=["datetime", "numpy", "requests", "json", "re",
+                                           "bs4", "pandas", "lxml", "pymupdf", "openpyxl",
+                                           "scipy", "PIL", "cv2"],
+            name="RendelsGAIAAgent"
+        )
+        print("✅ GAIAAgent initialized with tools:", [t.name for t in self.tools])
+    def __call__(self, question: str) -> str:
+        print(f"\n[GAIAAgent] Running agent on question:\n{question}\n")
+        try:
+            result = self.agent.run(question)
+            final_answer = extract_final_answer(result)
+            print(f"\n[GAIAAgent] FINAL ANSWER extracted: {final_answer}\n")
+            return final_answer
+        except Exception as e:
+            print(f"[GAIAAgent] ERROR: {e}")
+            return f"ERROR: {e}"

app.py CHANGED Viewed

@@ -1,23 +1,25 @@
 import os
 import gradio as gr
 import requests
-import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
@@ -40,7 +42,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -70,12 +73,41 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         return f"An unexpected error occurred fetching questions: {e}", None
     # 3. Run your Agent
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue

 import os
 import gradio as gr
 import requests
+# import inspect
 import pandas as pd
+from agents import GAIAAgent
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+# class BasicAgent:
+#     def __init__(self):
+#         print("BasicAgent initialized.")
+#     def __call__(self, question: str) -> str:
+#         print(f"Agent received question (first 50 chars): {question[:50]}...")
+#         fixed_answer = "This is a default answer."
+#         print(f"Agent returning fixed answer: {fixed_answer}")
+#         return fixed_answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
+        # agent = BasicAgent()
+        agent = GAIAAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
         return f"An unexpected error occurred fetching questions: {e}", None
     # 3. Run your Agent
+    os.makedirs("downloaded_files", exist_ok=True)
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        # Check if task_id and question_text are present
+        filename = item.get("file_name")
+        if filename and not os.path.exists("downloaded_files/"+filename):
+            file_url = f"{api_url}/files/{task_id}"
+            print(f"Attempting to download file from: {file_url}")
+            try:
+                response = requests.get(file_url, timeout=30) # Increased timeout
+                response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)
+                with open("downloaded_files/"+filename, "wb") as f:
+                    f.write(response.content)
+            except requests.exceptions.HTTPError as http_err:
+                print(f"HTTP error occurred: {http_err}")
+                print(f"Response content (first 500 chars): {response.text[:500]}")
+            except requests.exceptions.ConnectionError as conn_err:
+                print(f"Connection error occurred: {conn_err}")
+            except requests.exceptions.Timeout as timeout_err:
+                print(f"Timeout error occurred: {timeout_err}")
+            except requests.exceptions.RequestException as req_err:
+                print(f"An unexpected error occurred during the request: {req_err}")
+            except Exception as e:
+                print(f"An unexpected error occurred: {e}")
+        if filename:
+            question_text += f" (file: downloaded_files/{filename})"
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue

requirements.txt CHANGED Viewed

@@ -1,2 +1,15 @@
-gradio
-requests

+pandas
+gradio[oauth]
+requests
+smolagents==1.16.1
+huggingface_hub
+litellm
+python-dateutil
+markdownify
+duckduckgo_search
+beautifulsoup4>=4.12.2
+lxml>=4.9.3
+PyMuPDF
+openai-whisper
+ffmpeg-python
+youtube-transcript-api

tests.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

tools.py ADDED Viewed

	@@ -0,0 +1,398 @@

+import os
+import pandas as pd
+# import wikipediaapi
+from markdownify import markdownify as md
+from smolagents import tool, LiteLLMModel
+import whisper
+from youtube_transcript_api import YouTubeTranscriptApi
+from youtube_transcript_api.formatters import JSONFormatter
+import base64
+import mimetypes
+import requests # Keep for consistency, though not used for fetching image in this version
+import os # Added for os.path.join
+import re
+from bs4 import BeautifulSoup, Tag, Comment
+# that could be better done via a managed agent, but this is a quick hack to get it working
@tool
def describe_image_file(local_image_path: str) -> str:
    """
    Describe the contents of a local image file in detail and return the description as text.
    If the file is missing or the vision model call fails, a message starting with "Error" is returned instead.
    Args:
        local_image_path (str): The path to the local image file to be described.
    Returns:
        str: A detailed description of the image contents, or an error message.
    """
    text_prompt = "What is in this image? Describe it in detail."
    # Common image suffixes are mapped explicitly so the MIME type does not
    # depend on the platform's mimetypes table.
    known_mime_types = {
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.bmp': 'image/bmp',
        '.webp': 'image/webp',
    }
    try:
        # Validate the input before building the model client.
        if not os.path.exists(local_image_path):
            raise FileNotFoundError(f"Image file not found at {local_image_path}. Please ensure it was downloaded correctly.")
        # 1. Read the image and base64-encode it for embedding in a data URI
        with open(local_image_path, "rb") as image_file:
            base64_image_string = base64.b64encode(image_file.read()).decode('utf-8')
        # 2. Pick the MIME type from the file suffix, falling back to mimetypes
        lowered_path = local_image_path.lower()
        content_type = next(
            (mime for suffix, mime in known_mime_types.items() if lowered_path.endswith(suffix)),
            None,
        )
        if content_type is None:
            content_type = mimetypes.guess_type(local_image_path)[0] or 'application/octet-stream'
        print(f"Using specified MIME type: {content_type}")
        # 3. Construct the data URI and the multimodal chat payload
        data_uri = f"data:{content_type};base64,{base64_image_string}"
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": text_prompt},
                    {"type": "image_url", "image_url": {"url": data_uri}},
                ],
            }
        ]
        model = LiteLLMModel(
            model_id='ollama/gemma3:27b',
            api_base="https://192.168.5.217:8000",  # replace with remote open-ai compatible server if necessary
            api_key=os.getenv("OLLAMA_REVPROXY_SRVML"),
            num_ctx=16384,  # ollama default is 2048 which will often fail horribly. 8192 works for easy tasks, more is better.
            # NOTE(review): certificate verification is disabled here; confirm
            # this is only ever pointed at a trusted internal endpoint.
            ssl_verify=False,
            extra_headers={
                "Authorization": f"Bearer {os.getenv('OLLAMA_REVPROXY_SRVML')}",  # Explicitly set auth header
            },
            flatten_messages_as_text=False,
        )
        response = model.generate(messages)
        # generate() is expected to return a chat-message object whose
        # `.content` holds the text; fall back to the raw value otherwise so
        # the tool always honours its `str` return type.
        description = getattr(response, "content", response)
        return description if isinstance(description, str) else str(description)
    except FileNotFoundError as fnf_err:
        print(f"File error: {fnf_err}")
        # Return the message so the calling agent can observe the failure
        # instead of silently receiving None.
        return f"Error: {fnf_err}"
    except Exception as e:
        print(f"An error occurred: {e}")
        return f"Error describing image: {e}"
+@tool
+def get_youtube_video_transcript(video_id: str) -> str:
+    """
+    Fetches the transcript of a YouTube video by its ID and returns it in JSON format.
+    The video ID can be found in the YouTube video URL:
+    https://www.youtube.com/watch?v=VIDEO_ID, where VIDEO_ID is the part after "v=".
+    example: for the url https://www.youtube.com/watch?v=L1vXCYZAYYM the video_id is "L1vXCYZAYYM".
+    Args:
+        video_id (str): The YouTube video ID.
+    Returns:
+        str: The transcript in JSON format.
+    """
+    ytt_api = YouTubeTranscriptApi()
+    transcript = ytt_api.fetch(video_id)
+    formatter = JSONFormatter()
+    # .format_transcript(transcript) turns the transcript into a JSON string.
+    json_formatted = formatter.format_transcript(transcript)
+    return json_formatted
+@tool
+def transcribe_mp3(mp3_path: str, model_size: str = "base") -> str:
+    """
+    Transcribe an MP3 file to text using Whisper.
+    Args:
+        mp3_path (str): Path to the MP3 file.
+        model_size (str): Whisper model size (tiny, base, small, medium, large).
+    Returns:
+        str: Transcribed text.
+    """
+    transcription_path = mp3_path.replace(".mp3", "_transcript.txt")
+    # Check if transcription already exists
+    if os.path.exists(transcription_path):
+        with open(transcription_path, 'r', encoding='utf-8') as f:
+            return f.read()
+    # Load model
+    model = whisper.load_model(model_size)
+    # Transcribe
+    result = model.transcribe(mp3_path)
+    transcription = result["text"]
+    # Save transcription to file
+    with open(transcription_path, 'w', encoding='utf-8') as f:
+        f.write(transcription)
+    # Return the text
+    return transcription
+@tool
+def get_text_from_ascii_file(filepath: str) -> str:
+    """
+    Reads the content of an ASCII text file and returns it as a string.
+    Args:
+        filepath (str): The path to the ASCII text file.
+    Returns:
+        str: The content of the file as a string.
+    """
+    if not os.path.exists(filepath):
+        raise FileNotFoundError(f"The file at {filepath} does not exist.")
+    with open(filepath, "r") as f:
+        return f.read()
+# @tool
+# def get_wikipedia_page_content(page_title: str, lang: str='en') -> str:
+#     """
+#     This function uses the `wikipediaapi` library to retrieve the content of a specified Wikipedia page in a given language.
+#     For example: for the url 'https://en.wikipedia.org/wiki/Python_(programming_language)' the page_title would be 'Python_(programming_language)' and the lang would be 'en'.
+#     It returns the content of the page as a Markdown-formatted string.
+#     Args:
+#         page_title (str): The title of the Wikipedia page to fetch.
+#         lang (str): The language of the Wikipedia page (default is 'en' for English).
+#     Returns:
+#         str: The content of the Wikipedia page.
+#     """
+#     MY_EMAIL = os.getenv("MY_EMAIL", None)
+#     if MY_EMAIL is None:
+#         raise ValueError("MY_EMAIL environment variable is not set. Please set it to your email address.")
+#     wiki_wiki = wikipediaapi.Wikipedia(user_agent=f'Wiki Agent ({MY_EMAIL})', language=lang)
+#     page = wiki_wiki.page(page_title)
+#     if not page.exists():
+#         raise ValueError(f"The Wikipedia page '{page_title}' does not exist.")
+#     return md(page.text)
+@tool
+def get_wikipedia_markdown(
+    title: str,
+    lang: str = 'en',
+    ignore_references: bool = True,
+    ignore_links: bool = True
+) -> str:
+    """
+    Fetches the main content of a Wikipedia page and returns it as Markdown,
+    excluding infoboxes, navigation templates, images, and—if requested—the
+    References, Further reading, and External links sections. It's recommended
+    to start with ignore_references=True and ignore_links=True
+    to reduce the amount of output to the pure infomation.
+    Args:
+        title (str): Wikipedia page title (e.g., "Mercedes_Sosa").
+        lang (str): Language code (default 'en').
+        ignore_references (bool): If True, drop "References", "Further reading",
+                                  and "External links" sections entirely.
+        ignore_links (bool): If True, strip out all <a> tags entirely.
+    Returns:
+        str: Markdown-formatted content of the main article body.
+    """
+    # 1. Fetch raw HTML
+    url = f"https://{lang}.wikipedia.org/wiki/{title}"
+    try:
+        response = requests.get(url)
+        response.raise_for_status()
+    except requests.exceptions.HTTPError as e:
+        # use wikipedia's API to check if the page exists
+        api_url = f"https://{lang}.wikipedia.org/w/api.php"
+        search_params = {
+            'list': 'search',
+            'srprop': '',
+            'srlimit': 10,
+            'limit': 10,
+            'srsearch': title.replace("_", " "),
+            'srinfo': 'suggestion',
+            'format': 'json',
+            'action': 'query'
+        }
+        headers = {
+            'User-Agent': "mozilla /5.0 (Windows NT 10.0; Win64; x64)"
+        }
+        r = requests.get(api_url, params=search_params, headers=headers)
+        raw_results = r.json()
+        search_results = [d['title'].replace(" ", "_") for d in raw_results['query']['search']]
+        if ('searchinfo' in raw_results['query']) and ('suggestion' in raw_results['query']['searchinfo']):
+            search_results.insert(0, raw_results['query']['searchinfo']['suggestion'].replace(" ", "_"))
+        errorMsg = f"Could not fetch page '{title}' for language '{lang}' (HTTP {response.status_code})."
+        if search_results:
+            errorMsg += f" Did you mean one of these pages? {', '.join(search_results)}"
+        raise ValueError(errorMsg) from e
+    html = response.text
+    # 2. Parse with BeautifulSoup and isolate the article’s main <div>
+    soup = BeautifulSoup(html, "lxml")
+    content_div = soup.find("div", class_="mw-parser-output")                          #
+    if content_div is None:
+        raise ValueError(f"Could not find main content for page '{title}'")
+    # 2a. Remove all “[edit]” links (<span class="mw-editsection">…)
+    for edit_span in content_div.find_all("span", class_="mw-editsection"):
+        edit_span.decompose()                                                           #
+    # 2b. Remove any superscript footnote markers (<sup class="reference">…)
+    for sup in content_div.find_all("sup", class_="reference"):
+        sup.decompose()                                                                 #
+    # 2c. Remove any parser‐debug comments (e.g., “NewPP limit report…”, “Transclusion expansion time report…”)
+    for comment in content_div.find_all(string=lambda text: isinstance(text, Comment)):
+        comment_text = str(comment)
+        # If the comment contains debug keywords, extract it
+        if (
+            "NewPP limit report" in comment_text
+            or "Transclusion expansion time report" in comment_text
+            or "Saved in parser cache" in comment_text
+        ):
+            comment.extract()  #
+    # 3. Remove unwanted “boilerplate” elements:
+    #    a) Infoboxes (sidebars)
+    for infobox in content_div.find_all("table", class_=re.compile(r"infobox")):
+        infobox.decompose()                                                             #
+    #    b) Table of Contents
+    toc = content_div.find("div", id="toc")
+    if toc:
+        toc.decompose()                                                                 #
+    #    c) Navigation templates (navbox/vertical-navbox/metadata)
+    for nav in content_div.find_all(
+        ["div", "table"],
+        class_=re.compile(r"navbox|vertical-navbox|metadata")
+    ):
+        nav.decompose()                                                                 #
+    #    d) Thumbnails / image wrappers
+    for thumb in content_div.find_all("div", class_=re.compile(r"thumb")):
+        thumb.decompose()                                                               #
+    #    e) Raw <img> tags
+    for img in content_div.find_all("img"):
+        img.decompose()                                                                 #
+    # 4. Convert any remaining <table> into a Markdown table **in-place**
+    def table_to_markdown(table_tag: Tag) -> str:
+        """
+        Converts a <table> into a Markdown-formatted table, preserving <th> headers.
+        """
+        headers = []
+        header_row = table_tag.find("tr")
+        if header_row:
+            for th in header_row.find_all("th"):
+                headers.append(th.get_text(strip=True))
+        md_table = ""
+        if headers:
+            md_table += "| " + " | ".join(headers) + " |\n"
+            md_table += "| " + " | ".join("---" for _ in headers) + " |\n"
+        # Now process data rows (skip the first <tr> if it was header row)
+        for row in table_tag.find_all("tr")[1:]:
+            cells = row.find_all(["td", "th"])
+            if not cells:
+                continue
+            row_texts = [cell.get_text(strip=True) for cell in cells]
+            md_table += "| " + " | ".join(row_texts) + " |\n"
+        return md_table.rstrip()
+    for table in content_div.find_all("table"):
+        # Skip infobox/navigation tables (already removed above)
+        if "infobox" in table.get("class", []) or table.get("role") == "navigation":
+            continue
+        markdown_table = table_to_markdown(table)                                        #
+        new_node = soup.new_string("\n\n" + markdown_table + "\n\n")
+        table.replace_with(new_node)
+    # 5. Remove “References”, “Further reading” & “External links” sections if requested
+    if ignore_references:
+        section_ids = {"references", "further_reading", "external_links"}
+        # We look for wrapper <div class="mw-heading mw-heading2"> or mw-heading3
+        for wrapper in content_div.find_all("div", class_=re.compile(r"mw-heading mw-heading[23]")):
+            heading_tag = wrapper.find(re.compile(r"^h[2-3]$"))
+            if heading_tag and heading_tag.get("id", "").strip().lower() in section_ids:
+                # Collect every sibling until the next wrapper of the same form
+                siblings_to_remove = []
+                for sib in wrapper.find_next_siblings():
+                    if (
+                        sib.name == "div"
+                        and "mw-heading" in (sib.get("class") or [])
+                        and re.match(r"mw-heading mw-heading[23]", " ".join(sib.get("class") or []))
+                    ):
+                        break
+                    siblings_to_remove.append(sib)
+                # First delete those siblings
+                for node in siblings_to_remove:
+                    node.decompose()                                                        #
+                # Finally delete the wrapper itself
+                wrapper.decompose()                                                          #
+    # 6. Convert the cleaned HTML into Markdown
+    markdown_options = {}
+    if ignore_links:
+        markdown_options["strip"] = ["a"]  # strip all <a> tags (keep only their text)
+    raw_html = "".join(str(child) for child in content_div.children)
+    markdown_text = md(raw_html, **markdown_options)                                   #
+    # 7. Collapse 3+ blank lines into exactly two
+    markdown_text = re.sub(r"\n{3,}", "\n\n", markdown_text).strip()
+    return markdown_text
+@tool
+def read_xls_File(file_path: str) -> object:
+    """This tool loads xls file into pandas and returns it.
+    Args:
+        file_path (str): File path to the xls file.
+    Returns:
+        object: The loaded xls file as a pandas DataFrame.
+    """
+    return pd.read_excel(file_path)