mathysgrapotte committed on
Commit
17816a1
·
1 Parent(s): 52a25ec

fetch_ontology from input

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. fetch_ontology_from_input.py +144 -0
  3. main.py +29 -27
  4. pyproject.toml +2 -0
  5. uv.lock +10 -0
.gitignore CHANGED
@@ -8,3 +8,4 @@ wheels/
8
 
9
  # Virtual environments
10
  .venv
 
 
8
 
9
  # Virtual environments
10
  .venv
11
+ .gradio/
fetch_ontology_from_input.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import CodeAgent, LiteLLMModel, tool
2
+ from smolagents.tools import ToolCollection
3
+ import gradio as gr
4
+ import requests
5
+ import yaml
6
+ from owlready2 import get_ontology, default_world
7
+ import logging
8
+
9
def get_fastqc_meta_yaml(timeout: float = 30.0):
    """
    Fetch the content of the FastQC meta.yml file from the nf-core/modules repository.

    Args:
        timeout: Seconds to wait for the HTTP response. The original call had
            no timeout, so a stalled connection could hang the process forever.

    Returns:
        str: The raw content of the YAML file, or None if the request failed.
    """
    # Use the raw GitHub URL to get the file content directly (no HTML wrapper).
    url = "https://raw.githubusercontent.com/nf-core/modules/master/modules/nf-core/fastqc/meta.yml"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for 4xx/5xx responses
        return response.text
    except requests.exceptions.RequestException as e:
        # Best-effort: the callers treat None as "fetch failed".
        print(f"Error fetching YAML file: {e}")
        return None
26
+
27
def get_fastqc_input_yaml():
    """
    Fetch and parse the FastQC meta.yml, returning its ``input`` section.

    Returns:
        The parsed ``input`` section (a list describing the module's input
        channels), or None if fetching or parsing failed or the key is absent.
    """
    yaml_content = get_fastqc_meta_yaml()
    if not yaml_content:
        print("Failed to fetch the YAML file, returning None.")
        return None

    try:
        parsed = yaml.safe_load(yaml_content)
    except yaml.YAMLError as e:
        print(f"Error parsing YAML: {e}")
        return None

    # Guard against well-formed YAML that lacks an "input" section: the
    # original indexed parsed["input"] directly and raised KeyError, which
    # is inconsistent with this function's return-None-on-failure contract.
    if not isinstance(parsed, dict) or "input" not in parsed:
        print("Parsed YAML has no 'input' section, returning None.")
        return None
    return parsed["input"]
46
+
47
+
48
def load_edam_ontology():
    """
    Load the EDAM ontology OWL file from GitHub using owlready2.

    The loaded ontology is memoized on the function object, so repeated calls
    (one per agent tool invocation via search_edam_ontology_by_search_term)
    do not re-download and re-parse the large OWL file every time.

    Returns:
        Ontology: The loaded EDAM ontology object, or None if loading fails.
        Failures are not cached, so a later call can retry.
    """
    cached = getattr(load_edam_ontology, "_onto", None)
    if cached is not None:
        return cached

    # URL to the raw EDAM ontology OWL file
    url = "https://raw.githubusercontent.com/edamontology/edamontology/main/releases/EDAM_1.25.owl"

    try:
        # Load the ontology directly from the URL
        onto = get_ontology(url).load()
        print(f"Successfully loaded EDAM ontology: {onto}")
        load_edam_ontology._onto = onto  # memoize for subsequent calls
        return onto
    except Exception as e:
        print(f"Error loading EDAM ontology: {e}")
        return None
66
+
67
@tool
def search_edam_ontology_by_search_term(search_term: str = None) -> list:
    """
    Generic function to search by EDAM entity type using native search. The native search is strict so you need to provide single word search terms (for example: 'fasta').

    Args:
        search_term: single word search term to filter results

    Returns:
        list: List of matching classes
    """
    onto = load_edam_ontology()
    # load_edam_ontology() is documented to return None on failure; the
    # original called onto.search unconditionally and would raise
    # AttributeError inside the agent. Fail soft with an empty result.
    if onto is None:
        print("EDAM ontology could not be loaded; returning no matches.")
        return []

    entity_type = "format"
    # Search using IRI pattern matching (EDAM format IRIs look like format_XXXX)
    pattern = f"*{entity_type}_*"
    matches = onto.search(iri=pattern)

    # Filter by search term if provided
    if search_term:
        search_term_lower = search_term.lower()
        filtered_matches = []

        for match in matches:
            # Check name
            if search_term_lower in match.name.lower():
                filtered_matches.append(match)
                continue

            # Check labels
            if hasattr(match, 'label') and match.label:
                for label in match.label:
                    if search_term_lower in str(label).lower():
                        filtered_matches.append(match)
                        break

        matches = filtered_matches

    # Print results (prints are visible in the agent's execution log)
    search_desc = f" matching '{search_term}'" if search_term else ""
    print(f"\nFound {len(matches)} {entity_type}(s){search_desc}:")
    for i, match in enumerate(matches[:10]):  # Limit to 10 for readability
        print(f"{i+1}. {match.name}")
        if hasattr(match, 'label') and match.label:
            print(f"   Label: {match.label[0]}")
        print()

    if len(matches) > 10:
        print(f"... and {len(matches) - 10} more results")

    return matches
117
+
118
# Example usage:
if __name__ == "__main__":
    tool_list = [search_edam_ontology_by_search_term]

    # Fetch the nf-core FastQC input specification ONCE and reuse it below.
    # The original fetched it twice and, on a failed fetch (None return),
    # crashed with a TypeError when subscripting.
    input_yaml = get_fastqc_input_yaml()
    if input_yaml is None:
        raise SystemExit("Could not fetch FastQC input metadata; aborting.")

    print(input_yaml[0][0].keys())

    model = LiteLLMModel(
        model_id="ollama/devstral:latest",
        #model_id="ollama/qwen3:0.6b",
        api_base="http://localhost:11434",
        temperature=0.0,
        max_tokens=5000,
    )

    agent = CodeAgent(
        tools=tool_list,
        model=model,
        additional_authorized_imports=["inspect", "json"]
    )

    print(agent.tools)
    for input_tool in input_yaml[0]:
        for key, value in input_tool.items():
            if key != "meta":
                # Prompt typos fixed ("presentend" -> "presented",
                # "formated" -> "formatted") so the LLM gets clean instructions.
                result = agent.run(
                    f"you are presented with a file format for the type {key}, "
                    f"which is a {value['type']} and is described by the following "
                    f"description: '{value['description']}', search for the single "
                    f"best match out of possible matches in the edam ontology "
                    f"(formatted as format_XXXX), and return the answer (a single "
                    f"ontology class) in a final_answer call such as "
                    f"final_answer(f'format_XXXX')"
                )
                print(result)
main.py CHANGED
@@ -2,41 +2,43 @@ from smolagents import CodeAgent, LiteLLMModel
2
  from smolagents.tools import ToolCollection
3
  import gradio as gr
4
 
5
- def chat_with_agent(message, history):
6
- """Initialize MCP client for each request to avoid connection issues"""
7
- try:
8
- with ToolCollection.from_mcp(
 
 
 
 
 
 
 
 
 
 
 
9
  {"url": "https://notredameslab-nf-ontology.hf.space/gradio_api/mcp/sse", "transport": "sse"},
10
  trust_remote_code=True # Acknowledge that we trust this remote MCP server
11
  ) as tool_collection:
12
-
13
- model = LiteLLMModel(
14
- model_id="ollama/devstral:latest",
15
- api_base="http://localhost:11434",
16
- )
17
-
18
- agent = CodeAgent(
19
- tools=tool_collection.tools,
20
- model=model,
21
- additional_authorized_imports=["inspect", "json"]
22
- )
23
-
24
- additional_instructions = """
25
- ADDITIONAL IMPORTANT INSTRUCTIONS:
26
- use the tool "final_answer" in the code block to provide the answer to the user. Prints are only for debugging purposes. So, to give your results concatenate everything you want to print in a single "final_answer" call as such : final_answer(f"your answer here").
27
- """
28
 
29
- agent.system_prompt += additional_instructions
 
 
 
 
30
 
31
- result = agent.run(message)
32
- return str(result)
33
-
34
- except Exception as e:
35
- return f"❌ Error: {e}\nType: {type(e).__name__}"
36
 
 
 
 
37
  if __name__ == "__main__":
38
  demo = gr.ChatInterface(
39
- fn=chat_with_agent,
40
  type="messages",
41
  examples=["can you extract input/output metadata from fastqc nf-core module ?"],
42
  title="Agent with MCP Tools (Per-Request Connection)",
 
2
  from smolagents.tools import ToolCollection
3
  import gradio as gr
4
 
5
# NOTE(review): this constant appears unused after the refactor — the old
# chat_with_agent appended it to agent.system_prompt, while run_agent now
# appends a shorter final_answer hint to the message instead. Confirm whether
# it should be re-applied (e.g. via prompt_templates) or deleted.
additional_instructions = """
ADDITIONAL IMPORTANT INSTRUCTIONS:
use the tool "final_answer" in the code block to provide the answer to the user.
Prints are only for debugging purposes. So, to give your results concatenate everything you want to print in a single "final_answer" call as such : final_answer(f"your answer here").

Example:
```python
result = tool_call(arg1, arg2, arg3)
final_answer(f"your answer here {result}") # here print statement has been replaced by final_answer tool call
```
"""
16
+
17
def run_agent(message, history):
    """Create a new MCP connection for each request to avoid event loop issues.

    Args:
        message: The user's chat message.
        history: Prior chat turns (unused; required by gr.ChatInterface).

    Returns:
        str: The agent's final answer, or a formatted error message if the
        MCP connection, model call, or agent run failed.
    """
    try:
        with ToolCollection.from_mcp(
            {"url": "https://notredameslab-nf-ontology.hf.space/gradio_api/mcp/sse", "transport": "sse"},
            trust_remote_code=True  # Acknowledge that we trust this remote MCP server
        ) as tool_collection:

            model = LiteLLMModel(
                model_id="ollama/devstral:latest",
                #model_id="ollama/qwen3:0.6b",
                api_base="http://localhost:11434",
            )

            agent = CodeAgent(
                tools=tool_collection.tools,
                model=model,
                additional_authorized_imports=["inspect", "json"]
            )

            return str(agent.run(message + " put the result in a final_answer call such as final_answer(f'your answer here')"))
    except Exception as e:
        # Surface failures to the chat UI instead of crashing the Gradio
        # handler — restores the error reporting the previous chat_with_agent
        # implementation had before the refactor dropped it.
        return f"❌ Error: {e}\nType: {type(e).__name__}"
38
+
39
  if __name__ == "__main__":
40
  demo = gr.ChatInterface(
41
+ fn=run_agent,
42
  type="messages",
43
  examples=["can you extract input/output metadata from fastqc nf-core module ?"],
44
  title="Agent with MCP Tools (Per-Request Connection)",
pyproject.toml CHANGED
@@ -9,6 +9,8 @@ dependencies = [
9
  "gradio[mcp]>=5.0.0",
10
  "huggingface_hub[mcp]>=0.32.2",
11
  "mcp>=1.9.2",
 
 
12
  "requests",
13
  "smolagents[litellm,mcp]>=1.17.0",
14
  "textblob>=0.19.0",
 
9
  "gradio[mcp]>=5.0.0",
10
  "huggingface_hub[mcp]>=0.32.2",
11
  "mcp>=1.9.2",
12
+ "owlready2>=0.48",
13
+ "pyyaml>=6.0.2",
14
  "requests",
15
  "smolagents[litellm,mcp]>=1.17.0",
16
  "textblob>=0.19.0",
uv.lock CHANGED
@@ -15,6 +15,8 @@ dependencies = [
15
  { name = "gradio", extra = ["mcp"] },
16
  { name = "huggingface-hub", extra = ["mcp"] },
17
  { name = "mcp" },
 
 
18
  { name = "requests" },
19
  { name = "smolagents", extra = ["litellm", "mcp"] },
20
  { name = "textblob" },
@@ -26,6 +28,8 @@ requires-dist = [
26
  { name = "gradio", extras = ["mcp"], specifier = ">=5.0.0" },
27
  { name = "huggingface-hub", extras = ["mcp"], specifier = ">=0.32.2" },
28
  { name = "mcp", specifier = ">=1.9.2" },
 
 
29
  { name = "requests" },
30
  { name = "smolagents", extras = ["litellm", "mcp"], specifier = ">=1.17.0" },
31
  { name = "textblob", specifier = ">=0.19.0" },
@@ -1054,6 +1058,12 @@ wheels = [
1054
  { url = "https://files.pythonhosted.org/packages/c2/28/f53038a5a72cc4fd0b56c1eafb4ef64aec9685460d5ac34de98ca78b6e29/orjson-3.10.18-cp313-cp313-win_arm64.whl", hash = "sha256:f54c1385a0e6aba2f15a40d703b858bedad36ded0491e55d35d905b2c34a4cc3", size = 131186 },
1055
  ]
1056
 
 
 
 
 
 
 
1057
  [[package]]
1058
  name = "packaging"
1059
  version = "25.0"
 
15
  { name = "gradio", extra = ["mcp"] },
16
  { name = "huggingface-hub", extra = ["mcp"] },
17
  { name = "mcp" },
18
+ { name = "owlready2" },
19
+ { name = "pyyaml" },
20
  { name = "requests" },
21
  { name = "smolagents", extra = ["litellm", "mcp"] },
22
  { name = "textblob" },
 
28
  { name = "gradio", extras = ["mcp"], specifier = ">=5.0.0" },
29
  { name = "huggingface-hub", extras = ["mcp"], specifier = ">=0.32.2" },
30
  { name = "mcp", specifier = ">=1.9.2" },
31
+ { name = "owlready2", specifier = ">=0.48" },
32
+ { name = "pyyaml", specifier = ">=6.0.2" },
33
  { name = "requests" },
34
  { name = "smolagents", extras = ["litellm", "mcp"], specifier = ">=1.17.0" },
35
  { name = "textblob", specifier = ">=0.19.0" },
 
1058
  { url = "https://files.pythonhosted.org/packages/c2/28/f53038a5a72cc4fd0b56c1eafb4ef64aec9685460d5ac34de98ca78b6e29/orjson-3.10.18-cp313-cp313-win_arm64.whl", hash = "sha256:f54c1385a0e6aba2f15a40d703b858bedad36ded0491e55d35d905b2c34a4cc3", size = 131186 },
1059
  ]
1060
 
1061
+ [[package]]
1062
+ name = "owlready2"
1063
+ version = "0.48"
1064
+ source = { registry = "https://pypi.org/simple" }
1065
+ sdist = { url = "https://files.pythonhosted.org/packages/8d/79/01daa72fbd07b1d4dd0907356d0ae486a684f0bd4654430f27a3e31206ee/owlready2-0.48.tar.gz", hash = "sha256:86b4d8500d769a674c524b54397fdd738ff5d0a96878432b69f4d606d6a7a4d8", size = 27298462 }
1066
+
1067
  [[package]]
1068
  name = "packaging"
1069
  version = "25.0"