mathysgrapotte committed on
Commit
17816a1
·
1 Parent(s): 52a25ec

fetch_ontology from input

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. fetch_ontology_from_input.py +144 -0
  3. main.py +29 -27
  4. pyproject.toml +2 -0
  5. uv.lock +10 -0
.gitignore CHANGED
@@ -8,3 +8,4 @@ wheels/
8
 
9
  # Virtual environments
10
  .venv
 
 
8
 
9
  # Virtual environments
10
  .venv
11
+ .gradio/
fetch_ontology_from_input.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import CodeAgent, LiteLLMModel, tool
2
+ from smolagents.tools import ToolCollection
3
+ import gradio as gr
4
+ import requests
5
+ import yaml
6
+ from owlready2 import get_ontology, default_world
7
+ import logging
8
+
9
def get_fastqc_meta_yaml(timeout: float = 30.0):
    """
    Fetch the content of the FastQC meta.yml file from the nf-core/modules repository.

    Args:
        timeout: Seconds to wait for the HTTP response. The original call had
            no timeout, so a stalled connection could hang the process forever.

    Returns:
        str: The raw content of the YAML file, or None if the request failed.
    """
    # Use the raw GitHub URL to get the file content directly (no HTML wrapper).
    url = "https://raw.githubusercontent.com/nf-core/modules/master/modules/nf-core/fastqc/meta.yml"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for 4xx/5xx responses
        return response.text
    except requests.exceptions.RequestException as e:
        # Best-effort: the callers treat None as "fetch failed".
        print(f"Error fetching YAML file: {e}")
        return None
26
+
27
def get_fastqc_input_yaml():
    """
    Fetch and parse the FastQC meta.yml, returning its ``input`` section.

    Returns:
        The parsed ``input`` section (a list describing the module's input
        channels), or None if fetching or parsing failed or the key is absent.
    """
    yaml_content = get_fastqc_meta_yaml()
    if not yaml_content:
        print("Failed to fetch the YAML file, returning None.")
        return None

    try:
        parsed = yaml.safe_load(yaml_content)
    except yaml.YAMLError as e:
        print(f"Error parsing YAML: {e}")
        return None

    # Guard against well-formed YAML that lacks an "input" section: the
    # original indexed parsed["input"] directly and raised KeyError, which
    # is inconsistent with this function's return-None-on-failure contract.
    if not isinstance(parsed, dict) or "input" not in parsed:
        print("Parsed YAML has no 'input' section, returning None.")
        return None
    return parsed["input"]
46
+
47
+
48
def load_edam_ontology():
    """
    Load the EDAM ontology OWL file from GitHub using owlready2.

    The loaded ontology is memoized on the function object, so repeated calls
    (one per agent tool invocation via search_edam_ontology_by_search_term)
    do not re-download and re-parse the large OWL file every time.

    Returns:
        Ontology: The loaded EDAM ontology object, or None if loading fails.
        Failures are not cached, so a later call can retry.
    """
    cached = getattr(load_edam_ontology, "_onto", None)
    if cached is not None:
        return cached

    # URL to the raw EDAM ontology OWL file
    url = "https://raw.githubusercontent.com/edamontology/edamontology/main/releases/EDAM_1.25.owl"

    try:
        # Load the ontology directly from the URL
        onto = get_ontology(url).load()
        print(f"Successfully loaded EDAM ontology: {onto}")
        load_edam_ontology._onto = onto  # memoize for subsequent calls
        return onto
    except Exception as e:
        print(f"Error loading EDAM ontology: {e}")
        return None
66
+
67
@tool
def search_edam_ontology_by_search_term(search_term: str = None) -> list:
    """
    Generic function to search by EDAM entity type using native search. The native search is strict so you need to provide single word search terms (for example: 'fasta').

    Args:
        search_term: single word search term to filter results

    Returns:
        list: List of matching classes
    """
    onto = load_edam_ontology()
    # load_edam_ontology() is documented to return None on failure; the
    # original called onto.search unconditionally and would raise
    # AttributeError inside the agent. Fail soft with an empty result.
    if onto is None:
        print("EDAM ontology could not be loaded; returning no matches.")
        return []

    entity_type = "format"
    # Search using IRI pattern matching (EDAM format IRIs look like format_XXXX)
    pattern = f"*{entity_type}_*"
    matches = onto.search(iri=pattern)

    # Filter by search term if provided
    if search_term:
        search_term_lower = search_term.lower()
        filtered_matches = []

        for match in matches:
            # Check name
            if search_term_lower in match.name.lower():
                filtered_matches.append(match)
                continue

            # Check labels
            if hasattr(match, 'label') and match.label:
                for label in match.label:
                    if search_term_lower in str(label).lower():
                        filtered_matches.append(match)
                        break

        matches = filtered_matches

    # Print results (prints are visible in the agent's execution log)
    search_desc = f" matching '{search_term}'" if search_term else ""
    print(f"\nFound {len(matches)} {entity_type}(s){search_desc}:")
    for i, match in enumerate(matches[:10]):  # Limit to 10 for readability
        print(f"{i+1}. {match.name}")
        if hasattr(match, 'label') and match.label:
            print(f"   Label: {match.label[0]}")
        print()

    if len(matches) > 10:
        print(f"... and {len(matches) - 10} more results")

    return matches
117
+
118
# Example usage:
if __name__ == "__main__":
    tool_list = [search_edam_ontology_by_search_term]

    # Fetch the nf-core FastQC input specification ONCE and reuse it below.
    # The original fetched it twice and, on a failed fetch (None return),
    # crashed with a TypeError when subscripting.
    input_yaml = get_fastqc_input_yaml()
    if input_yaml is None:
        raise SystemExit("Could not fetch FastQC input metadata; aborting.")

    print(input_yaml[0][0].keys())

    model = LiteLLMModel(
        model_id="ollama/devstral:latest",
        #model_id="ollama/qwen3:0.6b",
        api_base="http://localhost:11434",
        temperature=0.0,
        max_tokens=5000,
    )

    agent = CodeAgent(
        tools=tool_list,
        model=model,
        additional_authorized_imports=["inspect", "json"]
    )

    print(agent.tools)
    for input_tool in input_yaml[0]:
        for key, value in input_tool.items():
            if key != "meta":
                # Prompt typos fixed ("presentend" -> "presented",
                # "formated" -> "formatted") so the LLM gets clean instructions.
                result = agent.run(
                    f"you are presented with a file format for the type {key}, "
                    f"which is a {value['type']} and is described by the following "
                    f"description: '{value['description']}', search for the single "
                    f"best match out of possible matches in the edam ontology "
                    f"(formatted as format_XXXX), and return the answer (a single "
                    f"ontology class) in a final_answer call such as "
                    f"final_answer(f'format_XXXX')"
                )
                print(result)
main.py CHANGED
@@ -2,41 +2,43 @@ from smolagents import CodeAgent, LiteLLMModel
2
  from smolagents.tools import ToolCollection
3
  import gradio as gr
4
 
5
- def chat_with_agent(message, history):
6
- """Initialize MCP client for each request to avoid connection issues"""
7
- try:
8
- with ToolCollection.from_mcp(
 
 
 
 
 
 
 
 
 
 
 
9
  {"url": "https://notredameslab-nf-ontology.hf.space/gradio_api/mcp/sse", "transport": "sse"},
10
  trust_remote_code=True # Acknowledge that we trust this remote MCP server
11
  ) as tool_collection:
12
-
13
- model = LiteLLMModel(
14
- model_id="ollama/devstral:latest",
15
- api_base="http://localhost:11434",
16
- )
17
-
18
- agent = CodeAgent(
19
- tools=tool_collection.tools,
20
- model=model,
21
- additional_authorized_imports=["inspect", "json"]
22
- )
23
-
24
- additional_instructions = """
25
- ADDITIONAL IMPORTANT INSTRUCTIONS:
26
- use the tool "final_answer" in the code block to provide the answer to the user. Prints are only for debugging purposes. So, to give your results concatenate everything you want to print in a single "final_answer" call as such : final_answer(f"your answer here").
27
- """
28
 
29
- agent.system_prompt += additional_instructions
 
 
 
 
30
 
31
- result = agent.run(message)
32
- return str(result)
33
-
34
- except Exception as e:
35
- return f"❌ Error: {e}\nType: {type(e).__name__}"
36
 
 
 
 
37
  if __name__ == "__main__":
38
  demo = gr.ChatInterface(
39
- fn=chat_with_agent,
40
  type="messages",
41
  examples=["can you extract input/output metadata from fastqc nf-core module ?"],
42
  title="Agent with MCP Tools (Per-Request Connection)",
 
2
  from smolagents.tools import ToolCollection
3
  import gradio as gr
4
 
5
# NOTE(review): this constant appears unused after the refactor — the old
# chat_with_agent appended it to agent.system_prompt, while run_agent now
# appends a shorter final_answer hint to the message instead. Confirm whether
# it should be re-applied (e.g. via prompt_templates) or deleted.
additional_instructions = """
ADDITIONAL IMPORTANT INSTRUCTIONS:
use the tool "final_answer" in the code block to provide the answer to the user.
Prints are only for debugging purposes. So, to give your results concatenate everything you want to print in a single "final_answer" call as such : final_answer(f"your answer here").

Example:
```python
result = tool_call(arg1, arg2, arg3)
final_answer(f"your answer here {result}") # here print statement has been replaced by final_answer tool call
```
"""
16
+
17
def run_agent(message, history):
    """Create a new MCP connection for each request to avoid event loop issues.

    Args:
        message: The user's chat message.
        history: Prior chat turns (unused; required by gr.ChatInterface).

    Returns:
        str: The agent's final answer, or a formatted error message if the
        MCP connection, model call, or agent run failed.
    """
    try:
        with ToolCollection.from_mcp(
            {"url": "https://notredameslab-nf-ontology.hf.space/gradio_api/mcp/sse", "transport": "sse"},
            trust_remote_code=True  # Acknowledge that we trust this remote MCP server
        ) as tool_collection:

            model = LiteLLMModel(
                model_id="ollama/devstral:latest",
                #model_id="ollama/qwen3:0.6b",
                api_base="http://localhost:11434",
            )

            agent = CodeAgent(
                tools=tool_collection.tools,
                model=model,
                additional_authorized_imports=["inspect", "json"]
            )

            return str(agent.run(message + " put the result in a final_answer call such as final_answer(f'your answer here')"))
    except Exception as e:
        # Surface failures to the chat UI instead of crashing the Gradio
        # handler — restores the error reporting the previous chat_with_agent
        # implementation had before the refactor dropped it.
        return f"❌ Error: {e}\nType: {type(e).__name__}"
38
+
39
  if __name__ == "__main__":
40
  demo = gr.ChatInterface(
41
+ fn=run_agent,
42
  type="messages",
43
  examples=["can you extract input/output metadata from fastqc nf-core module ?"],
44
  title="Agent with MCP Tools (Per-Request Connection)",
pyproject.toml CHANGED
@@ -9,6 +9,8 @@ dependencies = [
9
  "gradio[mcp]>=5.0.0",
10
  "huggingface_hub[mcp]>=0.32.2",
11
  "mcp>=1.9.2",
 
 
12
  "requests",
13
  "smolagents[litellm,mcp]>=1.17.0",
14
  "textblob>=0.19.0",
 
9
  "gradio[mcp]>=5.0.0",
10
  "huggingface_hub[mcp]>=0.32.2",
11
  "mcp>=1.9.2",
12
+ "owlready2>=0.48",
13
+ "pyyaml>=6.0.2",
14
  "requests",
15
  "smolagents[litellm,mcp]>=1.17.0",
16
  "textblob>=0.19.0",
uv.lock CHANGED
@@ -15,6 +15,8 @@ dependencies = [
15
  { name = "gradio", extra = ["mcp"] },
16
  { name = "huggingface-hub", extra = ["mcp"] },
17
  { name = "mcp" },
 
 
18
  { name = "requests" },
19
  { name = "smolagents", extra = ["litellm", "mcp"] },
20
  { name = "textblob" },
@@ -26,6 +28,8 @@ requires-dist = [
26
  { name = "gradio", extras = ["mcp"], specifier = ">=5.0.0" },
27
  { name = "huggingface-hub", extras = ["mcp"], specifier = ">=0.32.2" },
28
  { name = "mcp", specifier = ">=1.9.2" },
 
 
29
  { name = "requests" },
30
  { name = "smolagents", extras = ["litellm", "mcp"], specifier = ">=1.17.0" },
31
  { name = "textblob", specifier = ">=0.19.0" },
@@ -1054,6 +1058,12 @@ wheels = [
1054
  { url = "https://files.pythonhosted.org/packages/c2/28/f53038a5a72cc4fd0b56c1eafb4ef64aec9685460d5ac34de98ca78b6e29/orjson-3.10.18-cp313-cp313-win_arm64.whl", hash = "sha256:f54c1385a0e6aba2f15a40d703b858bedad36ded0491e55d35d905b2c34a4cc3", size = 131186 },
1055
  ]
1056
 
 
 
 
 
 
 
1057
  [[package]]
1058
  name = "packaging"
1059
  version = "25.0"
 
15
  { name = "gradio", extra = ["mcp"] },
16
  { name = "huggingface-hub", extra = ["mcp"] },
17
  { name = "mcp" },
18
+ { name = "owlready2" },
19
+ { name = "pyyaml" },
20
  { name = "requests" },
21
  { name = "smolagents", extra = ["litellm", "mcp"] },
22
  { name = "textblob" },
 
28
  { name = "gradio", extras = ["mcp"], specifier = ">=5.0.0" },
29
  { name = "huggingface-hub", extras = ["mcp"], specifier = ">=0.32.2" },
30
  { name = "mcp", specifier = ">=1.9.2" },
31
+ { name = "owlready2", specifier = ">=0.48" },
32
+ { name = "pyyaml", specifier = ">=6.0.2" },
33
  { name = "requests" },
34
  { name = "smolagents", extras = ["litellm", "mcp"], specifier = ">=1.17.0" },
35
  { name = "textblob", specifier = ">=0.19.0" },
 
1058
  { url = "https://files.pythonhosted.org/packages/c2/28/f53038a5a72cc4fd0b56c1eafb4ef64aec9685460d5ac34de98ca78b6e29/orjson-3.10.18-cp313-cp313-win_arm64.whl", hash = "sha256:f54c1385a0e6aba2f15a40d703b858bedad36ded0491e55d35d905b2c34a4cc3", size = 131186 },
1059
  ]
1060
 
1061
+ [[package]]
1062
+ name = "owlready2"
1063
+ version = "0.48"
1064
+ source = { registry = "https://pypi.org/simple" }
1065
+ sdist = { url = "https://files.pythonhosted.org/packages/8d/79/01daa72fbd07b1d4dd0907356d0ae486a684f0bd4654430f27a3e31206ee/owlready2-0.48.tar.gz", hash = "sha256:86b4d8500d769a674c524b54397fdd738ff5d0a96878432b69f4d606d6a7a4d8", size = 27298462 }
1066
+
1067
  [[package]]
1068
  name = "packaging"
1069
  version = "25.0"