mathysgrapotte committed on
Commit
6cb2b11
·
1 Parent(s): ccceb10

now works with a dictionary.

Browse files
Files changed (3) hide show
  1. agents/query_ontology_db.py +1 -1
  2. main.py +44 -46
  3. tools/bio_tools_tools.py +9 -6
agents/query_ontology_db.py CHANGED
@@ -6,7 +6,7 @@ model = LiteLLMModel(
6
  #model_id="ollama/qwen3:0.6b",
7
  api_base="http://localhost:11434",
8
  temperature=0.0,
9
- max_tokens=5000,
10
  )
11
 
12
  tool_list = [search_edam_ontology_by_search_term, get_edam_description_from_ontology_format_class]
 
6
  #model_id="ollama/qwen3:0.6b",
7
  api_base="http://localhost:11434",
8
  temperature=0.0,
9
+ max_tokens=8000,
10
  )
11
 
12
  tool_list = [search_edam_ontology_by_search_term, get_edam_description_from_ontology_format_class]
main.py CHANGED
@@ -10,69 +10,67 @@ def run_multi_agent(module_name):
10
  ### RETRIEVE INFORMATION FROM META.YML ###
11
 
12
  meta_yml = get_meta_yml_file(module_name=module_name)
13
- module_info = extract_module_name_description(meta_file=meta_yml)
14
- module_tools = extract_tools_from_meta_json(meta_file=meta_yml)
15
 
16
- ### FIND THE MODULE TOOL ###
17
 
18
- if len(module_info) == 1:
19
- module_yaml_name = module_info[0]
20
- module_description = module_info[1]
21
- else:
22
- # TODO: agent to choose the right tool
23
- first_prompt = f"""
24
- The module {module_info[0]} with desciption '{module_info[1]}' contains a series of tools.
25
- Find the tool that best describes the module. Return only one tool. Return the name.
26
- This is the list of tools:
27
- {"\n\t".join(f"{tool[0]}: {tool[1]}" for tool in module_tools)}
28
- """
29
- module_yaml_name = "fastqc" # TODO: this would be the answer of the first agent
30
- module_description = "my description" # TODO: this would be the answer of the first agent
31
 
32
- ### EXTRACT INFO FROM META.YML ###
33
 
34
- meta_info = extract_information_from_meta_json(meta_file=meta_yml, tool_name=module_yaml_name)
35
 
36
- ### FETCH ONOTOLOGIES FROM BIO.TOOLS ###
37
 
38
- if meta_info["bio_tools_id"] == "":
39
- bio_tools_list = get_biotools_response(module_yaml_name)
40
 
41
- # TODO: agent to select the best match from all possible bio.tools entries
42
- # The answer should be the entry ID
43
- second_prompt = "" # TODO: update
44
- bio_tools_tool = "FastQC" # TODO: this should be the answer form the second agent
45
 
46
- ontology = get_biotools_ontology(module_yaml_name, bio_tools_tool)
47
 
48
- ### CLASSIFY ALL INPUT AND OUTPUT ONTOLOGIES INTO THE APPROPRIATE CHANNELS ###
49
 
50
- # TODO !!!
51
- # Create an agent which classifies the ontologeis into the right i/o
52
- # From biotols we get a list of ontologies for inputs and a list of ontologies for outputs
53
- # but in most nf-core modules we will have finles separated into different channels
54
- # For example bam, bai, sam...
55
- # The agent should recieve the i/o from the module, the ontologies found in bio.tools, and assigne the correct ones to each channel.
56
 
57
- ### FETCH ONTOLOGY TERMS FROM EDAM DATABASE ###
58
 
59
- results = {"inputs": {}, "outputs": {}}
60
 
61
- for input_channel in meta_info["inputs"]:
62
  for ch_element in input_channel:
63
  for key, value in ch_element.items():
64
  if value["type"] == "file":
65
  result = agent.run(f"You are presentend with a file format for the input {key}, which is a file and is described by the following description: '{value['description']}', search for the best matches out of possible matches in the edam ontology (formated as format_XXXX), and return the answer (a list of ontology classes) in a final_answer call such as final_answer([format_XXXX, format_XXXX, ...])")
66
- results["inputs"][key] = result
67
-
68
- for output_channel in meta_info["outputs"]:
69
- for ch_element in output_channel:
70
- for key, value in ch_element.items():
71
- if value["type"] == "file":
72
- result = agent.run(f"You are presentend with a file format for the output {key}, which is a file and is described by the following description: '{value['description']}', search for the best matches out of possible matches in the edam ontology (formated as format_XXXX), and return the answer (a list of ontology classes) in a final_answer call such as final_answer([format_XXXX, format_XXXX, ...])")
73
- results["outputs"][key] = result
74
-
75
- print(results)
76
 
77
  ### FINAL AGENT TO BENCHMARK AND FIND THE COMMONALITIES BETWEEN BIO.TOOLS AND EDAM ###
78
 
 
10
  ### RETRIEVE INFORMATION FROM META.YML ###
11
 
12
  meta_yml = get_meta_yml_file(module_name=module_name)
13
+ # module_info = extract_module_name_description(meta_file=meta_yml)
14
+ # module_tools = extract_tools_from_meta_json(meta_file=meta_yml)
15
 
16
+ # ### FIND THE MODULE TOOL ###
17
 
18
+ # if len(module_info) == 1:
19
+ # module_yaml_name = module_info[0]
20
+ # module_description = module_info[1]
21
+ # else:
22
+ # # TODO: agent to choose the right tool
23
+ # first_prompt = f"""
24
+ # The module {module_info[0]} with desciption '{module_info[1]}' contains a series of tools.
25
+ # Find the tool that best describes the module. Return only one tool. Return the name.
26
+ # This is the list of tools:
27
+ # {"\n\t".join(f"{tool[0]}: {tool[1]}" for tool in module_tools)}
28
+ # """
29
+ # module_yaml_name = "fastqc" # TODO: this would be the answer of the first agent
30
+ # module_description = "my description" # TODO: this would be the answer of the first agent
31
 
32
+ # ### EXTRACT INFO FROM META.YML ###
33
 
34
+ # meta_info = extract_information_from_meta_json(meta_file=meta_yml, tool_name=module_yaml_name)
35
 
36
+ # ### FETCH ONOTOLOGIES FROM BIO.TOOLS ###
37
 
38
+ # if meta_info["bio_tools_id"] == "":
39
+ # bio_tools_list = get_biotools_response(module_yaml_name)
40
 
41
+ # # TODO: agent to select the best match from all possible bio.tools entries
42
+ # # The answer should be the entry ID
43
+ # second_prompt = "" # TODO: update
44
+ # bio_tools_tool = "FastQC" # TODO: this should be the answer form the second agent
45
 
46
+ # ontology = get_biotools_ontology(module_yaml_name, bio_tools_tool)
47
 
48
+ # ### CLASSIFY ALL INPUT AND OUTPUT ONTOLOGIES INTO THE APPROPRIATE CHANNELS ###
49
 
50
+ # # TODO !!!
51
+ # # Create an agent which classifies the ontologeis into the right i/o
52
+ # # From biotols we get a list of ontologies for inputs and a list of ontologies for outputs
53
+ # # but in most nf-core modules we will have finles separated into different channels
54
+ # # For example bam, bai, sam...
55
+ # # The agent should recieve the i/o from the module, the ontologies found in bio.tools, and assigne the correct ones to each channel.
56
 
57
+ # ### FETCH ONTOLOGY TERMS FROM EDAM DATABASE ###
58
 
59
+ results = {"input": {}, "output": {}}
60
 
61
+ for input_channel in meta_yml["input"]:
62
  for ch_element in input_channel:
63
  for key, value in ch_element.items():
64
  if value["type"] == "file":
65
  result = agent.run(f"You are presentend with a file format for the input {key}, which is a file and is described by the following description: '{value['description']}', search for the best matches out of possible matches in the edam ontology (formated as format_XXXX), and return the answer (a list of ontology classes) in a final_answer call such as final_answer([format_XXXX, format_XXXX, ...])")
66
+ results["input"][key] = result
67
+
68
+ # for output_channel in meta_info["outputs"]:
69
+ # for ch_element in output_channel:
70
+ # for key, value in ch_element.items():
71
+ # if value["type"] == "file":
72
+ # result = agent.run(f"You are presentend with a file format for the output {key}, which is a file and is described by the following description: '{value['description']}', search for the best matches out of possible matches in the edam ontology (formated as format_XXXX), and return the answer (a list of ontology classes) in a final_answer call such as final_answer([format_XXXX, format_XXXX, ...])")
73
+ # results["outputs"][key] = result
 
 
74
 
75
  ### FINAL AGENT TO BENCHMARK AND FIND THE COMMONALITIES BETWEEN BIO.TOOLS AND EDAM ###
76
 
tools/bio_tools_tools.py CHANGED
@@ -1,5 +1,8 @@
1
  import json
2
  import requests
 
 
 
3
 
4
  def get_biotools_response(tool_name: str) -> list:
5
  """
@@ -23,13 +26,13 @@ def get_biotools_response(tool_name: str) -> list:
23
  tool_info = [(tool.get("name"), tool.get("description", "")) for tool in data_list]
24
 
25
  for name, desc in tool_info:
26
- print(f"Tool: {name}\nDescription: {desc}\n")
27
 
28
- print(f"Found bio.tools information for '{tool_name}'")
29
  return tool_info
30
 
31
  except requests.exceptions.RequestException as e:
32
- print(f"Could not find bio.tools information for '{tool_name}': {e}")
33
  return f"Could not find bio.tools information for '{tool_name}': {e}"
34
 
35
  def get_biotools_ontology(tool_name, entry_id:str) -> str:
@@ -74,13 +77,13 @@ def get_biotools_ontology(tool_name, entry_id:str) -> str:
74
  for i, (term, uri) in enumerate(format_terms, start=1):
75
  text_block += f"{i}. {term} ({uri})\n"
76
 
77
- print(text_block)
78
  return format_terms
79
 
80
  if not found:
81
- print(f"Could not find the entry '{entry_id}' for the tool {tool_name}")
82
  return f"Could not find the entry '{entry_id}' for the tool {tool_name}"
83
 
84
  except requests.exceptions.RequestException as e:
85
- print(f"Could not find the entry '{entry_id}' for the tool {tool_name}")
86
  return f"Could not find bio.tools information for '{tool_name}': {e}"
 
1
  import json
2
  import requests
3
+ import logging
4
+
5
+ logger = logging.getLogger(__name__)
6
 
7
  def get_biotools_response(tool_name: str) -> list:
8
  """
 
26
  tool_info = [(tool.get("name"), tool.get("description", "")) for tool in data_list]
27
 
28
  for name, desc in tool_info:
29
+ logger.info(f"Tool: {name}\nDescription: {desc}\n")
30
 
31
+ logger.info(f"Found bio.tools information for '{tool_name}'")
32
  return tool_info
33
 
34
  except requests.exceptions.RequestException as e:
35
+ logger.error(f"Could not find bio.tools information for '{tool_name}': {e}")
36
  return f"Could not find bio.tools information for '{tool_name}': {e}"
37
 
38
  def get_biotools_ontology(tool_name, entry_id:str) -> str:
 
77
  for i, (term, uri) in enumerate(format_terms, start=1):
78
  text_block += f"{i}. {term} ({uri})\n"
79
 
80
+ logger.info(text_block)
81
  return format_terms
82
 
83
  if not found:
84
+ logger.error(f"Could not find the entry '{entry_id}' for the tool {tool_name}")
85
  return f"Could not find the entry '{entry_id}' for the tool {tool_name}"
86
 
87
  except requests.exceptions.RequestException as e:
88
+ logger.error(f"Could not find the entry '{entry_id}' for the tool {tool_name}")
89
  return f"Could not find bio.tools information for '{tool_name}': {e}"