# asthara's picture
# create get_gene_id and get_gene_location tools
# 0694f7e verified
from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
import xml.etree.ElementTree as ET
# Base URL of the NCBI Entrez E-utilities API; the tools below append an
# endpoint name such as "esearch.fcgi" or "efetch.fcgi" to it.
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
@tool
def get_gene_id(organism: str, gene: str) -> str:
    """A tool that gets the NCBI Gene ID of a gene, given the organism and the gene name.

    Args:
        organism: the name of the organism
        gene: the name of the gene

    Returns:
        The first matching gene ID as a string, or the text 'Gene not found.' when the search yields no ID.
    """
    search_params = {
        "db": "gene",
        "term": f"{gene}[Gene Name] AND {organism}[Organism]",
        "retmode": "xml",
    }
    esearch_url = base_url + "esearch.fcgi"
    # A timeout keeps an unresponsive endpoint from blocking the agent forever.
    esearch_response = requests.get(esearch_url, params=search_params, timeout=30)
    # Surface HTTP failures directly instead of trying to parse an error page as XML.
    esearch_response.raise_for_status()
    esearch_tree = ET.fromstring(esearch_response.text)
    # findtext yields None when IdList/Id is absent and "" when the element is
    # empty; both cases mean there is no usable ID.
    first_id = esearch_tree.findtext("IdList/Id")
    if not first_id:
        return "Gene not found."
    return first_id
@tool
def get_gene_location(gene_id: str) -> dict:
    """A tool that gets the gene location from a gene ID.

    Args:
        gene_id: the gene ID

    Returns:
        A dict with the keys 'chromosome', 'start', 'end' and 'strand', or a dict with a single 'error' key when the record cannot be parsed.
    """
    efetch_url = base_url + "efetch.fcgi"
    fetch_params = {
        "db": "gene",
        "id": gene_id,
        "retmode": "xml",
    }
    # A timeout keeps an unresponsive endpoint from blocking the agent forever.
    efetch_response = requests.get(efetch_url, params=fetch_params, timeout=30)
    # Surface HTTP failures directly instead of trying to parse an error page as XML.
    efetch_response.raise_for_status()

    # Parse XML to extract chromosome, start, end and strand.
    try:
        fetch_tree = ET.fromstring(efetch_response.text)

        # NOTE(review): Gene-ref_maploc looks like a map location rather than a
        # bare chromosome name; the "chromosome" key is kept for compatibility.
        chromosome = fetch_tree.findtext(".//Gene-ref_maploc") or "Unknown"

        # Only the first Seq-interval in document order is used.
        interval = fetch_tree.find(".//Seq-interval")
        if interval is None:
            return {"error": "Error parsing gene location: no Seq-interval element found"}

        # The reported coordinates are shifted by one (presumably 0-based to
        # 1-based). int(None) raises TypeError when an element is missing.
        start = int(interval.findtext("Seq-interval_from")) + 1
        end = int(interval.findtext("Seq-interval_to")) + 1

        # The strand is carried by the "value" attribute of <Na-strand>, not
        # by a child element, so it is read with .get().
        strand_elem = interval.find("Seq-interval_strand/Na-strand")
        strand = strand_elem.get("value") if strand_elem is not None else None

        return {
            "chromosome": chromosome,
            "start": start,
            "end": end,
            "strand": strand or "unknown",
        }
    except (ET.ParseError, TypeError, ValueError) as e:
        # Report the problem as data so the agent can react to it instead of crashing.
        return {"error": f"Error parsing gene location: {e}"}
# Instantiate the final-answer tool that is registered with the agent below.
final_answer = FinalAnswerTool()

# The default model can be overloaded, in which case the agent does not answer.
# If that happens, switch to another model or to the following Hugging Face
# endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # this model may be overloaded
    temperature=0.5,
    max_tokens=2096,
    custom_role_conversions=None,
)
# Load the agent's prompt templates. The encoding is stated explicitly so the
# file is decoded the same way regardless of the platform's default locale.
with open("prompts.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)
# Build the code agent with the two gene tools and the final-answer tool.
agent = CodeAgent(
    model=model,
    prompt_templates=prompt_templates,
    tools=[final_answer, get_gene_id, get_gene_location],
    max_steps=6,
    verbosity_level=1,
    planning_interval=None,
    grammar=None,
    name=None,
    description=None,
)

# Start the Gradio UI for the agent.
GradioUI(agent).launch()