File size: 3,028 Bytes
9b5b26a
 
 
 
c19d193
6aae614
8fe992b
9b5b26a
0694f7e
 
 
9b5b26a
 
0694f7e
 
9b5b26a
0694f7e
 
9b5b26a
0694f7e
 
 
 
 
 
 
 
 
 
 
 
 
 
9b5b26a
 
0694f7e
 
9b5b26a
0694f7e
9b5b26a
0694f7e
 
 
 
 
 
 
 
 
 
9b5b26a
0694f7e
 
8c01ffb
0694f7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8c01ffb
6aae614
ae7a494
 
 
 
e121372
0694f7e
 
 
 
13d500a
8c01ffb
861422e
 
9b5b26a
8c01ffb
8fe992b
0694f7e
8c01ffb
 
 
 
 
 
861422e
8fe992b
 
9b5b26a
8c01ffb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool

from Gradio_UI import GradioUI
import xml.etree.ElementTree as ET

# Common prefix of the NCBI E-utilities URLs; the tools below append the
# script name ("esearch.fcgi" / "efetch.fcgi") to it.
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

@tool
def get_gene_id(organism: str, gene: str) -> str:
    """A tool that gets the gene ID from an organism with the provided gene name
    Args:
        organism: the name of the organism
        gene: the name of the gene
    Returns:
        The first gene ID listed in the search result, or "Gene not found." when there is none.
    """
    # NOTE: `gene` is a gene *name*, so it is annotated as str. The @tool
    # decorator derives the tool's input schema from these hints, and the
    # previous `int` hint advertised the wrong type to the agent.
    search_params = {
        "db": "gene",
        "term": f"{gene}[Gene Name] AND {organism}[Organism]",
        "retmode": "xml",
    }
    # Without a timeout, requests may wait forever on a stalled connection.
    esearch_response = requests.get(
        base_url + "esearch.fcgi",
        params=search_params,
        timeout=30,
    )
    # Surface HTTP failures (e.g. rate limiting) as requests.HTTPError instead
    # of trying to parse an error page as XML.
    esearch_response.raise_for_status()
    esearch_tree = ET.fromstring(esearch_response.text)

    # Only the first <Id> under <IdList> is used; a missing list, an empty
    # list and an empty <Id> all count as "not found".
    gene_id = esearch_tree.findtext("IdList/Id")
    if not gene_id:
        return "Gene not found."

    return gene_id

@tool
def get_gene_location(gene_id: str) -> str:
    """A tool that gets the gene location from a gene ID
    Args:
        gene_id: the gene ID
    Returns:
        A text line "chromosome: ..., start: ..., end: ..., strand: ...", or a message starting with "Error parsing gene location:" when the record lacks usable coordinates.
    """
    # Without a timeout, requests may wait forever on a stalled connection.
    efetch_response = requests.get(
        base_url + "efetch.fcgi",
        params={
            "db": "gene",
            "id": gene_id,
            "retmode": "xml",
        },
        timeout=30,
    )
    # Surface HTTP failures as requests.HTTPError instead of trying to parse
    # an error page as XML.
    efetch_response.raise_for_status()
    fetch_tree = ET.fromstring(efetch_response.text)

    chr_elem = fetch_tree.find(".//Gene-ref_maploc")
    chromosome = (chr_elem.text if chr_elem is not None else None) or "Unknown"

    # NOTE(review): this takes the first Seq-interval anywhere in the record;
    # confirm that it is the interval wanted when a record has several.
    interval = fetch_tree.find(".//Seq-interval")
    if interval is None:
        return "Error parsing gene location: no Seq-interval element in the record"

    try:
        # +1 on both ends: presumably converts NCBI's 0-based coordinates to
        # 1-based ones -- TODO confirm.
        start = int(interval.findtext("Seq-interval_from")) + 1
        end = int(interval.findtext("Seq-interval_to")) + 1
    except (TypeError, ValueError) as e:
        # TypeError: coordinate element missing (findtext gave None);
        # ValueError: coordinate text is not an integer.
        # Return ONE string here; the previous `return "...", str(e)` built a tuple.
        return f"Error parsing gene location: {e}"

    # The strand is normally carried as an attribute (<Na-strand value="plus"/>),
    # not as a child element, so read the attribute first and fall back to the
    # child-element lookup the code used before.
    strand = None
    strand_elem = interval.find("Seq-interval_strand/Na-strand")
    if strand_elem is not None:
        strand = strand_elem.get("value") or strand_elem.findtext("value")
    if not strand:
        strand = "unknown"

    # The declared return type is str, so the fields are formatted into text
    # rather than returned as a dict.
    return f"chromosome: {chromosome}, start: {start}, end: {end}, strand: {strand}"

# Instantiated once and registered in the agent's tool list below.
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is probably overloaded. Switch to another
# model, or use the following Hugging Face endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'

model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # this model may be overloaded at times
    custom_role_conversions=None,
)

# Decode the prompt file as UTF-8 explicitly; the default text encoding of
# open() depends on the platform locale and can fail on non-ASCII prompts.
with open("prompts.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

# The agent gets the final-answer tool plus the two NCBI gene tools and is
# limited to six steps per run.
agent = CodeAgent(
    model=model,
    tools=[final_answer, get_gene_id, get_gene_location],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)


# Runs at import time, as before: start the Gradio front end for the agent.
GradioUI(agent).launch()