File size: 4,312 Bytes
209fce1
63ec1fc
 
456f631
09c9f4b
63ec1fc
 
456f631
 
209fce1
63ec1fc
 
 
 
456f631
09c9f4b
63ec1fc
 
 
8b6732c
456f631
6dea890
 
 
225ca91
8cbbaec
6dea890
8cbbaec
6dea890
 
 
 
 
eb1843c
456f631
6dea890
eb1843c
6dea890
225ca91
6dea890
225ca91
6dea890
 
 
 
 
 
225ca91
6dea890
 
 
225ca91
6dea890
 
225ca91
6dea890
 
225ca91
6dea890
 
225ca91
6dea890
 
 
 
 
225ca91
6dea890
225ca91
6dea890
 
 
 
 
 
 
225ca91
c86887e
 
 
 
 
 
 
6dea890
eb1843c
d09dea4
 
 
 
 
 
 
 
 
 
 
 
118b487
0832457
d09dea4
d11b05e
 
d09dea4
4642e00
ee9677f
 
d09dea4
456f631
6dea890
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121

import os
import gradio as gr


from langchain_community.utilities import SQLDatabase
from langchain_anthropic import ChatAnthropic



from langgraph.checkpoint.memory import InMemorySaver
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain.agents import create_agent



from scripts.literature import literature_search
from scripts.uniprot import get_protein_location
from scripts.db import create_database
from scripts.utils import *


#os.environ["OMP_NUM_THREADS"] = "1"

# Startup wiring: everything below runs at import time, in this order.

# Connection URI handed both to the project helper and to LangChain.
# The three-slash SQLite form normally means a path relative to the working
# directory — verify against the deployment layout.
DB_URI = "sqlite:///proteomics.db"

# Project helper from scripts/db.py; presumably creates and/or populates the
# database behind DB_URI — see that module for what it actually does.
create_database(DB_URI)

# LangChain wrapper used by the SQL toolkit further down.
db = SQLDatabase.from_uri(DB_URI)
# Startup diagnostics printed to stdout.
print("DB INFO:")
print(f"Dialect: {db.dialect}")
print(f"Available tables: {db.get_usable_table_names()}")
# NOTE(review): this query assumes a `proteins` table exists once
# create_database() has run — confirm, since it executes on every startup.
print(f'Sample output: {db.run("SELECT * FROM proteins LIMIT 5;")}')


# Chat model shared by the SQL toolkit and the agent.
# NOTE(review): no API key is passed here, so the client presumably reads it
# from the environment — confirm the deployment sets it.
model = ChatAnthropic(model="claude-haiku-4-5-20251001")

# Toolkit bound to the database wrapper and the chat model.
toolkit = SQLDatabaseToolkit(db=db, llm=model)

# Tool list later handed to the agent.
# NOTE(review): `literature_search` and `get_protein_location` are imported at
# the top of the file but are not added to this list in the visible code.
tools = toolkit.get_tools()

# Instruction template for the SQL agent. `{dialect}` and `{top_k}` are its
# only placeholders; both are filled in once, at import time, just below.
_SYSTEM_PROMPT_TEMPLATE = """
You are an agent designed to interact with a SQL database.
Given an input question, create a syntactically correct {dialect} query to run,
then look at the results of the query and return the answer. Unless the user
specifies a specific number of examples they wish to obtain, always limit your
query to at most {top_k} results.

You can order the results by a relevant column to return the most interesting
examples in the database. Never query for all the columns from a specific table,
only ask for the relevant columns given the question.

You MUST double check your query before executing it. If you get an error while
executing a query, rewrite the query and try again.

DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the
database.

To start you should ALWAYS look at the tables in the database to see what you
can query. Do NOT skip this step.

Then you should query the schema of the most relevant tables.
"""

# Final prompt text: the connected database's dialect plus a default cap of
# 5 rows per query.
system_prompt = _SYSTEM_PROMPT_TEMPLATE.format(dialect=db.dialect, top_k=5)

# Checkpointer that stores conversation state for the agent; being an
# in-memory saver, that state lives only as long as this process.
memory = InMemorySaver()

# The agent: chat model + SQL tools + system prompt, with conversation state
# persisted through `memory` (threads are selected by the caller's config).
agent = create_agent(
    model=model,
    tools=tools,
    system_prompt=system_prompt,
    checkpointer=memory,
)

def respond(question, history, request: "gr.Request" = None):
    """Answer one chat turn by running the agent and returning its final reply.

    Args:
        question: The user's latest message.
        history: Chat history supplied by ``gr.ChatInterface``. It is not
            used here: earlier turns are restored by the agent's checkpointer
            from the thread id.
        request: Optional Gradio request. Gradio fills parameters annotated
            with ``gr.Request`` itself; the annotation is quoted so it is
            resolved lazily. Defaults to ``None`` so existing two-argument
            calls keep working.

    Returns:
        The text of the last message in the agent's final state, as a string.
    """
    # Use one checkpointer thread per browser session so that different
    # visitors do not read or extend each other's conversation. Without a
    # request (or a session hash) fall back to the previous shared thread id.
    session_hash = getattr(request, "session_hash", None)
    thread_id = session_hash or "session_1"

    # invoke() runs the graph to completion and returns the final state, so
    # there is no loop variable that could be left unbound.
    final_state = agent.invoke(
        {"messages": [{"role": "user", "content": question}]},
        config={"configurable": {"thread_id": thread_id}},
    )

    content = final_state["messages"][-1].content
    if isinstance(content, str):
        return content

    # Message content may also be a list of blocks (plain strings or dicts);
    # keep only the textual parts so the chat UI always receives a string.
    text_parts = []
    for block in content:
        if isinstance(block, str):
            text_parts.append(block)
        elif isinstance(block, dict) and block.get("type") == "text":
            text_parts.append(block.get("text", ""))
    return "".join(text_parts)

# Custom stylesheet for the chat UI, kept as one rule per entry and joined
# below. Order is preserved because later rules can override earlier ones.
# NOTE(review): the `svelte-*` class names look like build-generated hashes
# from the Gradio frontend — re-check these selectors after a Gradio upgrade.
_CSS_RULES = (
    # Page, container and generic surfaces.
    ".prose h1 {color: black}",
    ".gradio-container {background-color: white; width: 100%;}",
    ".bubble-wrap {background-color: white}",
    ".svelte-cmf5ev {color: white; background-image: linear-gradient(to right bottom, rgb(91,76,251), rgb(91,76,251));}",
    ".svelte-1f354aw {background-color: white; color: black}",
    ".svelte-1b6s6s {background-color: white; color: black}",
    # Selectors carrying .user / .bot / .message classes.
    ".flex-wrap.user.svelte-1ggj411 {background-color: #70b1fb; color: red;}",
    ".flex-wrap.bot.svelte-1ggj411 {background-color: #ad3dfa; color: red;}",
    ".flex-wrap.bot.svelte-1ggj411.dark {background-color: #ad3dfa; color: red;}",
    ".message.pending.svelte-1gpwetz {background-color: #ad3dfa}",
    ".contain.svelte-1rjryqp.svelte-1rjryqp.svelte-1rjryqp {background-color: white; color: black}",
    # Element that gets the BOTeome logo as a background image.
    ".svelte-1ed2p3z {background-image: url(https://huggingface.co/spaces/jugacostase/BOTeome/resolve/044cd65f416a2c91b464b06fbeef07e0e46bda50/static/img/BOTeome_logo.png); height:170px; background-size: 500px; background-repeat: no-repeat;}",
    # Variable overrides under the .dark class.
    ".dark {--body-text-color: white; --input-background-fill: black}",
    ".label.svelte-p5q82i {color: black;}",
    ".bot.svelte-pcjl1g.svelte-pcjl1g {background-color: #B84BFE; color: white;}",
    ".user.svelte-pcjl1g.svelte-pcjl1g {background-color: #93d0ff; color: white;}",
    ".svelte-1viwdyg {background: #93d0ff;}",
    "label.svelte-i3tvor {background: #93d0ff; color: white;}",
    # Label and textarea borders.
    ".label.show_textbox_border.svelte-173056l {background-color: white; color: black; border-color: gray; border-style: solid; border-width: 1px}",
    ".textarea.svelte-173056l {background-color: white; color: black; border-color: gray; border-style: solid; border-width: 1px}",
)

# Leading and trailing newlines reproduce the layout of a triple-quoted block.
css = "\n" + "\n".join(_CSS_RULES) + "\n"

# Chat UI: the two components are built first (same order as they are passed),
# then wrapped in a ChatInterface around `respond`, then served.
chat_window = gr.Chatbot(height=501)
question_box = gr.Textbox(
    placeholder="Ask about your proteomics data",
    container=False,
    scale=7,
)

demo = gr.ChatInterface(
    respond,
    chatbot=chat_window,
    textbox=question_box,
    # The title is a single space, not a real heading.
    # NOTE(review): presumably because the logo is drawn via `css` — confirm.
    title=" ",
    css=css,
    # Example prompts, currently disabled:
    # examples=["Is satb2 a transcription factor?",
    #           "Where is the prox1 located?",
    #           "What are the levels of expression of Myh9 on the VCN"],
    # cache_examples=True,
)

# Starts the Gradio server at import time.
demo.launch()