"""Gradio chat front-end for a LangChain SQL agent over a proteomics database.

At import time this script:

1. calls ``create_database(DB_URI)`` (project helper; presumably creates and
   populates the SQLite file -- confirm in ``scripts/db.py``),
2. wraps the database in LangChain's ``SQLDatabase`` and builds the SQL
   toolkit tools,
3. creates an agent with an in-memory checkpointer, and
4. launches a ``gr.ChatInterface`` whose callback is :func:`respond`.
"""

import os

import gradio as gr
from langchain_community.utilities import SQLDatabase
from langchain_anthropic import ChatAnthropic
from langgraph.checkpoint.memory import InMemorySaver
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain.agents import create_agent

from scripts.literature import literature_search
from scripts.uniprot import get_protein_location
from scripts.db import create_database
from scripts.utils import *

#os.environ["OMP_NUM_THREADS"] = "1"

DB_URI = "sqlite:///proteomics.db"

# Thread id used only when no per-session identifier is available
# (e.g. ``respond`` is called directly, outside of a Gradio request).
DEFAULT_THREAD_ID = "session_1"

# Shown to the user when the agent stream ends without producing a message.
NO_ANSWER_MESSAGE = "Sorry, I could not produce an answer. Please try again."

create_database(DB_URI)
db = SQLDatabase.from_uri(DB_URI)

# Start-up diagnostics: confirm the database is reachable and has data.
print("DB INFO:")
print(f"Dialect: {db.dialect}")
print(f"Available tables: {db.get_usable_table_names()}")
print(f'Sample output: {db.run("SELECT * FROM proteins LIMIT 5;")}')

model = ChatAnthropic(model="claude-haiku-4-5-20251001")

toolkit = SQLDatabaseToolkit(db=db, llm=model)
tools = toolkit.get_tools()

system_prompt = """
You are an agent designed to interact with a SQL database.
Given an input question, create a syntactically correct {dialect} query to run,
then look at the results of the query and return the answer. Unless the user
specifies a specific number of examples they wish to obtain, always limit your
query to at most {top_k} results.

You can order the results by a relevant column to return the most interesting
examples in the database. Never query for all the columns from a specific table,
only ask for the relevant columns given the question.

You MUST double check your query before executing it. If you get an error while
executing a query, rewrite the query and try again.

DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the
database.

To start you should ALWAYS look at the tables in the database to see what you
can query. Do NOT skip this step.

Then you should query the schema of the most relevant tables.
""".format(
    dialect=db.dialect,
    top_k=5,
)

# Conversation state is checkpointed in process memory, keyed by thread id.
memory = InMemorySaver()

agent = create_agent(
    model,
    tools,
    system_prompt=system_prompt,
    checkpointer=memory,
)


def _message_text(content):
    """Flatten a LangChain message ``content`` value into a plain string.

    ``content`` may be a ``str`` or a list of content blocks (plain strings
    and/or dicts). Only text is kept: strings are used as-is and dict blocks
    contribute their ``"text"`` value when their ``"type"`` is ``"text"``.
    Anything else (e.g. tool-use blocks) is skipped.
    """
    if isinstance(content, str):
        return content

    parts = []
    for block in content or []:
        if isinstance(block, str):
            parts.append(block)
        elif isinstance(block, dict) and block.get("type") == "text":
            parts.append(block.get("text", ""))
    return "".join(parts)


def respond(question, history, request: gr.Request):
    """Answer one chat message with the SQL agent.

    Args:
        question: The user's message text.
        history: Chat history supplied by Gradio. Unused: prior turns are
            restored by the agent's checkpointer from the thread id instead.
        request: The current Gradio request. Gradio passes it automatically
            to parameters annotated with ``gr.Request``; its ``session_hash``
            gives each browser session its own conversation thread, so users
            no longer share one checkpointed history.

    Returns:
        The text of the agent's final message, or ``NO_ANSWER_MESSAGE`` if
        the agent stream produced no state/messages.
    """
    # ``getattr`` tolerates ``request=None`` for direct (non-Gradio) calls.
    session_hash = getattr(request, "session_hash", None)
    thread_id = session_hash or DEFAULT_THREAD_ID

    # With stream_mode="values" each item is the full state so far; only the
    # last one is needed. Start from None so an empty stream cannot leave the
    # variable unbound.
    final_state = None
    for final_state in agent.stream(
        {"messages": [{"role": "user", "content": question}]},
        config={"configurable": {"thread_id": thread_id}},
        stream_mode="values",
    ):
        pass

    if not final_state or not final_state.get("messages"):
        return NO_ANSWER_MESSAGE

    return _message_text(final_state["messages"][-1].content)


# NOTE: the ``.svelte-*`` selectors below are generated class names and are
# likely tied to a specific Gradio build -- re-check them after upgrading.
css = """
.prose h1 {color: black}
.gradio-container {background-color: white; width: 100%;}
.bubble-wrap {background-color: white}
.svelte-cmf5ev {color: white; background-image: linear-gradient(to right bottom, rgb(91,76,251), rgb(91,76,251));}
.svelte-1f354aw {background-color: white; color: black}
.svelte-1b6s6s {background-color: white; color: black}
.flex-wrap.user.svelte-1ggj411 {background-color: #70b1fb; color: red;}
.flex-wrap.bot.svelte-1ggj411 {background-color: #ad3dfa; color: red;}
.flex-wrap.bot.svelte-1ggj411.dark {background-color: #ad3dfa; color: red;}
.message.pending.svelte-1gpwetz {background-color: #ad3dfa}
.contain.svelte-1rjryqp.svelte-1rjryqp.svelte-1rjryqp {background-color: white; color: black}
.svelte-1ed2p3z {background-image: url(https://huggingface.co/spaces/jugacostase/BOTeome/resolve/044cd65f416a2c91b464b06fbeef07e0e46bda50/static/img/BOTeome_logo.png); height:170px; background-size: 500px; background-repeat: no-repeat;}
.dark {--body-text-color: white; --input-background-fill: black}
.label.svelte-p5q82i {color: black;}
.bot.svelte-pcjl1g.svelte-pcjl1g {background-color: #B84BFE; color: white;}
.user.svelte-pcjl1g.svelte-pcjl1g {background-color: #93d0ff; color: white;}
.svelte-1viwdyg {background: #93d0ff;}
label.svelte-i3tvor {background: #93d0ff; color: white;}
.label.show_textbox_border.svelte-173056l {background-color: white; color: black; border-color: gray; border-style: solid; border-width: 1px}
.textarea.svelte-173056l {background-color: white; color: black; border-color: gray; border-style: solid; border-width: 1px}
"""

demo = gr.ChatInterface(
    respond,
    chatbot=gr.Chatbot(height=501),
    textbox=gr.Textbox(placeholder="Ask about your proteomics data", container=False, scale=7),
    title=" ",
    css=css,
    #examples=["Is satb2 a transcription factor?",
    #          "Where is the prox1 located?",
    #          "What are the levels of expression of Myh9 on the VCN"],
    #cache_examples=True,
)

demo.launch()