# target-poc / app.py — Hugging Face Space (branch "stable", revision 47c7f9e)
# Standard library
import os
from collections.abc import Generator
from queue import Queue, Empty
from threading import Thread
from typing import Any

# Third-party
import gradio as gr
import pinecone
from dotenv import load_dotenv
from langchain import PromptTemplate
from langchain.callbacks.base import BaseCallbackHandler
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Pinecone

# Load .env before the local import below: call_openai may read API keys
# from the environment at import time.
load_dotenv()

from call_openai import call_openai
OPENAI_API_KEY=os.environ["OPENAI_API_KEY"]
PINECONE_API_KEY=os.environ["PINECONE_API_KEY"]
PINECONE_ENV=os.environ["PINECONE_ENV"]
PINECONE_INDEX=os.environ["PINECONE_INDEX"]
class QueueCallback(BaseCallbackHandler):
    """Callback handler for streaming LLM responses to a queue.

    Each generated token is pushed onto the shared queue; a consumer
    generator drains it to stream the answer incrementally.
    """

    def __init__(self, q: Queue) -> None:
        # Queue shared with the consumer that yields tokens to the UI.
        self.q = q

    # FIX: the annotations used the builtin function ``any`` instead of
    # ``typing.Any`` — a misleading (though non-crashing) type hint.
    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        # Forward each freshly generated token to the consumer.
        self.q.put(token)

    def on_llm_end(self, *args: Any, **kwargs: Any) -> None:
        # NOTE(review): the return value is ignored by LangChain; end of
        # generation is signalled separately via a sentinel by the caller.
        return self.q.empty()
# TOOL
#####################################################################
# GPT-4 Turbo with temperature 0 for deterministic, fact-bound answers.
llm = ChatOpenAI(model_name="gpt-4-1106-preview", temperature=0)

# FIX: corrected the "knowladge" misspelling in the instruction text sent
# to the model. The input-variable name "instuction" is misspelled too,
# but it is the established key callers fill in, so it is kept as-is.
template = """
You are an expert research assistant. You can access information about articles via your tool.
Use information ONLY from this tool. Do not invent or add any more knowledge, be strict for the articles.
{instuction}
User: {user}
--------
{content}
"""
prompt = PromptTemplate(
    input_variables=["instuction", "user", "content"],
    template=template,
)
pinecone.init(
api_key=PINECONE_API_KEY,
environment=PINECONE_ENV
)
index = pinecone.Index(PINECONE_INDEX)
embedder = OpenAIEmbeddings()
class PineconeSearch:
    """Similarity search over one Pinecone namespace, rendered as text.

    Wraps a LangChain ``Pinecone`` vector store bound to the module-level
    index and embedder; calling the instance returns the top-k hits
    formatted as "Content:/Source:" context blocks.
    """

    # FIX: removed the class-level ``docsearch = None`` / ``topk = 2``
    # attributes — they were always shadowed by the instance attributes
    # set in __init__ and only invited shared-state bugs.

    def __init__(self, namespace, topk):
        """Bind to *namespace* of the shared index; keep *topk* hits."""
        self.docsearch = Pinecone.from_existing_index(
            PINECONE_INDEX, embedder, namespace=namespace
        )
        self.topk = topk

    def __call__(self, query):
        """Return the top-k matches for *query* as one context string."""
        docs = self.docsearch.similarity_search(query=query, k=self.topk)
        parts = []
        for doc in docs:
            parts.append(f"Content:\n{doc.page_content}\n")
            parts.append(f"Source: {doc.metadata['url']}\n")
            parts.append("----")
        return "".join(parts)
def query_tool(category, pinecone_topk, query):
    """Search the Pinecone namespace mapped to *category* for *query*.

    Returns the formatted context string produced by PineconeSearch
    (top ``pinecone_topk`` hits). Raises KeyError for unknown categories.
    """
    namespace_by_category = {
        "1_D3_receptor": "demo-richter-target-400-30-1",
        "2_dopamine": "demo-richter-target-400-30-2",
        "3_mitochondrial": "demo-richter-target-400-30-3",
    }
    searcher = PineconeSearch(
        namespace=namespace_by_category[category],
        topk=pinecone_topk,
    )
    return searcher(query)
def print_token_and_price(response):
    """Print total token usage and estimated cost in HUF for a response.

    Expects ``response["token_usage"]`` with integer ``prompt_tokens``
    and ``completion_tokens`` counts (OpenAI usage format).
    """
    usage = response["token_usage"]
    # FIX: the counts are plain ints — the old code wrapped them in
    # sum(), which raises TypeError ("int object is not iterable").
    inp = usage["prompt_tokens"]
    out = usage["completion_tokens"]
    print(f"Token usage: {inp + out}")
    # GPT-4 Turbo pricing: $0.01 / 1K prompt and $0.03 / 1K completion
    # tokens; 370 is the assumed USD -> HUF conversion rate.
    price = inp / 1000 * 0.01 + out / 1000 * 0.03
    print(f"Total price: {price*370:.2f} Ft")
    print("===================================")
# System prompt shared by every chat turn. FIX: corrected the
# "knowladge" misspelling in the instruction text sent to the model.
agent_prompt = """You are an expert research assistant. You can access information about articles via your tool.
Use information ONLY from this tool. Do not invent or add any more knowledge, be strict for the articles."""
def stream(input_text, history, user_prompt, topic, topk) -> Generator:
# Create a Queue
q = Queue()
job_done = object()
# Create a funciton to call - this will run in a thread
def task():
#chain = LLMChain(llm=llm, prompt=prompt, callbacks=[QueueCallback(q)])
tool_resp = query_tool(topic, topk, input_text)
response = call_openai(
messages=[{"role": "system", "content": agent_prompt},
{"role": "system", "content": user_prompt},
{"role": "user", "content": input_text},
{"role": "system", "content": tool_resp}
],
stream="token",
model="gpt-4-1106-preview",
callback=QueueCallback(q)
)
print(response)
#print_token_and_price(response=response)
q.put(job_done)
# Create a thread and start the function
t = Thread(target=task)
t.start()
content = ""
# Get each new token from the queue and yield for our generator
counter = 0
while True:
try:
next_token = q.get(True, timeout=1)
if next_token is job_done:
break
content += next_token
counter += 1
if counter == 20:
content += "\n"
counter = 0
if "\n" in next_token:
counter = 0
yield next_token, content
except Empty:
continue
def ask_llm(message, history, prompt, topic, topk):
    """Gradio ChatInterface callback: yield the growing answer text."""
    for _token, partial_answer in stream(message, history, prompt, topic, topk):
        yield partial_answer
agent_prompt_textbox = gr.Textbox(
label = "Set the behaviour of the agent",
lines = 2,
value = "Make your brief answer in bullet points."
)
namespace_drobdown = gr.Dropdown(
["1_D3_receptor", "2_dopamine", "3_mitochondrial"],
label="Choose a topic",
value="1_D3_receptor"
)
topk_slider = gr.Slider(
minimum=10,
maximum=100,
value=70,
step=10
)
additional_inputs = [agent_prompt_textbox, namespace_drobdown, topk_slider]
chatInterface = gr.ChatInterface(
fn=ask_llm,
additional_inputs=additional_inputs,
additional_inputs_accordion_name="Agent parameters"
).queue().launch()