# LLM_Poet / app.py
# (Hugging Face Space header: ez7051 — "Update app.py" — commit f586ff8, verified)
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from copy import deepcopy
import gradio as gr
## Instruction-tuned LLM served through the NVIDIA AI Endpoints API.
inst_llm = ChatNVIDIA(model="mixtral_8x7b") ### Feel free to change the models

## Prompt for the first turn: write a brand-new poem, responding only in rhyme.
prompt1 = ChatPromptTemplate.from_messages([
    ("system", "Only respond in rhymes"),
    ("user", "{input}")
])

## Prompt for later turns: rewrite an existing poem (fed in as {input})
## around a new subject ({topic}) while keeping rhyme and structure.
prompt2 = ChatPromptTemplate.from_messages([
    ("system", (
        "Only responding in rhyme, change the topic of the input poem to be about {topic}!"
        " Make it happy! Try to keep the same sentence structure, but make sure it's easy to recite!"
        " Try not to rhyme a word with itself."
    )),
    ("user", "{input}")
])

## These are the main chains, constructed here as modules of functionality.
chain1 = prompt1 | inst_llm | StrOutputParser() ## only expects input
chain2 = prompt2 | inst_llm | StrOutputParser() ## expects both input and topic
################################################################################
def rhyme_chat2_stream(message, history, return_buffer=True):
    '''Generator chat handler for the rhyming poet.

    On the first real turn (no prior poem in `history`), streams a fresh
    rhyming poem about `message` via chain1. On later turns, streams a
    rewrite of that first poem with `message` as the new topic via chain2.

    Args:
        message: Latest user utterance (poem subject, or the new topic).
        history: Gradio-style history — a list of [user_msg, bot_msg] pairs
            (the seeded greeting entry has a None user half).
        return_buffer: If True (what gr.ChatInterface expects), yield the
            whole accumulated response each step; if False, yield raw tokens.

    Yields:
        str: The growing response buffer, or individual tokens/passages.
    '''
    ## Locate the first poem: the bot half of the first exchange in which the
    ## user actually said something (skips the seeded [None, greeting] entry).
    first_poem = None
    for entry in history:
        if entry[0] and entry[1]:
            first_poem = entry[1]
            break

    if first_poem is None:
        ## First Case: no initial poem yet — generate one about the topic.
        buffer = "Oh! I can make a wonderful poem about that! Let me think!\n\n"
        yield buffer

        ## Stream the first generation token-by-token.
        for token in chain1.stream({"input": message}):
            buffer += token
            yield buffer if return_buffer else token

        passage = "\n\nNow let me rewrite it with a different focus! What should the new focus be?"
        buffer += passage
        yield buffer if return_buffer else passage
    else:
        ## Subsequent Cases: rewrite the first poem around the new topic.
        buffer = "Sure! Here you go!\n\n"
        yield buffer

        ## BUGFIX: the original had an early `return` here (its own TODO said
        ## to remove it), so the rewritten poem was never streamed.
        for token in chain2.stream({"input": first_poem, "topic": message}):
            buffer += token
            yield buffer if return_buffer else token

        passage = "\n\nThis is fun! Give me another topic!"
        buffer += passage
        yield buffer if return_buffer else passage
################################################################################
## Below: This is a small-scale simulation of the gradio routine.
def queue_fake_streaming_gradio(chat_stream, history=None, max_questions=5):
    '''Console-based mimic of the gradio streaming chat routine.

    Prints any starter messages from `history`, then runs up to
    `max_questions` rounds of: read a line from stdin, stream the agent's
    reply token-by-token to stdout, and append the exchange to `history`.

    Args:
        chat_stream: Generator function with the rhyme_chat2_stream signature
            (message, history, return_buffer) yielding response tokens.
        history: Optional starter history as [user_msg, bot_msg] pairs.
            BUGFIX: was a mutable default (`history=[]`), which is shared
            across calls; a None sentinel gives each call a fresh list.
        max_questions: Maximum number of user turns before returning.
    '''
    history = [] if history is None else history

    ## Mimic of the gradio initialization routine, where a set of starter
    ## messages can be printed off (None halves are skipped).
    for human_msg, agent_msg in history:
        if human_msg: print("\n[ Human ]:", human_msg)
        if agent_msg: print("\n[ Agent ]:", agent_msg)

    ## Mimic of the gradio loop with an initial message from the agent.
    for _ in range(max_questions):
        message = input("\n[ Human ]: ")
        print("\n[ Agent ]: ")
        history_entry = [message, ""]
        ## return_buffer=False so each yield is a raw token to echo/accumulate.
        for token in chat_stream(message, history, return_buffer=False):
            print(token, end='')
            history_entry[1] += token
        history += [history_entry]
        print("\n")
## history is of format [[User response 0, Bot response 0], ...]
## NOTE(review): this list mirrors the chatbot seed below but is never passed
## to the interface — presumably kept for the console simulation above; verify.
history = [[None, "Let me help you make a poem! What would you like for me to write?"]]
## Simple way to initialize history for the ChatInterface
chatbot = gr.Chatbot(value = [[None, "Let me help you make a poem! What would you like for me to write?"]])
## IF USING COLAB: Share=False is faster
## Launch the streaming chat UI (debug=True blocks and prints server logs).
gr.ChatInterface(rhyme_chat2_stream, chatbot=chatbot).queue().launch(debug=True, share=True)