Spaces:
Sleeping
Sleeping
File size: 1,912 Bytes
d9003d6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | import os
import requests
requests.adapters.DEFAULT_TIMEOUT = 60
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file
hf_api_key = os.environ['HF_API_KEY']
# Helper function
import requests, json
from text_generation import Client
#FalcomLM-instruct endpoint on the text_generation library
URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
client = Client(URL, timeout=120)
#Back to Lesson 2, time flies!
import gradio as gr
def generate(input, slider):
output = client.generate(input, max_new_tokens=slider).generated_text
return output
def format_chat_prompt(message, chat_history):
prompt = ""
for turn in chat_history:
user_message, bot_message = turn
prompt = f"{prompt}\nUser: {user_message}\nAssistant: {bot_message}"
prompt = f"{prompt}\nUser: {message}\nAssistant:"
return prompt
def respond(message, chat_history):
formatted_prompt = format_chat_prompt(message, chat_history)
bot_message = client.generate(formatted_prompt,
max_new_tokens=1024,
stop_sequences=["\nUser:", "<|endoftext|>"]).generated_text
chat_history.append((message, bot_message))
return "", chat_history
def loadGUI():
with gr.Blocks() as demo:
chatbot = gr.Chatbot(height=240) #just to fit the notebook
msg = gr.Textbox(label="Prompt")
btn = gr.Button("Submit")
clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")
btn.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot]) #Press enter to submit
gr.close_all()
demo.launch(share=True)
def main():
loadGUI()
if __name__ == "__main__":
main()
|