"""Gradio chat UI backed by a Hugging Face text-generation inference endpoint.

Reads HF_API_KEY from a local .env file and uses it to authenticate against
the hosted zephyr-7b-beta model via the `text_generation` client.
"""

import json
import os

import gradio as gr
import requests
from dotenv import load_dotenv, find_dotenv
from text_generation import Client

# NOTE(review): `requests` has no DEFAULT_TIMEOUT hook — this assignment is
# kept from the original, but it does not actually apply a request timeout;
# confirm intent before relying on it.
requests.adapters.DEFAULT_TIMEOUT = 60

_ = load_dotenv(find_dotenv())  # read local .env file
hf_api_key = os.environ['HF_API_KEY']

# zephyr-7b-beta endpoint on the text_generation inference library
URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
# Fix: the API key was loaded but never sent. The hosted Inference API
# requires a Bearer token, so attach it to every request via client headers.
client = Client(
    URL,
    headers={"Authorization": f"Bearer {hf_api_key}"},
    timeout=120,
)


def generate(input, slider):
    """Return up to `slider` newly generated tokens for the prompt `input`.

    NOTE(review): `input` shadows the builtin; the name is kept unchanged
    for backward compatibility with any keyword callers.
    """
    output = client.generate(input, max_new_tokens=slider).generated_text
    return output


def format_chat_prompt(message, chat_history):
    """Flatten prior (user, bot) turns plus the new `message` into one
    "User:/Assistant:" transcript ending with an open "Assistant:" cue
    for the model to complete."""
    prompt = ""
    for turn in chat_history:
        user_message, bot_message = turn
        prompt = f"{prompt}\nUser: {user_message}\nAssistant: {bot_message}"
    prompt = f"{prompt}\nUser: {message}\nAssistant:"
    return prompt


def respond(message, chat_history):
    """Gradio callback: generate a reply and append the turn to history.

    Returns ("", updated_history) so the textbox clears after submit.
    """
    formatted_prompt = format_chat_prompt(message, chat_history)
    bot_message = client.generate(
        formatted_prompt,
        max_new_tokens=1024,
        # Stop before the model invents the next user turn.
        stop_sequences=["\nUser:", "<|endoftext|>"],
    ).generated_text
    chat_history.append((message, bot_message))
    return "", chat_history


def loadGUI():
    """Build the Blocks chat UI and launch it with a public share link.

    NOTE(review): non-PEP8 name kept unchanged for interface compatibility.
    """
    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(height=240)  # just to fit the notebook
        msg = gr.Textbox(label="Prompt")
        btn = gr.Button("Submit")
        clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")
        btn.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
        msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])  # Press enter to submit
    gr.close_all()  # close any demos left running from earlier notebook runs
    demo.launch(share=True)


def main():
    """Script entry point."""
    loadGUI()


if __name__ == "__main__":
    main()