"""Minimal Gradio UI for text completion via the Hugging Face Inference API.

Sends a prompt to the ``zephyr-7b-beta`` hosted endpoint through the
``text_generation`` client and shows the completion in a simple web form.
Requires ``HF_API_KEY`` in the environment (a local ``.env`` file is read).
"""

import os

import gradio as gr
from dotenv import find_dotenv, load_dotenv
from text_generation import Client

# Read the local .env file so HF_API_KEY can live outside the source tree.
_ = load_dotenv(find_dotenv())
hf_api_key = os.environ['HF_API_KEY']  # raises KeyError early if unset

# Zephyr-7B-beta endpoint on the HF Inference API (text_generation protocol).
URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"

# FIX: the API key was previously loaded but never sent; without the
# Authorization header the Inference API rejects or rate-limits requests.
client = Client(
    URL,
    headers={"Authorization": f"Bearer {hf_api_key}"},
    timeout=120,
)


def generate(prompt, max_new_tokens):
    """Return the model's completion for *prompt*.

    Called by Gradio with the Textbox and Slider values (positionally), so
    the parameter rename away from the builtin-shadowing ``input`` is safe.
    """
    return client.generate(prompt, max_new_tokens=max_new_tokens).generated_text


def loadGUI():
    """Build and launch the Gradio interface; blocks until the server stops."""
    demo = gr.Interface(
        fn=generate,
        inputs=[
            gr.Textbox(label="Prompt"),
            gr.Slider(label="Max new tokens", value=20, maximum=1024, minimum=1),
        ],
        outputs=[gr.Textbox(label="Completion")],
    )
    gr.close_all()  # shut down any servers left over from a previous run
    demo.launch(share=True)


def main():
    """Script entry point."""
    loadGUI()


if __name__ == "__main__":
    main()