Spaces:
Sleeping
Sleeping
| import os | |
| import yaml | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
# Hugging Face API token taken from the environment; None when HF_TOKEN is unset.
hf_token = os.environ.get("HF_TOKEN")

# The system prompt lives in prompt.yaml (resolved against the working
# directory) under the top-level "system_prompt" key.
with open("prompt.yaml", mode="r", encoding="utf-8") as prompt_file:
    _prompt_config = yaml.safe_load(prompt_file)
system_prompt = _prompt_config["system_prompt"]

# Shared inference client used by every chat request.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token=hf_token)
# Simple stateless version first: no conversation history is kept.
def chat(user_input):
    """Answer a single question with the hosted chat model.

    Each call is stateless: only the system prompt and the current user
    message are sent, never earlier turns.

    Args:
        user_input: Text submitted from the UI. It is converted with
            ``str`` before being sent to the model.

    Returns:
        The model's reply text. A short hint is returned instead when the
        input is missing or blank, and an empty string when the model
        response carries no content.
    """
    # A blank submission would spend an API call on an empty prompt (and
    # None would be sent as the literal text "None"), so answer locally.
    if user_input is None or not str(user_input).strip():
        return "Please enter a question."

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(user_input)},
    ]
    response = client.chat_completion(
        messages=messages,
        max_tokens=200,
        temperature=0.7,
    )
    # The message content may be None; always hand a string back to the UI.
    return response.choices[0].message.content or ""
# Single-turn UI: one input textbox wired to `chat`, one output textbox.
question_box = gr.Textbox(label="Ask me")
answer_box = gr.Textbox(label="Response")
demo = gr.Interface(fn=chat, inputs=question_box, outputs=answer_box)

# Launch the interface as soon as the module is executed.
demo.launch()