| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| """ | |
| For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference | |
| """ | |
| from optimum.intel import OVModelForCausalLM | |
| from transformers import AutoTokenizer, pipeline | |
# Hugging Face repo id of a Qwen2.5-0.5B-Instruct checkpoint pre-converted to OpenVINO IR.
model_id = "HelloSun/Qwen2.5-0.5B-Instruct-openvino"
# Load the OpenVINO-optimized causal LM (optimum-intel runs it via the OpenVINO runtime).
model = OVModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Shared text-generation pipeline; loaded once at import time and reused by `respond`.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def respond(
    message,
    history=None,
):
    """Generate a chat reply for *message* with the local OpenVINO pipeline.

    Parameters
    ----------
    message : str
        The user's latest chat message.
    history : list, optional
        Prior conversation turns passed positionally by ``gr.ChatInterface``
        (its callback contract is ``fn(message, history)``).  Accepted so the
        call does not raise ``TypeError``; currently unused — the model sees
        only the latest message.  Defaults to ``None`` so direct one-argument
        calls keep working.

    Yields
    ------
    str
        The generated text, suitable for display in the chat window.
    """
    # The text-generation pipeline returns a list like
    # [{"generated_text": "..."}]; yield the string itself rather than
    # the raw list of dicts (which would render as its repr in the UI).
    results = pipe(message)
    yield results[0]["generated_text"]
| """ | |
| For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface | |
| """ | |
# Minimal chat UI wired to the local generation callback.
demo = gr.ChatInterface(fn=respond)
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()