Spaces:
Paused
Paused
| import gradio as gr | |
| from huggingface_hub import InferenceClient, login | |
| import random | |
| from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFacePipeline | |
| from langchain.schema import AIMessage, HumanMessage | |
| import os | |
# Authenticate against the Hugging Face Hub. The token is read from the
# environment; indexing os.environ raises KeyError if the variable is unset.
login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])

# Settings for the hosted text-generation endpoint, gathered in one place so
# they can be read at a glance. Sampling is switched off (do_sample=False).
_ENDPOINT_SETTINGS = {
    "repo_id": "HuggingFaceH4/zephyr-7b-beta",
    "task": "text-generation",
    "max_new_tokens": 512,
    "do_sample": False,
    "repetition_penalty": 1.03,
}
llm = HuggingFaceEndpoint(**_ENDPOINT_SETTINGS)

# Chat wrapper around the endpoint; `predict` invokes this object.
model = ChatHuggingFace(llm=llm)
def predict(message, history):
    """Return the chat model's reply to ``message`` given the prior turns.

    Args:
        message: The text the user has just submitted.
        history: Earlier turns as an iterable of mappings with ``"role"``
            and ``"content"`` keys. Turns whose role is neither ``"user"``
            nor ``"assistant"`` are ignored.

    Returns:
        The ``content`` attribute of the object returned by
        ``model.invoke``.
    """
    # Map each supported role onto the LangChain message class that wraps it.
    message_class_for_role = {"user": HumanMessage, "assistant": AIMessage}

    conversation = []
    for turn in history:
        message_class = message_class_for_role.get(turn["role"])
        if message_class is None:
            continue  # unsupported role: leave it out of the prompt
        conversation.append(message_class(content=turn["content"]))

    # The new user message always goes last, after the replayed history.
    conversation.append(HumanMessage(content=message))

    reply = model.invoke(conversation)
    return reply.content
# Build the chat UI around `predict`. type="messages" makes Gradio pass the
# history as role/content dicts, which is the shape `predict` reads.
demo = gr.ChatInterface(fn=predict, type="messages")

# Start the Gradio app.
demo.launch()