Spaces:
Paused
Paused
import os

import chainlit as cl
from huggingface_hub import AsyncInferenceClient

# SECURITY: a live "hf_..." token was previously hard-coded here, i.e. committed
# to source control. Read it from the environment instead, and revoke/rotate the
# leaked token on the Hugging Face account.
API_TOKEN = os.environ.get("HF_API_TOKEN", "")
# NOTE(review): `headers` is not used by AsyncInferenceClient (it handles auth
# itself via `token=`); kept in case other code in this project reads it.
headers = {"Authorization": f"Bearer {API_TOKEN}", "Content-Type": "application/json"}
# Dedicated HF Inference Endpoint serving the chat model.
API_URL = "https://kfsb1xfskc2136wg.eu-west-1.aws.endpoints.huggingface.cloud"
client = AsyncInferenceClient(model=API_URL, token=API_TOKEN)
@cl.on_chat_start
async def start():
    """Initialize a chat session: reset history and greet the user.

    The original function was named ``main`` with no decorator, so chainlit
    never called it, and the second ``main`` definition below shadowed it
    entirely — registering it as the chat-start handler and renaming it fixes
    both problems.
    """
    # Fresh per-session conversation history (list of prompt/response strings).
    cl.user_session.set("history", [])
    msg = cl.Message(content="Loading Chat please wait ...")
    await msg.send()
    # Let the user know that the system is ready.
    # NOTE(review): newer chainlit releases take no arguments to update()
    # (`msg.content = ...; await msg.update()`) — confirm against the pinned
    # chainlit version before changing this call.
    await msg.update(content="Chat has been loaded. You can now ask questions!")
@cl.on_message
async def main(message: str):
    """Handle one user message: stream a model completion back to the UI.

    Appends the user turn to the session history, streams tokens from the
    inference endpoint into a chainlit message, then records the assistant
    response in history. Was previously undecorated, so chainlit never
    dispatched incoming messages to it.
    """
    # Current chainlit passes a cl.Message; older versions passed the raw
    # string. Accept both without changing the signature.
    text = message.content if hasattr(message, "content") else message
    history = cl.user_session.get("history")
    # OpenAssistant-style prompt framing expected by the model.
    history.append("<|prompter|>" + text + "<|endoftext|><|assistant|>")
    resp = ""
    msg = cl.Message(content="")
    # text_generation(stream=True) on the async client yields tokens as they
    # are produced; forward each to the UI, skipping the end-of-text marker.
    async for token in await client.text_generation(
        "".join(history), stream=True, max_new_tokens=250
    ):
        if token != "<|endoftext|>":
            print(token, end="")
            resp += token
            await msg.stream_token(token)
    history.append(resp + "<|endoftext|>")
    cl.user_session.set("history", history)
    print(history)
    await msg.send()