from typing import Iterator, List, Optional
import gradio as gr
from datasets import load_dataset
from huggingface_hub import InferenceClient

from dataset.dataset import get_response_from_huggingface_dataset
from prompt.prompt import prompt_for_template, template_bot
from schema.schema import WeonTest
from settings.load_env import load_token
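
# Static persona fields read from the WeonTest schema.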
description: str = WeonTest.description
rules: str = WeonTest.rules
behavior: str = WeonTest.comportamento
examples: str = WeonTest.examples
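
# Model, prompt template, and canned-response dataset used by the bot.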
MODEL: str = "meta-llama/Llama-3.2-3B-Instruct"
# TOKEN: str = load_token("token_env")

TEMPLATE_BOT = template_bot()
prompt_template = prompt_for_template(TEMPLATE_BOT)
DATASET = load_dataset("wendellast/weon-messagens")

client: InferenceClient = InferenceClient(model=MODEL)


def respond(
    message: str,
    history: List[dict],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> Iterator[str]:
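    """Yield the reply: a canned dataset answer when available, otherwise a
    streamed completion from the hosted model. (`system_message` is currently unused.)"""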
    # Return a canned response from the dataset when one matches the message.
    response: Optional[str] = get_response_from_huggingface_dataset(message, DATASET)
    if response:
        yield response
        return
    # Flatten the chat history into "Role: content" lines.
    # NOTE: historico is built but never injected into the prompt below.
    historico = "\n".join(
        f"{entry['role'].capitalize()}: {entry['content']}" for entry in history
    )
    prompt: str = prompt_template.format(
        description=description,
        regras=rules,
        comportamento=behavior,
        exemplos=examples,
        mensagem=message,
    )
    print(prompt)  # debug: log the fully rendered prompt
    messages: List[dict] = [{"role": "system", "content": prompt}]
    response = ""
    # Stream the completion, yielding the accumulated text so the UI updates live.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A stream delta can arrive with no content; guard against None.
        token: str = chunk.choices[0].delta.content or ""
        response += token
        yield response
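

# Chat UI: the additional inputs map onto respond()'s parameters after (message, history).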
demo: gr.ChatInterface = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    title="WeOn-BOT",
    type="messages",
)


if __name__ == "__main__":
    demo.launch(show_error=True)