from typing import Iterator, List, Optional

import gradio as gr
from datasets import load_dataset
from huggingface_hub import InferenceClient

from dataset.dataset import get_response_from_huggingface_dataset
from prompt.prompt import prompt_for_template, template_bot
from schema.schema import WeonTest
from settings.load_env import load_token

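# Persona pieces pulled from the WeonTest schema (the upstream attribute
# names are Portuguese: "comportamento" = behavior).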
description: str = WeonTest.description
rules: str = WeonTest.rules
behavior: str = WeonTest.comportamento
examples: str = WeonTest.examples


MODEL: str = "meta-llama/Llama-3.2-3B-Instruct"
# Uncomment to load a Hugging Face API token from the environment (see settings.load_env):
# TOKEN: str = load_token("token_env")

TEMPLATE_BOT = template_bot()
prompt_template = prompt_for_template(TEMPLATE_BOT)

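# Curated message/answer pairs; respond() checks these before calling the model.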
DATASET = load_dataset("wendellast/weon-messagens")

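# Serverless Hugging Face Inference API client. If the model requires
# authentication, pass token=TOKEN here as well.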
client: InferenceClient = InferenceClient(model=MODEL)


def respond(
    message: str,
    history: List[dict],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> Iterator[str]:
    """Stream a reply, preferring a curated dataset answer over the model."""
    # A matching entry in the dataset short-circuits the LLM call entirely.
    response: Optional[str] = get_response_from_huggingface_dataset(message, DATASET)
    if response:
        yield response
        return

    # Flatten earlier turns into a plain-text transcript for the prompt.
    historico = "\n".join(
        f"{entry['role'].capitalize()}: {entry['content']}" for entry in history
    )

    prompt: str = prompt_template.format(
        description=description,
        regras=rules,
        comportamento=behavior,
        exemplos=examples,
        mensagem=message,
    )

    if system_message:
        # Let the UI's "System message" box extend the templated persona.
        prompt = f"{system_message}\n\n{prompt}"

    if historico:
        # The template has no history placeholder, so append the transcript
        # to the system prompt to preserve multi-turn context.
        prompt = f"{prompt}\n\nConversation history:\n{historico}"

    print(prompt)  # Debug: inspect the fully assembled prompt.

    messages: List[dict] = [{"role": "system", "content": prompt}]
    response = ""

    # Stream tokens from the Inference API, yielding the growing response so
    # Gradio renders it incrementally. The loop variable is named `chunk` to
    # avoid shadowing the `message` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Delta content can be None on the final chunk; guard before appending.
        token: str = chunk.choices[0].delta.content or ""
        response += token
        yield response


demo: gr.ChatInterface = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    title="WeOn-BOT",
    type="messages",
)


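# Entry point: `python app.py` serves the chat UI locally; on a Hugging Face
# Space this file is executed automatically.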
if __name__ == "__main__":
    demo.launch(show_error=True)