File size: 3,581 Bytes
da49834
ee3fc1f
80df02d
ee3fc1f
80df02d
1bd3a70
 
 
ca723d0
da49834
80df02d
1bd3a70
9b07fbe
1bd3a70
ee3fc1f
da49834
ca723d0
2b86145
 
da49834
80df02d
9b07fbe
ca723d0
9b07fbe
ca723d0
ee3fc1f
ca723d0
da49834
5e2ce1e
0e34121
80df02d
ee3fc1f
 
1bd3a70
 
 
0e34121
 
 
 
 
 
 
 
 
 
ee3fc1f
 
0e34121
 
 
ca723d0
ee3fc1f
5e2ce1e
545b229
 
 
 
 
 
 
 
 
 
ee3fc1f
5e2ce1e
6466616
5e2ce1e
 
 
 
ee3fc1f
5e2ce1e
ee3fc1f
1bd3a70
 
 
 
 
 
0e34121
 
 
9b07fbe
0e34121
1bd3a70
 
 
 
 
 
 
 
9b07fbe
1bd3a70
 
 
 
 
9b07fbe
1bd3a70
 
 
 
 
 
 
0e34121
 
1bd3a70
 
0e34121
1bd3a70
 
9b07fbe
0e34121
1bd3a70
0e34121
1bd3a70
 
 
0e34121
1bd3a70
 
 
0e34121
1bd3a70
 
 
 
9b07fbe
 
 
 
 
0e34121
9b07fbe
1bd3a70
 
80df02d
0e34121
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# =========================
# CONFIG
# =========================
MODEL_ID = "google/gemma-2-2b-it"
HF_TOKEN = os.environ.get("HF_TOKEN")

# =========================
# LOAD TOKENIZER
# =========================
print("🔄 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    use_fast=False
)

# =========================
# LOAD MODEL
# =========================
print("🔄 Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    torch_dtype=torch.float32,  # pode trocar pra bfloat16 se tiver GPU
    device_map="auto"
)
model.eval()

# =========================
# CHAT FUNCTION
# =========================
def chat(user_input, system_prompt, temperature, top_p, max_tokens):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    inputs = tokenizer(
        prompt,
        return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=1.1,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id
        )

    generated_tokens = output[0][inputs["input_ids"].shape[-1]:]

    decoded = tokenizer.decode(
        generated_tokens,
        skip_special_tokens=True
    )

    return decoded.strip()

# =========================
# GRADIO UI
# =========================
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🐕 DogeAI v1.0  
        Modelo **experimental**, pequeno e focado em respostas claras.  
        ⚠️ Pode errar ou alucinar  
        ❌ Não use como fonte confiável  
        ✅ Use para estudo e experimentação
        """
    )

    with gr.Row():
        with gr.Column(scale=3):
            user_input = gr.Textbox(
                lines=5,
                label="Mensagem",
                placeholder="Converse com o DogeAI 🐶"
            )

            submit = gr.Button("Enviar 🚀")

            output = gr.Textbox(
                lines=14,
                label="Resposta do modelo"
            )

        with gr.Column(scale=2):
            system_prompt = gr.Textbox(
                lines=6,
                value=(
                    "Você é o DogeAI, um modelo experimental e honesto. "
                    "Se não souber algo, diga claramente que não sabe. "
                    "Não invente fatos."
                ),
                label="System Prompt"
            )

            gr.Markdown("### ⚙️ Hiperparâmetros")

            temperature = gr.Slider(
                0.2, 1.5, value=0.7, step=0.05, label="Temperature"
            )

            top_p = gr.Slider(
                0.3, 1.0, value=0.9, step=0.05, label="Top-p"
            )

            max_tokens = gr.Slider(
                32, 512, value=200, step=8, label="Max tokens"
            )

    submit.click(
        chat,
        inputs=[
            user_input,
            system_prompt,
            temperature,
            top_p,
            max_tokens
        ],
        outputs=output
    )

demo.launch()