File size: 6,996 Bytes
a7110dc
f1a072e
1a5bc09
 
 
794f4a2
 
1a5bc09
f1a072e
a7110dc
 
 
794f4a2
 
 
f1a072e
1a5bc09
 
 
 
a7110dc
1a5bc09
794f4a2
a7110dc
 
 
 
794f4a2
f1a072e
a7110dc
 
 
 
 
f1a072e
1a5bc09
 
 
 
 
 
f1a072e
a7110dc
1a5bc09
 
 
 
 
 
 
 
a7110dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a5bc09
f1a072e
a7110dc
f1a072e
a7110dc
 
 
 
 
 
 
 
1a5bc09
a7110dc
 
 
 
 
 
 
 
 
 
 
 
 
 
1a5bc09
794f4a2
 
a7110dc
794f4a2
a7110dc
1a5bc09
794f4a2
 
a7110dc
794f4a2
a7110dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b69f93
f1a072e
a7110dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1a072e
a7110dc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import os, re
import gradio as gr
import torch
import spaces

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from huggingface_hub import login

# =========================
# Environment variables
# =========================
# BASE_ID: base model to quantise; ADAPTER_ID: LoRA adapter applied on top.
# Both can be overridden via the Space's environment settings.
BASE_ID = os.getenv("BASE_ID", "mistralai/Mistral-7B-v0.1")
ADAPTER_ID = os.getenv("ADAPTER_ID", "roneymatusp/british-optimizer-mistral-final")
HF_TOKEN = os.getenv("HF_TOKEN")

if HF_TOKEN:
    try:
        login(HF_TOKEN)
    except Exception:
        # If the token is not needed (non-gated model), continue silently.
        pass

# =========================
# Model cache
# =========================
# Lazily populated by load_model(); kept at module level so the tokenizer
# and model are loaded only once per process.
_tok = None
_model = None

def load_model():
    """Load the 4-bit quantised Mistral base, attach the LoRA adapter,
    and cache both at module level so subsequent calls are free.

    Returns:
        (tokenizer, model) — the cached pair after the first call.
    """
    global _tok, _model
    if _tok is None or _model is None:
        # NF4 double-quantisation keeps the 7B model within Space memory.
        quant_cfg = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        _tok = AutoTokenizer.from_pretrained(BASE_ID, use_fast=True)
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_ID,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            quantization_config=quant_cfg,
        )
        # Apply the fine-tuned LoRA weights on top of the quantised base.
        _model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
        _model.eval()
    return _tok, _model

# =========================
# Blocking policy
# (simple list; adjust to the school's needs)
# =========================
BANNED = {
    # palavrões/insultos em PT (exemplos)
    "merda","porra","caralho","buceta","puta","puto",
    "viad","bixa","bicha","otario","otário","otaria","otária",
    "idiota","imbecil","burro","burra",
    # acrescente termos específicos da política da escola
}

# Match each banned term only when it starts a word (left boundary).
# Plain substring matching produced false positives on innocent words —
# "disputa" contains "puta", "computo" contains "puto" — while the \b
# anchor still catches deliberate prefix entries like "viad" ("viado",
# "viadinho"). Compiled once at module load.
_BANNED_RE = re.compile("|".join(r"\b" + re.escape(term) for term in sorted(BANNED)))

def violates_policy(text: str) -> bool:
    """Return True when *text* is out of policy.

    Out of policy means: empty/None input, fewer than 6 non-whitespace-
    trimmed characters, or any banned term appearing at the start of a
    word (case-insensitive).
    """
    if not text or len(text.strip()) < 6:
        return True
    return _BANNED_RE.search(text.lower()) is not None

# =========================
# Optimiser instructions
# =========================
# System message: constrains the model to act purely as a prompt rewriter —
# it must never answer the teacher's task itself.
SYSTEM = (
    "You are a PROMPT OPTIMISER for teachers in the UK. "
    "You NEVER answer the user's task or give examples/solutions. "
    "You ONLY return ONE structured prompt that another assistant will answer later. "
    "Use UK spelling and an academic yet concise tone."
)

# Rewrite template: headings stay in Portuguese (the UI contract) while the
# optimised prompt body is produced in UK English.
# NOTE(review): the template itself contains the literal "Persona:" marker —
# any downstream extraction anchored on that marker should operate on the
# generated continuation only, not on echoed prompt text.
OPT_TEMPLATE = """Rewrite the user's idea (Portuguese) into exactly ONE optimised prompt for a teaching assistant.

Constraints:
- Headings MUST be in Portuguese EXACTLY as below.
- Content MUST be in UK English (en-GB).
- Do NOT include explanations, solutions, examples, or chit-chat.
- If the idea is vague (e.g., just 'equations'), keep it curriculum-appropriate and generic.
- The assistant may ask up to 3 clarifying questions only if critical gaps remain.

Return ONLY the block below:

Persona: British educator and prompt engineer supporting teachers in UK schools.
Contexto: <brief UK classroom context derived from the user's idea; keep generic if unspecified>
Tarefa: <what the assistant should produce or plan, aligned to the user's intent>
Formato: <bulleted or numbered; resources if any; expected length; approximate timings if relevant>
Critérios: <clarity; UK spelling; curriculum alignment; accessibility (SEN/EAL); inclusivity; retrieval practice>
Idioma de saída: English (United Kingdom)

User idea (pt-BR):
{user_pt}
"""

def _generate(prompt: str, max_new_tokens=280, temperature=0.25) -> str:
    """Run the cached model on *prompt* and return ONLY the generated
    continuation.

    Decoding the full output sequence echoed the prompt back to the
    caller; since the downstream extractor (keep_only_block) anchors on
    "Persona:" — a marker that already appears inside OPT_TEMPLATE —
    the echo made it capture the template itself rather than the
    model's answer. Slicing off the prompt tokens before decoding
    fixes that.
    """
    tok, model = load_model()
    inputs = tok(prompt, return_tensors="pt").to(model.device)
    # Number of prompt tokens to skip when decoding.
    prompt_len = inputs["input_ids"].shape[1]
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=0.95,
            pad_token_id=tok.eos_token_id,
        )
    # Decode only the newly generated tail, not the echoed prompt.
    return tok.decode(out[0][prompt_len:], skip_special_tokens=True)

def keep_only_block(text: str) -> str:
    """Extract just the optimised-prompt block from raw model output.

    Keeps everything from the first 'Persona:' marker (case-insensitive)
    up to — but not including — the first chat/markdown delimiter
    (Assistant:, User:, ###, ```). When no marker is found, a minimal
    well-formed fallback block is returned so the UI always shows a
    valid prompt and never a model answer.
    """
    anchor = re.search(r"Persona\s*:", text, flags=re.IGNORECASE)
    if anchor is None:
        # Minimal fallback, always in the expected format.
        return (
            "Persona: British educator and prompt engineer supporting teachers in UK schools.\n"
            "Contexto: UK classroom context (generic).\n"
            "Tarefa: Produce a concise lesson plan outline aligned to the user's intent.\n"
            "Formato: Numbered steps; brief timings; resources if any.\n"
            "Critérios: Clarity; UK spelling; curriculum alignment; inclusivity (SEN/EAL).\n"
            "Idioma de saída: English (United Kingdom)"
        )
    block = text[anchor.start():].strip()
    # Cut at the first leaked chat turn or markdown fence, if any.
    cutoff = re.search(r"\n\s*(Assistant:|User:|###|```)", block)
    if cutoff is not None:
        block = block[:cutoff.start()]
    # Drop trailing whitespace left behind by the cut.
    return block.strip()

# =========================
# Public Space function
# (decorated for ZeroGPU/GPU)
# =========================
@spaces.GPU(duration=120)
def optimise_free_text(user_input: str) -> str:
    """Validate the teacher's idea and return one optimised prompt block.

    Out-of-policy input short-circuits to a fixed refusal string; valid
    input is wrapped in SYSTEM + OPT_TEMPLATE, generated, then trimmed
    to the structured block only.
    """
    if violates_policy(user_input):
        return "fora da política de otimização de prompts"

    task = OPT_TEMPLATE.format(user_pt=user_input.strip())
    raw = _generate(f"{SYSTEM}\n\n{task}", max_new_tokens=320, temperature=0.22)
    return keep_only_block(raw)

# =========================
# UI — simple, no chat
# =========================
THEME = gr.themes.Base(
    primary_hue="indigo",
    secondary_hue="red",
)

with gr.Blocks(title="Paulean AI — Otimizador de Prompts (British)", theme=THEME) as demo:
    gr.Markdown(
        "## Paulean AI — Otimizador de Prompts (British)\n"
        "Digite sua ideia **em português** (ex.: *faça uma aula de matemática sobre equações para o IB*). "
        "O sistema **não responde aulas** nem dúvidas — ele **apenas** devolve um **prompt otimizado** "
        "no formato padronizado (**Persona, Contexto, Tarefa, Formato, Critérios, Idioma**).\n\n"
        "**Entradas inadequadas** retornam: `fora da política de otimização de prompts`."
    )
    with gr.Row():
        with gr.Column(scale=1):
            # Left column: free-text input plus generate/clear buttons.
            inp = gr.Textbox(
                label="Sua ideia (pt-BR)",
                placeholder="Ex.: Faça uma aula de matemática sobre equações do 2º grau (40-50 min), com exemplos e exercícios...",
                lines=8
            )
            with gr.Row():
                btn = gr.Button("Gerar prompt", variant="primary")
                clr = gr.Button("Limpar")
        with gr.Column(scale=1):
            # Right column: read-only output with a copy button.
            out = gr.Textbox(
                label="Prompt otimizado (copiar e usar)",
                lines=18,
                show_copy_button=True
            )
    # Single handler: free text in -> optimised prompt block out.
    btn.click(optimise_free_text, inputs=inp, outputs=out)
    # Clear both textboxes with one click.
    clr.click(lambda: ("", ""), inputs=None, outputs=[inp, out])

if __name__ == "__main__":
    demo.launch()