File size: 6,996 Bytes
a7110dc f1a072e 1a5bc09 794f4a2 1a5bc09 f1a072e a7110dc 794f4a2 f1a072e 1a5bc09 a7110dc 1a5bc09 794f4a2 a7110dc 794f4a2 f1a072e a7110dc f1a072e 1a5bc09 f1a072e a7110dc 1a5bc09 a7110dc 1a5bc09 f1a072e a7110dc f1a072e a7110dc 1a5bc09 a7110dc 1a5bc09 794f4a2 a7110dc 794f4a2 a7110dc 1a5bc09 794f4a2 a7110dc 794f4a2 a7110dc 4b69f93 f1a072e a7110dc f1a072e a7110dc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
import os, re
import gradio as gr
import torch
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from huggingface_hub import login
# =========================
# Environment variables
# =========================
BASE_ID = os.environ.get("BASE_ID", "mistralai/Mistral-7B-v0.1")
ADAPTER_ID = os.environ.get("ADAPTER_ID", "roneymatusp/british-optimizer-mistral-final")
HF_TOKEN = os.environ.get("HF_TOKEN")

if HF_TOKEN:
    try:
        login(HF_TOKEN)
    except Exception:
        # The token may not be required (non-gated model); continue silently.
        pass

# =========================
# Model cache (lazily filled by load_model)
# =========================
_tok = None
_model = None
def load_model():
    """Load Mistral-7B in 4-bit, attach the LoRA adapter, and cache both.

    Returns:
        Tuple of (tokenizer, model). Subsequent calls reuse the cached pair.
    """
    global _tok, _model
    if _tok is None or _model is None:
        # NF4 double-quantised 4-bit config with bf16 compute.
        quant_cfg = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        _tok = AutoTokenizer.from_pretrained(BASE_ID, use_fast=True)
        base = AutoModelForCausalLM.from_pretrained(
            BASE_ID,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            quantization_config=quant_cfg,
        )
        # .eval() returns the module itself, so we can chain it.
        _model = PeftModel.from_pretrained(base, ADAPTER_ID).eval()
    return _tok, _model
# =========================
# Blocking policy
# (simple word list; adjust per school policy)
# =========================
BANNED = {
    # Portuguese profanity/insults (examples)
    "merda", "porra", "caralho", "buceta", "puta", "puto",
    "viad", "bixa", "bicha", "otario", "otário", "otaria", "otária",
    "idiota", "imbecil", "burro", "burra",
    # add terms specific to the school's policy here
}


def violates_policy(text: str) -> bool:
    """Return True when *text* is empty/too short or contains a banned term.

    Matching is a plain lowercase substring test against BANNED.
    """
    if not text:
        return True
    if len(text.strip()) < 6:
        return True
    lowered = text.lower()
    for term in BANNED:
        if term in lowered:
            return True
    return False
# =========================
# Optimiser instructions
# =========================
# System persona: constrains the model to act only as a prompt rewriter —
# it must never answer the underlying task itself.
SYSTEM = (
    "You are a PROMPT OPTIMISER for teachers in the UK. "
    "You NEVER answer the user's task or give examples/solutions. "
    "You ONLY return ONE structured prompt that another assistant will answer later. "
    "Use UK spelling and an academic yet concise tone."
)
# Template filled with the teacher's Portuguese idea via .format(user_pt=...).
# Headings are intentionally Portuguese; the content must be en-GB.
OPT_TEMPLATE = """Rewrite the user's idea (Portuguese) into exactly ONE optimised prompt for a teaching assistant.
Constraints:
- Headings MUST be in Portuguese EXACTLY as below.
- Content MUST be in UK English (en-GB).
- Do NOT include explanations, solutions, examples, or chit-chat.
- If the idea is vague (e.g., just 'equations'), keep it curriculum-appropriate and generic.
- The assistant may ask up to 3 clarifying questions only if critical gaps remain.
Return ONLY the block below:
Persona: British educator and prompt engineer supporting teachers in UK schools.
Contexto: <brief UK classroom context derived from the user's idea; keep generic if unspecified>
Tarefa: <what the assistant should produce or plan, aligned to the user's intent>
Formato: <bulleted or numbered; resources if any; expected length; approximate timings if relevant>
Critérios: <clarity; UK spelling; curriculum alignment; accessibility (SEN/EAL); inclusivity; retrieval practice>
Idioma de saída: English (United Kingdom)
User idea (pt-BR):
{user_pt}
"""
def _generate(prompt: str, max_new_tokens=280, temperature=0.25) -> str:
    """Run sampled generation and return ONLY the newly generated text.

    Args:
        prompt: Full instruction string fed to the model.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature (low => near-deterministic).

    Returns:
        The decoded continuation, excluding the prompt itself.
    """
    tok, model = load_model()
    inputs = tok(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=0.95,
            pad_token_id=tok.eos_token_id,
        )
    # Bug fix: decode only the tokens generated after the prompt. Decoding
    # out[0] in full echoed the prompt, so keep_only_block() matched the
    # "Persona:" line inside the echoed OPT_TEMPLATE and returned the raw
    # template instead of the model's actual output.
    prompt_len = inputs["input_ids"].shape[-1]
    return tok.decode(out[0][prompt_len:], skip_special_tokens=True)
def keep_only_block(text: str) -> str:
    """Extract only the optimised-prompt block starting at 'Persona:'.

    Everything after a chat-turn marker or code fence (Assistant:, User:,
    ###, triple backticks) is discarded, guaranteeing that only the prompt
    — never an answer — is returned. When no 'Persona:' marker exists, a
    minimal well-formed fallback block is returned instead.
    """
    match = re.search(r"Persona\s*:", text, flags=re.IGNORECASE)
    if match is None:
        # Minimal fallback, always in the expected output format.
        return (
            "Persona: British educator and prompt engineer supporting teachers in UK schools.\n"
            "Contexto: UK classroom context (generic).\n"
            "Tarefa: Produce a concise lesson plan outline aligned to the user's intent.\n"
            "Formato: Numbered steps; brief timings; resources if any.\n"
            "Critérios: Clarity; UK spelling; curriculum alignment; inclusivity (SEN/EAL).\n"
            "Idioma de saída: English (United Kingdom)"
        )
    block = text[match.start():].strip()
    # Leak guard: cut before any trailing chat markers or fenced content.
    block = re.split(r"\n\s*(Assistant:|User:|###|```)", block)[0]
    return block.strip()
# =========================
# Função pública do Space
# (decorada para ZeroGPU/GPU)
# =========================
@spaces.GPU(duration=120)
def optimise_free_text(user_input: str) -> str:
    """Public Space entry point: turn a pt-BR idea into ONE optimised prompt.

    Returns a fixed refusal string when the input violates the policy.
    """
    if violates_policy(user_input):
        return "fora da política de otimização de prompts"
    task = OPT_TEMPLATE.format(user_pt=user_input.strip())
    raw = _generate(f"{SYSTEM}\n\n{task}", max_new_tokens=320, temperature=0.22)
    return keep_only_block(raw)
# =========================
# UI — simple, no chat
# =========================
THEME = gr.themes.Base(
    primary_hue="indigo",
    secondary_hue="red",
)

# Two-column layout: free-text idea (pt-BR) on the left, the generated
# optimised prompt (copyable) on the right.
with gr.Blocks(title="Paulean AI — Otimizador de Prompts (British)", theme=THEME) as demo:
    gr.Markdown(
        "## Paulean AI — Otimizador de Prompts (British)\n"
        "Digite sua ideia **em português** (ex.: *faça uma aula de matemática sobre equações para o IB*). "
        "O sistema **não responde aulas** nem dúvidas — ele **apenas** devolve um **prompt otimizado** "
        "no formato padronizado (**Persona, Contexto, Tarefa, Formato, Critérios, Idioma**).\n\n"
        "**Entradas inadequadas** retornam: `fora da política de otimização de prompts`."
    )
    with gr.Row():
        with gr.Column(scale=1):
            inp = gr.Textbox(
                label="Sua ideia (pt-BR)",
                placeholder="Ex.: Faça uma aula de matemática sobre equações do 2º grau (40-50 min), com exemplos e exercícios...",
                lines=8
            )
            with gr.Row():
                btn = gr.Button("Gerar prompt", variant="primary")
                clr = gr.Button("Limpar")
        with gr.Column(scale=1):
            out = gr.Textbox(
                label="Prompt otimizado (copiar e usar)",
                lines=18,
                show_copy_button=True
            )
    btn.click(optimise_free_text, inputs=inp, outputs=out)
    # Clear both the input and the output boxes.
    clr.click(lambda: ("", ""), inputs=None, outputs=[inp, out])

if __name__ == "__main__":
    demo.launch()