File size: 6,996 Bytes
a7110dc
f1a072e
1a5bc09
 
 
794f4a2
 
1a5bc09
f1a072e
a7110dc
 
 
794f4a2
 
 
f1a072e
1a5bc09
 
 
 
a7110dc
1a5bc09
794f4a2
a7110dc
 
 
 
794f4a2
f1a072e
a7110dc
 
 
 
 
f1a072e
1a5bc09
 
 
 
 
 
f1a072e
a7110dc
1a5bc09
 
 
 
 
 
 
 
a7110dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a5bc09
f1a072e
a7110dc
f1a072e
a7110dc
 
 
 
 
 
 
 
1a5bc09
a7110dc
 
 
 
 
 
 
 
 
 
 
 
 
 
1a5bc09
794f4a2
 
a7110dc
794f4a2
a7110dc
1a5bc09
794f4a2
 
a7110dc
794f4a2
a7110dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b69f93
f1a072e
a7110dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1a072e
a7110dc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import os, re
import gradio as gr
import torch
import spaces

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from huggingface_hub import login

# =========================
# Environment variables
# =========================
# BASE_ID: base model to quantise; ADAPTER_ID: LoRA adapter applied on top.
# Both can be overridden via the Space's environment settings.
BASE_ID = os.getenv("BASE_ID", "mistralai/Mistral-7B-v0.1")
ADAPTER_ID = os.getenv("ADAPTER_ID", "roneymatusp/british-optimizer-mistral-final")
HF_TOKEN = os.getenv("HF_TOKEN")

if HF_TOKEN:
    try:
        login(HF_TOKEN)
    except Exception:
        # If the token is not needed (non-gated model), continue silently.
        pass

# =========================
# Model cache
# =========================
# Lazily populated by load_model(); kept at module level so the tokenizer
# and model are loaded only once per process.
_tok = None
_model = None

def load_model():
    """Load the 4-bit quantised Mistral base, attach the LoRA adapter,
    and cache both at module level so subsequent calls are free.

    Returns:
        (tokenizer, model) — the cached pair after the first call.
    """
    global _tok, _model
    if _tok is None or _model is None:
        # NF4 double-quantisation keeps the 7B model within Space memory.
        quant_cfg = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        _tok = AutoTokenizer.from_pretrained(BASE_ID, use_fast=True)
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_ID,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            quantization_config=quant_cfg,
        )
        # Apply the fine-tuned LoRA weights on top of the quantised base.
        _model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
        _model.eval()
    return _tok, _model

# =========================
# Blocking policy
# (simple list; adjust to the school's needs)
# =========================
BANNED = {
    # palavrões/insultos em PT (exemplos)
    "merda","porra","caralho","buceta","puta","puto",
    "viad","bixa","bicha","otario","otário","otaria","otária",
    "idiota","imbecil","burro","burra",
    # acrescente termos específicos da política da escola
}

# Match each banned term only when it starts a word (left boundary).
# Plain substring matching produced false positives on innocent words —
# "disputa" contains "puta", "computo" contains "puto" — while the \b
# anchor still catches deliberate prefix entries like "viad" ("viado",
# "viadinho"). Compiled once at module load.
_BANNED_RE = re.compile("|".join(r"\b" + re.escape(term) for term in sorted(BANNED)))

def violates_policy(text: str) -> bool:
    """Return True when *text* is out of policy.

    Out of policy means: empty/None input, fewer than 6 non-whitespace-
    trimmed characters, or any banned term appearing at the start of a
    word (case-insensitive).
    """
    if not text or len(text.strip()) < 6:
        return True
    return _BANNED_RE.search(text.lower()) is not None

# =========================
# Optimiser instructions
# =========================
# System message: constrains the model to act purely as a prompt rewriter —
# it must never answer the teacher's task itself.
SYSTEM = (
    "You are a PROMPT OPTIMISER for teachers in the UK. "
    "You NEVER answer the user's task or give examples/solutions. "
    "You ONLY return ONE structured prompt that another assistant will answer later. "
    "Use UK spelling and an academic yet concise tone."
)

# Rewrite template: headings stay in Portuguese (the UI contract) while the
# optimised prompt body is produced in UK English.
# NOTE(review): the template itself contains the literal "Persona:" marker —
# any downstream extraction anchored on that marker should operate on the
# generated continuation only, not on echoed prompt text.
OPT_TEMPLATE = """Rewrite the user's idea (Portuguese) into exactly ONE optimised prompt for a teaching assistant.

Constraints:
- Headings MUST be in Portuguese EXACTLY as below.
- Content MUST be in UK English (en-GB).
- Do NOT include explanations, solutions, examples, or chit-chat.
- If the idea is vague (e.g., just 'equations'), keep it curriculum-appropriate and generic.
- The assistant may ask up to 3 clarifying questions only if critical gaps remain.

Return ONLY the block below:

Persona: British educator and prompt engineer supporting teachers in UK schools.
Contexto: <brief UK classroom context derived from the user's idea; keep generic if unspecified>
Tarefa: <what the assistant should produce or plan, aligned to the user's intent>
Formato: <bulleted or numbered; resources if any; expected length; approximate timings if relevant>
Critérios: <clarity; UK spelling; curriculum alignment; accessibility (SEN/EAL); inclusivity; retrieval practice>
Idioma de saída: English (United Kingdom)

User idea (pt-BR):
{user_pt}
"""

def _generate(prompt: str, max_new_tokens=280, temperature=0.25) -> str:
    """Run the cached model on *prompt* and return ONLY the generated
    continuation.

    Decoding the full output sequence echoed the prompt back to the
    caller; since the downstream extractor (keep_only_block) anchors on
    "Persona:" — a marker that already appears inside OPT_TEMPLATE —
    the echo made it capture the template itself rather than the
    model's answer. Slicing off the prompt tokens before decoding
    fixes that.
    """
    tok, model = load_model()
    inputs = tok(prompt, return_tensors="pt").to(model.device)
    # Number of prompt tokens to skip when decoding.
    prompt_len = inputs["input_ids"].shape[1]
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=0.95,
            pad_token_id=tok.eos_token_id,
        )
    # Decode only the newly generated tail, not the echoed prompt.
    return tok.decode(out[0][prompt_len:], skip_special_tokens=True)

def keep_only_block(text: str) -> str:
    """Extract just the optimised-prompt block from raw model output.

    Keeps everything from the first 'Persona:' marker (case-insensitive)
    up to — but not including — the first chat/markdown delimiter
    (Assistant:, User:, ###, ```). When no marker is found, a minimal
    well-formed fallback block is returned so the UI always shows a
    valid prompt and never a model answer.
    """
    anchor = re.search(r"Persona\s*:", text, flags=re.IGNORECASE)
    if anchor is None:
        # Minimal fallback, always in the expected format.
        return (
            "Persona: British educator and prompt engineer supporting teachers in UK schools.\n"
            "Contexto: UK classroom context (generic).\n"
            "Tarefa: Produce a concise lesson plan outline aligned to the user's intent.\n"
            "Formato: Numbered steps; brief timings; resources if any.\n"
            "Critérios: Clarity; UK spelling; curriculum alignment; inclusivity (SEN/EAL).\n"
            "Idioma de saída: English (United Kingdom)"
        )
    block = text[anchor.start():].strip()
    # Cut at the first leaked chat turn or markdown fence, if any.
    cutoff = re.search(r"\n\s*(Assistant:|User:|###|```)", block)
    if cutoff is not None:
        block = block[:cutoff.start()]
    # Drop trailing whitespace left behind by the cut.
    return block.strip()

# =========================
# Public Space function
# (decorated for ZeroGPU/GPU)
# =========================
@spaces.GPU(duration=120)
def optimise_free_text(user_input: str) -> str:
    """Validate the teacher's idea and return one optimised prompt block.

    Out-of-policy input short-circuits to a fixed refusal string; valid
    input is wrapped in SYSTEM + OPT_TEMPLATE, generated, then trimmed
    to the structured block only.
    """
    if violates_policy(user_input):
        return "fora da política de otimização de prompts"

    task = OPT_TEMPLATE.format(user_pt=user_input.strip())
    raw = _generate(f"{SYSTEM}\n\n{task}", max_new_tokens=320, temperature=0.22)
    return keep_only_block(raw)

# =========================
# UI — simple, no chat
# =========================
THEME = gr.themes.Base(
    primary_hue="indigo",
    secondary_hue="red",
)

with gr.Blocks(title="Paulean AI — Otimizador de Prompts (British)", theme=THEME) as demo:
    gr.Markdown(
        "## Paulean AI — Otimizador de Prompts (British)\n"
        "Digite sua ideia **em português** (ex.: *faça uma aula de matemática sobre equações para o IB*). "
        "O sistema **não responde aulas** nem dúvidas — ele **apenas** devolve um **prompt otimizado** "
        "no formato padronizado (**Persona, Contexto, Tarefa, Formato, Critérios, Idioma**).\n\n"
        "**Entradas inadequadas** retornam: `fora da política de otimização de prompts`."
    )
    with gr.Row():
        with gr.Column(scale=1):
            # Left column: free-text input plus generate/clear buttons.
            inp = gr.Textbox(
                label="Sua ideia (pt-BR)",
                placeholder="Ex.: Faça uma aula de matemática sobre equações do 2º grau (40-50 min), com exemplos e exercícios...",
                lines=8
            )
            with gr.Row():
                btn = gr.Button("Gerar prompt", variant="primary")
                clr = gr.Button("Limpar")
        with gr.Column(scale=1):
            # Right column: read-only output with a copy button.
            out = gr.Textbox(
                label="Prompt otimizado (copiar e usar)",
                lines=18,
                show_copy_button=True
            )
    # Single handler: free text in -> optimised prompt block out.
    btn.click(optimise_free_text, inputs=inp, outputs=out)
    # Clear both textboxes with one click.
    clr.click(lambda: ("", ""), inputs=None, outputs=[inp, out])

if __name__ == "__main__":
    demo.launch()