Spaces:

leicam
/

twettermaker

Sleeping

File size: 16,553 Bytes

# -*- coding: utf-8 -*-
"""
Tweet Image Web App — Hugging Face Spaces (Gradio)
--------------------------------------------------
- Converte o app desktop em uma interface web Gradio para rodar em Spaces.
- Mantém o pipeline EXTRATIVO (não inventa palavras) e a lógica de seleção de frases.
- Gera imagens no estilo "tweet" e permite baixar um ZIP com todas.

Como rodar localmente:
    pip install -r requirements.txt
    python app.py
    # acesse o link http://127.0.0.1:7860

Em Hugging Face Spaces:
    - Crie um Space com SDK = Gradio (Python 3.10+).
    - Faça upload deste arquivo, do requirements.txt e README.md.
    - O Space inicia automaticamente.
"""

import io, re, os, zipfile, random, datetime, tempfile
from typing import List, Optional, Tuple

# Imaging / NLP
from PIL import Image, ImageDraw, ImageFont, ImageOps
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from keybert import KeyBERT
import requests

import gradio as gr

# --------------------------
# GENERAL CONFIG (fonts/style)
# --------------------------
CANVAS_W = 1600                 # width of the rendered image, in pixels
PADDING = 80                    # outer margin used on every side of the card
BG_COLOR = (255, 255, 255)      # RGB background of the card
TEXT_COLOR = (15, 20, 25)       # RGB colour for the display name, body and metrics line
HANDLE_COLOR = (83, 100, 113)   # RGB colour for the @handle
META_COLOR = (83, 100, 113)     # RGB colour for the date/device line
DIVIDER_COLOR = (239, 243, 244) # RGB colour for the horizontal divider

# Font paths for the Linux container used by Spaces (DejaVu fonts)
FONT_NAME_BOLD_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
FONT_NAME_REG_PATH  = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
FONT_BODY_REG_PATH  = FONT_NAME_REG_PATH

# Font sizes passed to the font loader
FS_NAME = 54
FS_HANDLE = 46
FS_BODY = 60
FS_META = 42
FS_METRICS = 44
AVATAR_SIZE = 140               # side of the square avatar, in pixels
GUTTER = 32                     # horizontal gap between the avatar and the text column
LINE_SPACING = 10               # extra pixels added after each wrapped body line

# Client labels; one is picked at random for the meta line of each image
DEVICE_POOL = ["Twitter for iPhone", "Twitter for Android", "Twitter Web App"]

# --------------------------
# Font helper
# --------------------------
def font(path, size):
    """Load a TrueType font, falling back to Pillow's built-in default font.

    Args:
        path: Filesystem path of the TrueType font file.
        size: Requested font size.

    Returns:
        The font loaded from ``path`` when that succeeds. Otherwise Pillow's
        default font, at the requested size when the installed Pillow
        supports it, or the fixed-size default font as a last resort.
    """
    try:
        return ImageFont.truetype(path, size)
    except Exception:
        # Best effort: a missing or unreadable font file must not stop the app.
        pass
    try:
        # Pillow >= 10.1 accepts a size for the default font. Without it the
        # fallback is a tiny fixed-size font that is unreadable on this canvas.
        return ImageFont.load_default(size=size)
    except Exception:
        # Older Pillow (no ``size`` argument) or no FreeType support.
        return ImageFont.load_default()

# Font objects created once at import time and shared by every render.
FONT_NAME_BOLD = font(FONT_NAME_BOLD_PATH, FS_NAME)     # display name
FONT_NAME_REG  = font(FONT_NAME_REG_PATH,  FS_HANDLE)   # @handle (regular face at the handle size)
FONT_BODY      = font(FONT_BODY_REG_PATH,  FS_BODY)     # tweet body
FONT_META      = font(FONT_NAME_REG_PATH,  FS_META)     # date / device line
FONT_METRICS   = font(FONT_NAME_REG_PATH,  FS_METRICS)  # comments / retweets / likes line

# --------------------------
# MODELOS NLP (carregados 1x)
# --------------------------
# Lazily-initialised model singletons; both stay None until load_models() runs.
EMB_MODEL = None
KW_MODEL = None


def load_models():
    """Create the sentence-embedding and keyword models if they are not loaded yet.

    Both module-level globals are (re)assigned whenever either one is still
    ``None``; when both are already set the call does nothing.
    """
    global EMB_MODEL, KW_MODEL
    if EMB_MODEL is not None and KW_MODEL is not None:
        return
    EMB_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    # KeyBERT is built on top of the same embedding model instance.
    KW_MODEL = KeyBERT(EMB_MODEL)

# Portuguese words treated as "strong": they receive a high keep-weight in the
# extractive rewrite and add to the style score of a candidate phrase.
# NOTE(review): both consumers compare against single \w+ tokens, so the
# multi-word entries ("por quê", "por que") can never match as written.
STRONG_TOKENS = {
    "como","por quê","por que","aprendi","nunca","sempre","hoje","resultado",
    "prova","segredo","erro","verdade","atenção","cuidado","evite","descobri",
    "funciona","dados","passo","ganhar","perder","crescer","acertar",
    "fácil","difícil","exemplo","estratégia","tática","processo","prático"
}
# Regex for weak opening connectives; a phrase that starts with one is
# penalised when scored (matched against the lower-cased phrase).
BEGIN_BAD = r'^(e|mas|ou|então|daí|aí|só que)\b'

# --------------------------
# UTIL: limpeza/normalização
# --------------------------

# Portuguese stop words (essentials) plus hedges/fillers. Negations and numbers
# are kept: the rewrite checks them before this set and gives them the top weight.
# NOTE(review): entries containing a space or '?' ('um pouco', 'né?', 'ok?',
# 'certo?') cannot equal any token produced by TOKEN_RE, so they are inert.
PT_STOP = {
    'a','à','às','o','os','as','um','uma','uns','umas','de','do','da','dos','das','no','na','nos','nas','em','por','para','pra','com','sem','sobre','entre','até','após','antes','desde',
    'que','se','quando','onde','como','qual','quais','quanto','tanto','toda','todo','todas','todos','cada','mais','menos','muito','muitos','muita','muitas',
    'já','ainda','também','só','pois','porque','porquê','né','tipo','basicamente','literalmente','na','no','bem','aliás','então','daí','aí','talvez','acho','meio','um pouco','depois','antes',
    'né?','ok','ok?','certo','certo?','assim','coisa','coisas','etc'
}

# Multi-word filler expressions removed verbatim (case-insensitively) before tokenising.
FILLER_PHRASES = [
    'na verdade','de verdade','de certa forma','no final do dia','de alguma forma','de um jeito','por exemplo','para ser sincero','pra ser sincero','para falar a verdade'
]

# Hedging adverbs; scored as low as stop words in the rewrite.
WEAK_ADVERBS = {'talvez','acho','quase','apenas','somente','só','um pouco','meio'}
# Negation words; scored highest so they survive the rewrite.
NEGATIONS = {'não','nunca','jamais','sem'}

# Leading connective/interjection (plus trailing whitespace) stripped from the start of a sentence.
LEAD_TRIM = re.compile(r'^(e|mas|ou|então|daí|aí|só que|agora|bom|olha)\b\s*', re.I)
# Runs of two or more whitespace characters.
MULTI_SPACE = re.compile(r'\s{2,}')
# Whitespace that precedes a punctuation mark; the mark is captured in group 1.
SPACE_PUNCT = re.compile(r'\s+([,.;:!?])')

# Tokeniser: a run of word characters, one punctuation mark, or one dash.
TOKEN_RE = re.compile(r"\w+|[\.,;:!?()]|[–—-]", re.U)

def safe_tokens(text: str) -> List[str]:
    """Split ``text`` into the word, punctuation and dash tokens matched by TOKEN_RE."""
    # TOKEN_RE has no capture groups, so each match object's full text is one token.
    return [match.group() for match in TOKEN_RE.finditer(text)]

def is_number(tok: str) -> bool:
    """Return True when ``tok`` starts with a digit and contains only digits, '.' and ','."""
    return re.fullmatch(r"\d+[\d.,]*", tok) is not None

def impact_rewrite_extractive(text: str, strength: int = 50) -> str:
    """Shorten a sentence by dropping its lowest-weight words (purely extractive).

    No word is added or reordered: the function only removes a leading
    connective, filler expressions and low-weight words, then re-joins what
    is left in its original order.

    Args:
        text: Sentence to condense.
        strength: 0..100 (clamped). 0 keeps about 75% of the words, 100 about
            20%; at least 4 words are always targeted.

    Returns:
        The condensed sentence with its first character upper-cased, or an
        empty string when nothing is left.
    """
    strength = min(100, max(0, strength))
    keep_ratio = 1.0 - (0.25 + 0.55 * (strength/100.0))  # 0.75..0.2

    # Pre-clean: leading connective, filler expressions, doubled spaces.
    cleaned = LEAD_TRIM.sub('', text.strip())
    for filler in FILLER_PHRASES:
        cleaned = re.sub(rf"\b{re.escape(filler)}\b", '', cleaned, flags=re.I)
    cleaned = MULTI_SPACE.sub(' ', cleaned)

    tokens = safe_tokens(cleaned)
    total = len(tokens)

    def _weight(token: str) -> float:
        # Negations and numbers matter most, then strong words; hedges and
        # stop words matter least; any other word sits in between.
        lowered = token.lower()
        if lowered in NEGATIONS or is_number(lowered):
            return 1.2
        if lowered in STRONG_TOKENS:
            return 1.1
        if lowered in WEAK_ADVERBS or lowered in PT_STOP:
            return 0.2
        return 0.9

    # Only word tokens compete for a slot; punctuation is decided afterwards.
    word_positions = [pos for pos, tok in enumerate(tokens) if re.match(r'\w', tok)]
    target_words = max(4, int(len(word_positions) * keep_ratio))

    # Highest weight first; earlier position wins ties.
    ranked = sorted(word_positions, key=lambda pos: (-_weight(tokens[pos]), pos))
    kept = set(ranked[:target_words])

    pieces: List[str] = []
    for pos, tok in enumerate(tokens):
        if re.match(r'\w', tok):
            if pos in kept:
                pieces.append(tok)
            continue
        if not pieces:
            # Never start the output with punctuation.
            continue
        # Keep punctuation only when a kept word follows within the next two tokens.
        lookahead = range(pos + 1, min(pos + 3, total))
        if any(nxt in kept for nxt in lookahead):
            pieces.append(tok)

    result = ' '.join(pieces)
    result = SPACE_PUNCT.sub(r'\1', result)
    result = MULTI_SPACE.sub(' ', result).strip()

    # Slicing keeps the empty-string case safe.
    return result[:1].upper() + result[1:]

# --------------------------
# FRASES CANDIDATAS
# --------------------------
def split_sentences(text: str):
    """Split text into trimmed, non-empty pieces.

    A split happens at whitespace that follows one of ``. ! ? : ;`` and at
    any run of newlines.
    """
    raw_pieces = re.split(r'(?<=[\.!\?:;])\s+|\n+', text.strip())
    trimmed = (piece.strip() for piece in raw_pieces)
    return [piece for piece in trimmed if piece]

def generate_candidates(parts: List[str], max_len=240):
    """Build candidate phrases from windows of 1, 2 and 3 consecutive parts.

    Args:
        parts: Sentences in document order.
        max_len: Maximum number of characters allowed for a candidate.

    Returns:
        Candidates no longer than ``max_len``, in generation order (for each
        start index: 1-part, 2-part, 3-part window), without duplicates.
    """
    total = len(parts)
    windows = []
    for start in range(total):
        for width in (1, 2, 3):
            stop = start + width
            if stop > total:
                break
            chunk = parts[start:stop]
            # A single part is used verbatim; joined windows are stripped.
            joined = chunk[0] if width == 1 else " ".join(chunk).strip()
            if len(joined) <= max_len:
                windows.append(joined)
    # dict keys keep first-seen order, which removes duplicates stably.
    return list(dict.fromkeys(windows))

# --------------------------
# SCORING INTELIGENTE
# --------------------------
def phrase_score(phrase, idx, keyphrases, doc_emb, ph_emb, max_len=240):
    """Score a candidate phrase; higher is better.

    The total is a weighted sum of length, relevance, style, keyphrase and
    position scores, minus penalties.

    Args:
        phrase: Candidate text.
        idx: Position of the candidate in the candidate list; later
            candidates get a slightly lower position score (floor 0.5).
        keyphrases: Iterable of ``(keyphrase, score)`` pairs; only the
            keyphrase text is used.
        doc_emb: Embedding of the whole document, in the 2-D form accepted
            by ``cosine_similarity``.
        ph_emb: Embedding of this phrase, same form as ``doc_emb``.
        max_len: Character limit the candidates were generated with.

    Returns:
        The score as a float.
    """
    L = len(phrase)
    # Ideal length: 160 characters, capped at two thirds of the limit. A fixed
    # centre of 160 divided by zero when max_len == 160 and rewarded lengths
    # *far* from the centre when max_len < 160. Results for max_len >= 240 are
    # unchanged.
    center = min(160, max_len * 2 / 3)
    span = max_len - center
    len_score = max(0, 1 - abs(L - center) / span) if span > 0 else 0.0

    # Keyphrase coverage: one point per keyphrase whose words all occur in the
    # phrase (substring test); saturates at three keyphrases.
    kp_bonus = 0.0
    lo = phrase.lower()
    for kp, _ in keyphrases:
        if all(w in lo for w in kp.split()):
            kp_bonus += 1.0
    kp_score = min(1.0, kp_bonus / 3.0)

    # Relevance: cosine similarity to the document, mapped from [-1, 1] to [0, 1].
    rel = float(cosine_similarity(doc_emb, ph_emb)[0][0])
    rel_score = (rel + 1) / 2

    # Style cues: strong words, punctuation, questions/exclamations,
    # first-person wording and an imperative opening.
    words = re.findall(r'\w+', lo, flags=re.UNICODE)
    strong_hits = sum(1 for w in words if w in STRONG_TOKENS)
    punct_hits = len(re.findall(r'[,:;()]', phrase)) + len(re.findall(r'[–—-]', phrase))
    qmark = "?" in phrase
    exclam = "!" in phrase
    first_person = bool(re.search(r'\b(eu|meu|minha|aprendi|descobri)\b', lo))
    imperative = bool(re.match(r'^(faça|evite|pare|comece|teste|use|mude|foque|aprenda)\b', lo))

    style = 0.2 * strong_hits + 0.05 * punct_hits
    if qmark:
        style += 0.2
    if exclam:
        style += 0.1
    if first_person:
        style += 0.15
    if imperative:
        style += 0.2
    style_score = min(1.0, 0.4 + style)

    # Penalties: weak opening connective, and very short phrases.
    pen = 0.0
    if re.match(BEGIN_BAD, lo):
        pen += 0.25
    if L < 60:
        pen += 0.2

    pos_score = max(0.5, 1.0 - 0.02 * idx)

    total = (1.6 * len_score + 1.6 * rel_score + 1.3 * style_score
             + 1.0 * kp_score + 0.5 * pos_score) - pen
    return total

def pick_best_phrases(text: str, max_len=240, top_k=3, impact_strength: int = 50):
    """Pick up to ``top_k`` relevant, mutually diverse phrases from ``text``.

    Steps: split the text into sentences, build candidate windows, score each
    candidate, shortlist the best ones, select a diverse subset with a
    relevance-versus-redundancy trade-off, and condense each selected phrase
    with the extractive rewrite.

    Args:
        text: Source text.
        max_len: Maximum number of characters per returned phrase.
        top_k: Number of phrases wanted.
        impact_strength: 0..100, passed to ``impact_rewrite_extractive``.

    Returns:
        A list of phrases (possibly fewer than ``top_k``); empty when no
        candidate fits within ``max_len``.
    """
    load_models()
    parts = split_sentences(text)
    cands = generate_candidates(parts, max_len)
    if not cands:
        return []

    # scikit-learn's CountVectorizer (used by KeyBERT) only knows the built-in
    # stop list "english"; the string 'portuguese' is rejected, which left the
    # keyphrase bonus without any keyphrases. Pass an explicit word list
    # instead. Only single-word entries are kept so the list is consistent
    # with the vectorizer's own tokenisation.
    pt_stop_words = sorted(w for w in PT_STOP if re.fullmatch(r'\w+', w))
    kw = KW_MODEL.extract_keywords(
        text, keyphrase_ngram_range=(1, 3), stop_words=pt_stop_words, top_n=8
    )

    doc_emb = EMB_MODEL.encode([text], convert_to_numpy=True, normalize_embeddings=True)
    ph_embs = EMB_MODEL.encode(cands, convert_to_numpy=True, normalize_embeddings=True)

    scored = []
    for idx, (c, emb) in enumerate(zip(cands, ph_embs)):
        s = phrase_score(c, idx, kw, doc_emb, emb.reshape(1, -1), max_len=max_len)
        scored.append((s, c, idx))
    scored.sort(reverse=True)

    top = scored[:max(12, top_k * 4)]
    shortlist = [c for _, c, _ in top]
    # Reuse the embeddings computed above instead of encoding the shortlist again.
    shortlist_embs = ph_embs[[idx for _, _, idx in top]]

    sim_doc = cosine_similarity(shortlist_embs, doc_emb).reshape(-1)
    # All pairwise similarities at once, instead of one call per pair in the loop.
    pair_sim = cosine_similarity(shortlist_embs, shortlist_embs)

    # Greedy selection: start from the phrase closest to the document, then
    # repeatedly add the phrase with the best balance between closeness to the
    # document (weight 0.4) and similarity to what is already chosen (weight 0.6).
    selected = [int(np.argmax(sim_doc))]
    wanted = min(top_k, len(shortlist))
    while len(selected) < wanted:
        best_i, best_score = None, -1e9
        for i in range(len(shortlist)):
            if i in selected:
                continue
            # Redundancy is floored at 0, as negative similarity is not rewarded.
            redundancy = max(0.0, float(pair_sim[i, selected].max()))
            mmr = (1 - 0.6) * sim_doc[i] - 0.6 * redundancy
            if mmr > best_score:
                best_score, best_i = mmr, i
        selected.append(best_i)

    final_phrases = []
    for i in selected:
        base = shortlist[i]
        # Fall back to the original phrase if the rewrite removed everything.
        rew = impact_rewrite_extractive(base, strength=impact_strength) or base
        rew = rew.strip()
        if len(rew) > max_len:
            rew = rew[:max_len].rstrip()
        final_phrases.append(rew)

    return final_phrases

# --------------------------
# RENDER DO TWEET
# --------------------------
def draw_wrapped_text(draw, text, font, x, y, max_width, fill):
    """Draw ``text`` word-wrapped to ``max_width`` and return the next free y.

    Args:
        draw: Drawing object providing ``textbbox`` and ``text``.
        text: Text to draw; it is split on whitespace, so line breaks in the
            input are not preserved.
        font: Font used for measuring and drawing.
        x: Left coordinate of every line.
        y: Top coordinate of the first line.
        max_width: Maximum line width in pixels.
        fill: Text colour.

    Returns:
        The y coordinate just below the last drawn line (including the
        trailing line spacing), or ``y`` unchanged when ``text`` has no words.
    """
    lines, current = [], []
    for word in text.split():
        trial = " ".join(current + [word])
        left, _, right, _ = draw.textbbox((0, 0), trial, font=font)
        if right - left <= max_width or not current:
            # A word wider than max_width still gets its own line. Previously an
            # over-wide first word produced an empty line ahead of it.
            current.append(word)
        else:
            lines.append(" ".join(current))
            current = [word]
    if current:
        lines.append(" ".join(current))

    cur_y = y
    for ln in lines:
        draw.text((x, cur_y), ln, font=font, fill=fill)
        bbox = draw.textbbox((x, cur_y), ln, font=font)
        # Advance by the measured height of this line plus the fixed spacing.
        cur_y += (bbox[3] - bbox[1]) + LINE_SPACING
    return cur_y

def circular_avatar(pil_img, size=AVATAR_SIZE):
    """Return a ``size`` x ``size`` RGBA image with the avatar inside a circle.

    When ``pil_img`` is None a flat grey placeholder is used. The source is
    cropped and resized to fill the square, then pasted through an elliptical
    mask onto a fully transparent canvas.
    """
    square = (size, size)
    source = pil_img if pil_img is not None else Image.new("RGB", square, (200, 205, 210))
    fitted = ImageOps.fit(source, square, method=Image.LANCZOS)

    # 8-bit mask: 255 inside the ellipse, 0 outside.
    circle_mask = Image.new("L", square, 0)
    ImageDraw.Draw(circle_mask).ellipse((0, 0, size, size), fill=255)

    canvas = Image.new("RGBA", square, (255, 255, 255, 0))
    canvas.paste(fitted, (0, 0), circle_mask)
    return canvas

def random_meta():
    """Make up the metadata shown under a tweet.

    Returns:
        A tuple ``(date_str, device, comments, retweets, likes)``: a timestamp
        up to roughly 15 days in the past formatted like
        ``"3:07 PM · Jan 05, 2024"``, a client label from DEVICE_POOL, and
        three random counters.
    """
    now = datetime.datetime.now()
    # Drawn in a fixed order so a seeded RNG yields the same sequence.
    days_back = random.randint(0, 14)
    hours_back = random.randint(0, 23)
    minutes_back = random.randint(0, 59)
    stamp = now - datetime.timedelta(days=days_back, hours=hours_back, minutes=minutes_back)

    # 12-hour clock without the leading zero.
    hour = stamp.strftime("%I").lstrip("0") or "0"
    date_str = hour + stamp.strftime(':%M %p · %b %d, %Y')

    device = random.choice(DEVICE_POOL)
    comments = random.randint(0, 800)
    retweets = random.randint(0, 3500)
    likes = random.randint(0, 15000)
    return date_str, device, comments, retweets, likes

def _paint_tweet(canvas_h, text, name, handle, avatar, meta_fields):
    """Draw one tweet card on a new canvas of height ``canvas_h``; return ``(image, end_y)``."""
    img = Image.new("RGB", (CANVAS_W, canvas_h), BG_COLOR)
    draw = ImageDraw.Draw(img)
    x, y = PADDING, PADDING

    # The avatar is its own paste mask so the transparent corners stay untouched.
    img.paste(avatar, (x, y), avatar)

    # Display name, to the right of the avatar.
    nx, ny = x + AVATAR_SIZE + GUTTER, y + 6
    draw.text((nx, ny), name, font=FONT_NAME_BOLD, fill=TEXT_COLOR)
    name_bbox = draw.textbbox((nx, ny), name, font=FONT_NAME_BOLD)
    name_w = name_bbox[2] - name_bbox[0]

    # Handle, after the name, shifted down by the difference in font sizes.
    hx, hy = nx + name_w + 18, ny + (FS_NAME - FS_HANDLE)
    draw.text((hx, hy), handle, font=FONT_NAME_REG, fill=HANDLE_COLOR)

    # Body text, wrapped to the space left of the right-hand padding.
    body_x, body_y = nx, ny + FS_NAME + 20
    max_text_w = CANVAS_W - body_x - PADDING
    body_end_y = draw_wrapped_text(draw, text, FONT_BODY, body_x, body_y, max_text_w, TEXT_COLOR)

    date_str, device, comments, retweets, likes = meta_fields
    meta = f"{date_str} · {device}"
    meta_y = body_end_y + 20
    draw.text((body_x, meta_y), meta, font=FONT_META, fill=META_COLOR)
    meta_end_y = draw.textbbox((body_x, meta_y), meta, font=FONT_META)[3]

    div_y = meta_end_y + 24
    draw.line([(PADDING, div_y), (CANVAS_W - PADDING, div_y)], fill=DIVIDER_COLOR, width=2)

    metrics_y = div_y + 26
    metrics = f"{comments} Comments    {retweets} Retweets    {likes} Likes"
    draw.text((body_x, metrics_y), metrics, font=FONT_METRICS, fill=TEXT_COLOR)
    end_y = draw.textbbox((body_x, metrics_y), metrics, font=FONT_METRICS)[3] + PADDING
    return img, end_y


def render_tweet_image(text, name, handle, avatar_img=None) -> Image.Image:
    """Render a tweet-style card and return it cropped to its content height.

    Args:
        text: Tweet body.
        name: Display name.
        handle: Account handle, drawn after the name.
        avatar_img: Optional avatar image; a placeholder is used when None.

    Returns:
        An RGB image CANVAS_W pixels wide, cropped just below the metrics line
        plus the bottom padding.
    """
    avatar = circular_avatar(avatar_img, AVATAR_SIZE)
    # Drawn once so a second layout pass shows the same date and counters.
    meta_fields = random_meta()

    canvas_h = 1200
    img, end_y = _paint_tweet(canvas_h, text, name, handle, avatar, meta_fields)
    if end_y > canvas_h:
        # The content did not fit: with a fixed canvas the overflow was lost and
        # the crop padded the image with black. Redo the layout on a canvas
        # that is tall enough.
        canvas_h = int(end_y) + 1
        img, end_y = _paint_tweet(canvas_h, text, name, handle, avatar, meta_fields)

    return img.crop((0, 0, CANVAS_W, end_y))

# --------------------------
# FUNÇÕES GRADIO
# --------------------------
def _load_avatar(avatar_url: Optional[str], avatar_file: Optional[Image.Image]) -> Optional[Image.Image]:
    """Return the avatar as an RGB image, or None when none can be obtained.

    The uploaded image takes precedence; the URL is only tried when there is
    no upload or its conversion fails. Every failure is swallowed on purpose,
    because the avatar is optional and a placeholder is drawn without it.
    """
    if avatar_file is not None:
        try:
            return avatar_file.convert("RGB")
        except Exception:
            pass  # fall through to the URL, if any

    if not avatar_url:
        return None

    # NOTE(review): the URL comes straight from the user and is fetched
    # server-side with no host or size restriction; confirm this is acceptable.
    try:
        response = requests.get(avatar_url, timeout=10)
        response.raise_for_status()
        return Image.open(io.BytesIO(response.content)).convert("RGB")
    except Exception:
        return None

def generate_images(text, name, handle, topk, maxlen, impact, avatar_url, avatar_file):
    """Gradio callback: build the tweet images and a ZIP archive of them.

    Args:
        text: Source text to extract phrases from.
        name: Display name drawn on each image.
        handle: Account handle drawn on each image.
        topk: Number of images wanted.
        maxlen: Maximum number of characters per phrase.
        impact: Strength of the extractive rewrite (0..100).
        avatar_url: Optional avatar URL.
        avatar_file: Optional uploaded avatar image.

    Returns:
        ``(images, zip_path)``: the rendered images for the gallery and the
        path of a ZIP file containing them as PNGs.

    Raises:
        gr.Error: When a required field is empty or no phrase fits the limit.
    """
    # Validate before the (slow) model load, and reject whitespace-only fields.
    if not all(str(value or "").strip() for value in (text, name, handle)):
        raise gr.Error("Preencha: Texto, Nome e @arroba.")

    load_models()

    phrases = pick_best_phrases(text, max_len=int(maxlen), top_k=int(topk), impact_strength=int(impact))
    if not phrases:
        raise gr.Error("Não encontrei frases ≤ limite de caracteres.")

    avatar_img = _load_avatar(avatar_url, avatar_file)

    images = [(p, render_tweet_image(p, name, handle, avatar_img)) for p in phrases]

    # Write the temporary ZIP. The directory is left in place because the file
    # must still exist when the UI serves the download.
    tmpdir = tempfile.mkdtemp(prefix="tweets_")
    zpath = os.path.join(tmpdir, "tweets.zip")
    with zipfile.ZipFile(zpath, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for i, (p, im) in enumerate(images, 1):
            bio = io.BytesIO()
            im.save(bio, format="PNG")
            # Sanitise first, then fall back: a prefix made only of characters
            # outside [a-zA-Z0-9_-] would otherwise leave an empty name.
            fname = re.sub(r'[^a-zA-Z0-9_-]+', '_', p[:30]).strip("_") or f"tweet_{i}"
            zf.writestr(f"{i:02d}_{fname}.png", bio.getvalue())

    # Only the images go to the gallery.
    pil_list = [im for _, im in images]
    return pil_list, zpath

# Gradio UI: inputs in the left column, results in the right column.
with gr.Blocks(title="Tweet Image Generator") as demo:
    gr.Markdown("# Tweet Image Generator\nGere imagens estilo tweet a partir de um texto longo.")
    with gr.Row():
        with gr.Column():
            # Source text and the identity drawn on every image.
            text = gr.Textbox(label="Texto de origem", lines=10, placeholder="Cole aqui o texto...")
            name = gr.Textbox(label="Nome", value="Elon Musk")
            handle = gr.Textbox(label="@arroba", value="@elonmusk")
            with gr.Row():
                # Number of images, and the character limit per phrase.
                topk = gr.Slider(1, 6, value=3, step=1, label="Top K (quantas imagens)")
                maxlen = gr.Slider(80, 280, value=240, step=1, label="Máx. caracteres")
            # Strength of the extractive rewrite.
            impact = gr.Slider(0, 100, value=50, step=1, label="Força do Impacto (0=leve, 100=forte)")
            # Optional avatar, either by URL or as an uploaded image (received as a PIL image).
            avatar_url = gr.Textbox(label="Avatar URL (opcional)")
            avatar_file = gr.Image(type="pil", label="Avatar arquivo (opcional)")
            btn = gr.Button("Gerar imagens")
        with gr.Column():
            gallery = gr.Gallery(label="Imagens geradas", columns=1, height=520)
            zip_out = gr.File(label="Baixar ZIP")

    # The input order must match the parameter order of generate_images,
    # and the output order must match its return tuple.
    btn.click(fn=generate_images,
              inputs=[text, name, handle, topk, maxlen, impact, avatar_url, avatar_file],
              outputs=[gallery, zip_out])

# Start the app when the file is run as a script.
if __name__ == "__main__":
    demo.launch()