# Hugging Face Space "twettermaker" — app.py (revision 67cb20e)
# -*- coding: utf-8 -*-
"""
Tweet Image Web App — Hugging Face Spaces (Gradio)
--------------------------------------------------
- Converte o app desktop em uma interface web Gradio para rodar em Spaces.
- Mantém o pipeline EXTRATIVO (não inventa palavras) e a lógica de seleção de frases.
- Gera imagens no estilo "tweet" e permite baixar um ZIP com todas.
Como rodar localmente:
pip install -r requirements.txt
python app.py
# acesse o link http://127.0.0.1:7860
Em Hugging Face Spaces:
- Crie um Space com SDK = Gradio (Python 3.10+).
- Faça upload deste arquivo, do requirements.txt e README.md.
- O Space inicia automaticamente.
"""
import io, re, os, zipfile, random, datetime, tempfile
from typing import List, Optional, Tuple
# Imaging / NLP
from PIL import Image, ImageDraw, ImageFont, ImageOps
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from keybert import KeyBERT
import requests
import gradio as gr
# --------------------------
# GENERAL CONFIG (fonts/style)
# --------------------------
# Canvas and layout metrics for the rendered tweet card.
CANVAS_W = 1600          # fixed canvas width in px (height is cropped after render)
PADDING = 80             # outer margin around all content
BG_COLOR = (255, 255, 255)
TEXT_COLOR = (15, 20, 25)        # Twitter-like near-black body text
HANDLE_COLOR = (83, 100, 113)    # muted grey for the @handle
META_COLOR = (83, 100, 113)      # same grey for the date/device line
DIVIDER_COLOR = (239, 243, 244)  # light divider drawn above the metrics row
# Paths tuned for the Linux container used by Spaces (DejaVu fonts)
FONT_NAME_BOLD_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
FONT_NAME_REG_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
FONT_BODY_REG_PATH = FONT_NAME_REG_PATH
# Font sizes (px) per visual element
FS_NAME = 54
FS_HANDLE = 46
FS_BODY = 60
FS_META = 42
FS_METRICS = 44
AVATAR_SIZE = 140        # avatar diameter in px
GUTTER = 32              # horizontal gap between avatar and text column
LINE_SPACING = 10        # extra px between wrapped body lines
DEVICE_POOL = ["Twitter for iPhone", "Twitter for Android", "Twitter Web App"]
# --------------------------
# Font helper
# --------------------------
def font(path, size):
    """Load a TrueType font at *size*, falling back to PIL's built-in default
    when the file is missing or unreadable."""
    try:
        loaded = ImageFont.truetype(path, size)
    except Exception:
        loaded = ImageFont.load_default()
    return loaded
# Pre-built font objects shared by every render call.
FONT_NAME_BOLD = font(FONT_NAME_BOLD_PATH, FS_NAME)
FONT_NAME_REG = font(FONT_NAME_REG_PATH, FS_HANDLE)
FONT_BODY = font(FONT_BODY_REG_PATH, FS_BODY)
FONT_META = font(FONT_NAME_REG_PATH, FS_META)
FONT_METRICS = font(FONT_NAME_REG_PATH, FS_METRICS)
# --------------------------
# NLP MODELS (loaded once)
# --------------------------
# Lazily-initialized singletons; populated by load_models().
EMB_MODEL = None
KW_MODEL = None
def load_models():
    """Lazily initialize the shared embedding and keyword models (idempotent)."""
    global EMB_MODEL, KW_MODEL
    if EMB_MODEL is not None and KW_MODEL is not None:
        return
    EMB_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    KW_MODEL = KeyBERT(EMB_MODEL)
# High-impact Portuguese tokens that boost a phrase's style score.
STRONG_TOKENS = {
    "como","por quê","por que","aprendi","nunca","sempre","hoje","resultado",
    "prova","segredo","erro","verdade","atenção","cuidado","evite","descobri",
    "funciona","dados","passo","ganhar","perder","crescer","acertar",
    "fácil","difícil","exemplo","estratégia","tática","processo","prático"
}
# Phrases opening with these connectives read weakly as standalone tweets.
BEGIN_BAD = r'^(e|mas|ou|então|daí|aí|só que)\b'
# --------------------------
# UTIL: cleanup/normalization
# --------------------------
# Essential Portuguese stopwords + hedges/fillers. Negations and numbers are kept.
PT_STOP = {
    'a','à','às','o','os','as','um','uma','uns','umas','de','do','da','dos','das','no','na','nos','nas','em','por','para','pra','com','sem','sobre','entre','até','após','antes','desde',
    'que','se','quando','onde','como','qual','quais','quanto','tanto','toda','todo','todas','todos','cada','mais','menos','muito','muitos','muita','muitas',
    'já','ainda','também','só','pois','porque','porquê','né','tipo','basicamente','literalmente','na','no','bem','aliás','então','daí','aí','talvez','acho','meio','um pouco','depois','antes',
    'né?','ok','ok?','certo','certo?','assim','coisa','coisas','etc'
}
# Filler phrases removed wholesale by the extractive rewriter.
FILLER_PHRASES = [
    'na verdade','de verdade','de certa forma','no final do dia','de alguma forma','de um jeito','por exemplo','para ser sincero','pra ser sincero','para falar a verdade'
]
WEAK_ADVERBS = {'talvez','acho','quase','apenas','somente','só','um pouco','meio'}
NEGATIONS = {'não','nunca','jamais','sem'}
# LEAD_TRIM strips weak leading connectives; MULTI_SPACE collapses runs of
# whitespace; SPACE_PUNCT removes the space before punctuation marks.
LEAD_TRIM = re.compile(r'^(e|mas|ou|então|daí|aí|só que|agora|bom|olha)\b\s*', re.I)
MULTI_SPACE = re.compile(r'\s{2,}')
SPACE_PUNCT = re.compile(r'\s+([,.;:!?])')
# Token = word run, single punctuation mark, or dash character.
TOKEN_RE = re.compile(r"\w+|[\.,;:!?()]|[–—-]", re.U)
def safe_tokens(text: str) -> List[str]:
    """Tokenize *text* into word runs, punctuation marks, and dashes."""
    return [match.group(0) for match in TOKEN_RE.finditer(text)]
def is_number(tok: str) -> bool:
    """True when *tok* is a digit run optionally followed by digits, dots, or commas."""
    match = re.fullmatch(r"\d+[\d.,]*", tok)
    return match is not None
def impact_rewrite_extractive(text: str, strength: int = 50) -> str:
    """Compress *text* extractively: drop weak words, keep strong ones.

    No new words are ever introduced — the output only contains tokens
    already present in the input. *strength* (0-100) controls how
    aggressive the compression is: the kept-word ratio decreases from
    0.75 (strength=0) down to 0.2 (strength=100).
    """
    strength = max(0, min(100, strength))
    keep_ratio = 1.0 - (0.25 + 0.55 * (strength/100.0)) # 0.75..0.2
    s = text.strip()
    s = LEAD_TRIM.sub('', s)  # drop a weak leading connective, if any
    # Remove filler phrases wholesale before tokenizing.
    for fp in FILLER_PHRASES:
        s = re.sub(rf"\b{re.escape(fp)}\b", '', s, flags=re.I)
    s = MULTI_SPACE.sub(' ', s)
    toks = safe_tokens(s)
    # Score every token: negations/numbers (1.2) > strong tokens (1.1)
    # > ordinary words (0.9) > punctuation (0.6) > stopwords/weak adverbs (0.2).
    scored: List[Tuple[float, int, str]] = []
    for i, t in enumerate(toks):
        tl = t.lower()
        if tl in NEGATIONS or is_number(tl):
            sc = 1.2
        elif tl in STRONG_TOKENS:
            sc = 1.1
        elif tl in WEAK_ADVERBS or tl in PT_STOP:
            sc = 0.2
        elif re.fullmatch(r'[.,;:!?()]', t) or re.fullmatch(r'[–—-]', t):
            sc = 0.6
        else:
            sc = 0.9
        scored.append((sc, i, t))
    # Keep the top-scoring words (ties broken by earlier position), minimum 4.
    n_words = sum(1 for t in toks if re.match(r'\w', t))
    target_words = max(4, int(n_words * keep_ratio))
    word_items = [(sc, i, t) for (sc,i,t) in scored if re.match(r'\w', t)]
    word_items.sort(key=lambda x: (-x[0], x[1]))
    keep_idx = sorted(i for _, i, _ in word_items[:target_words])
    keep_set = set(keep_idx)
    # Reassemble in original order; a punctuation token survives only when
    # something was already emitted and a kept word follows within the next
    # two token positions.
    out: List[str] = []
    for i, t in enumerate(toks):
        if re.match(r'\w', t):
            if i in keep_set:
                out.append(t)
        else:
            if out and (i+1 < len(toks) and any(j in keep_set for j in range(i+1, min(i+3, len(toks))))):
                out.append(t)
    sent = ' '.join(out)
    sent = SPACE_PUNCT.sub(r'\1', sent)       # no space before punctuation
    sent = MULTI_SPACE.sub(' ', sent).strip()
    if sent:
        sent = sent[0].upper() + sent[1:]     # re-capitalize the first letter
    return sent
# --------------------------
# CANDIDATE PHRASES
# --------------------------
def split_sentences(text: str):
    """Break *text* into trimmed fragments at sentence punctuation or newlines."""
    fragments = re.split(r'(?<=[\.!\?:;])\s+|\n+', text.strip())
    stripped = [fragment.strip() for fragment in fragments]
    return [fragment for fragment in stripped if fragment]
def generate_candidates(parts: List[str], max_len=240):
    """Build 1-, 2-, and 3-sentence windows over *parts*.

    Windows longer than *max_len* characters are discarded; the result is
    deduplicated while preserving first-seen order.
    """
    windows = []
    total = len(parts)
    for start in range(total):
        single = parts[start]
        if len(single) <= max_len:
            windows.append(single)
        for span in (2, 3):
            if start + span > total:
                break
            merged = " ".join(parts[start:start + span]).strip()
            if len(merged) <= max_len:
                windows.append(merged)
    unique, seen = [], set()
    for cand in windows:
        if cand not in seen:
            seen.add(cand)
            unique.append(cand)
    return unique
# --------------------------
# SMART SCORING
# --------------------------
def phrase_score(phrase, idx, keyphrases, doc_emb, ph_emb, max_len=240):
    """Heuristic score for one candidate phrase.

    Combines length (sweet spot around 160 chars), semantic relevance to
    the whole document, stylistic signals, keyphrase coverage, and
    candidate position, minus penalties for weak openers and short texts.

    Args:
        phrase: candidate text.
        idx: position of the candidate in the generation order.
        keyphrases: (keyphrase, score) pairs — presumably from KeyBERT; verify against caller.
        doc_emb, ph_emb: 2-D embeddings of shape (1, dim) — the [0][0]
            indexing below and the caller's reshape(1, -1) rely on this.
        max_len: maximum allowed phrase length used by the length score.

    Returns:
        float total score (unbounded; higher is better).
    """
    L = len(phrase)
    center = 160
    # 1.0 at the 160-char sweet spot, decaying linearly toward max_len.
    len_score = max(0, 1 - abs(L-center)/(max_len-center))
    kp_bonus = 0.0
    lo = phrase.lower()
    # +1 for each keyphrase whose every word appears in the phrase.
    for kp,_ in keyphrases:
        if all(w in lo for w in kp.split()):
            kp_bonus += 1.0
    kp_score = min(1.0, kp_bonus/3.0)  # saturates after three hits
    rel = float(cosine_similarity(doc_emb, ph_emb)[0][0])
    rel_score = (rel + 1)/2  # map cosine [-1, 1] into [0, 1]
    # Stylistic signals: strong tokens, punctuation density, questions,
    # exclamations, first-person voice, imperative opener.
    words = re.findall(r'\w+', lo, flags=re.UNICODE)
    strong_hits = sum(1 for w in words if w in STRONG_TOKENS)
    punct_hits = len(re.findall(r'[,:;()]', phrase)) + len(re.findall(r'[–—-]', phrase))
    qmark = "?" in phrase
    exclam = "!" in phrase
    first_person = bool(re.search(r'\b(eu|meu|minha|aprendi|descobri)\b', lo))
    imperative = bool(re.match(r'^(faça|evite|pare|comece|teste|use|mude|foque|aprenda)\b', lo))
    style = 0.2*strong_hits + 0.05*punct_hits
    if qmark: style += 0.2
    if exclam: style += 0.1
    if first_person: style += 0.15
    if imperative: style += 0.2
    style_score = min(1.0, 0.4 + style)
    pen = 0.0
    if re.match(BEGIN_BAD, lo): pen += 0.25   # weak connective opener
    if L < 60: pen += 0.2                     # too short to stand alone
    pos_score = max(0.5, 1.0 - 0.02*idx)      # mild preference for earlier candidates
    # Weighted sum: length and relevance dominate, then style, keyphrases, position.
    total = (1.6*len_score + 1.6*rel_score + 1.3*style_score + 1.0*kp_score + 0.5*pos_score) - pen
    return total
def pick_best_phrases(text: str, max_len=240, top_k=3, impact_strength: int = 50):
    """Select up to *top_k* diverse, high-scoring phrases extracted from *text*.

    Pipeline: split into sentences → build 1-3 sentence candidate windows
    → score each candidate (phrase_score) → shortlist → MMR-style greedy
    selection for diversity → extractive impact rewrite of each pick.

    Returns a list of phrase strings (possibly truncated to *max_len*);
    empty list when no candidate fits the length limit.
    """
    load_models()
    parts = split_sentences(text)
    cands = generate_candidates(parts, max_len)
    if not cands:
        return []
    kw = KW_MODEL.extract_keywords(text, keyphrase_ngram_range=(1,3), stop_words='portuguese', top_n=8)
    doc_emb = EMB_MODEL.encode([text], convert_to_numpy=True, normalize_embeddings=True)
    ph_embs = EMB_MODEL.encode(cands, convert_to_numpy=True, normalize_embeddings=True)
    scored = []
    for idx, (c, emb) in enumerate(zip(cands, ph_embs)):
        s = phrase_score(c, idx, kw, doc_emb, emb.reshape(1,-1), max_len=max_len)
        scored.append((s, c, idx))
    # Sort by score descending (ties fall back to tuple comparison on text).
    scored.sort(reverse=True)
    shortlist = [c for _,c,_ in scored[:max(12, top_k*4)]]
    shortlist_embs = EMB_MODEL.encode(shortlist, convert_to_numpy=True, normalize_embeddings=True)
    from sklearn.metrics.pairwise import cosine_similarity as cs
    sim_doc = cs(shortlist_embs, doc_emb).reshape(-1)
    # Greedy MMR-style pick: seed with the most document-similar candidate,
    # then balance document similarity against redundancy (lambda = 0.6
    # weight on similarity to already-selected items).
    selected = []
    cur = int(np.argmax(sim_doc)); selected.append(cur)
    while len(selected) < min(top_k, len(shortlist)):
        remaining = [i for i in range(len(shortlist)) if i not in selected]
        best_i, best_score = None, -1e9
        for i in remaining:
            sim_to_doc = sim_doc[i]
            sim_to_selected = max([cs(shortlist_embs[i].reshape(1,-1),
                                      shortlist_embs[j].reshape(1,-1))[0][0] for j in selected] + [0])
            mmr = (1-0.6)*sim_to_doc - 0.6*sim_to_selected
            if mmr > best_score:
                best_score = mmr; best_i = i
        selected.append(best_i)
    # Impact-rewrite each pick; fall back to the original phrase when the
    # rewrite comes back empty, and hard-truncate to max_len.
    final_phrases = []
    for i in selected:
        base = shortlist[i]
        rew = impact_rewrite_extractive(base, strength=impact_strength)
        if not rew:
            rew = base
        rew = rew.strip()
        if len(rew) > max_len:
            rew = rew[:max_len].rstrip()
        final_phrases.append(rew)
    return final_phrases
# --------------------------
# TWEET RENDERING
# --------------------------
def draw_wrapped_text(draw, text, font, x, y, max_width, fill):
    """Greedy word-wrap *text* to *max_width* pixels and draw it at (x, y).

    Args:
        draw: PIL ImageDraw instance used for measuring and drawing.
        text: body text to render.
        font: PIL font for measurement and drawing.
        x, y: top-left corner of the text block.
        max_width: maximum line width in pixels.
        fill: text color.

    Returns:
        The y coordinate just below the last drawn line.

    Fix: the original appended an empty string to *lines* whenever a single
    word was wider than max_width (flushing an empty current line), which
    rendered a blank line; over-long words now simply start their own line.
    """
    words, lines, line = text.split(), [], []
    for w in words:
        test = " ".join(line + [w])
        wsize = draw.textbbox((0, 0), test, font=font)
        if wsize[2] - wsize[0] <= max_width:
            line.append(w)
        else:
            if line:  # flush only a non-empty line
                lines.append(" ".join(line))
            line = [w]
    if line:
        lines.append(" ".join(line))
    cur_y = y
    for ln in lines:
        draw.text((x, cur_y), ln, font=font, fill=fill)
        bbox = draw.textbbox((x, cur_y), ln, font=font)
        cur_y += (bbox[3] - bbox[1]) + LINE_SPACING
    return cur_y
def circular_avatar(pil_img, size=AVATAR_SIZE):
    """Return *pil_img* scaled and masked into a circular RGBA thumbnail.

    When no image is supplied, a flat grey placeholder circle is produced.
    """
    source = Image.new("RGB", (size, size), (200, 205, 210)) if pil_img is None else pil_img
    fitted = ImageOps.fit(source, (size, size), method=Image.LANCZOS)
    mask = Image.new("L", (size, size), 0)
    ImageDraw.Draw(mask).ellipse((0, 0, size, size), fill=255)
    result = Image.new("RGBA", (size, size), (255, 255, 255, 0))
    result.paste(fitted, (0, 0), mask)
    return result
def random_meta():
    """Fabricate plausible tweet metadata.

    Returns (date string, device name, comments, retweets, likes) with a
    timestamp up to ~two weeks in the past and randomized engagement counts.
    """
    moment = datetime.datetime.now() - datetime.timedelta(
        days=random.randint(0, 14),
        hours=random.randint(0, 23),
        minutes=random.randint(0, 59),
    )
    hour = moment.strftime("%I").lstrip("0") or "0"  # 12-hour clock, no leading zero
    date_str = f"{hour}{moment.strftime(':%M %p · %b %d, %Y')}"
    device = random.choice(DEVICE_POOL)
    counts = (random.randint(0, 800), random.randint(0, 3500), random.randint(0, 15000))
    return (date_str, device) + counts
def render_tweet_image(text, name, handle, avatar_img=None) -> Image.Image:
    """Render one tweet-style card and return it as a PIL image.

    Layout: circular avatar top-left; bold display name with the grey
    @handle beside it; wrapped body text; a randomized "date · device"
    line; a thin divider; and an engagement metrics row. The canvas is
    allocated 1200 px tall and cropped to the actual content height.
    """
    img = Image.new("RGB", (CANVAS_W, 1200), BG_COLOR)
    draw = ImageDraw.Draw(img)
    x, y = PADDING, PADDING
    # Circular avatar in the top-left corner (pasted with its own alpha mask).
    avatar = circular_avatar(avatar_img, AVATAR_SIZE)
    img.paste(avatar, (x, y), avatar)
    # Display name (bold) with the @handle to its right.
    nx, ny = x + AVATAR_SIZE + GUTTER, y + 6
    draw.text((nx, ny), name, font=FONT_NAME_BOLD, fill=TEXT_COLOR)
    name_bbox = draw.textbbox((nx, ny), name, font=FONT_NAME_BOLD)
    name_w = name_bbox[2]-name_bbox[0]
    hx, hy = nx + name_w + 18, ny + (FS_NAME - FS_HANDLE)  # nudge handle down to align
    draw.text((hx, hy), handle, font=FONT_NAME_REG, fill=HANDLE_COLOR)
    # Wrapped tweet body below the name row.
    body_x, body_y = nx, ny + FS_NAME + 20
    max_text_w = CANVAS_W - body_x - PADDING
    body_end_y = draw_wrapped_text(draw, text, FONT_BODY, body_x, body_y, max_text_w, TEXT_COLOR)
    # Randomized "date · device" metadata line.
    date_str, device, comments, retweets, likes = random_meta()
    meta = f"{date_str} · {device}"
    meta_y = body_end_y + 20
    draw.text((body_x, meta_y), meta, font=FONT_META, fill=META_COLOR)
    meta_end_y = draw.textbbox((body_x, meta_y), meta, font=FONT_META)[3]
    # Thin divider, then the engagement metrics row.
    div_y = meta_end_y + 24
    draw.line([(PADDING, div_y), (CANVAS_W - PADDING, div_y)], fill=DIVIDER_COLOR, width=2)
    metrics_y = div_y + 26
    metrics = f"{comments} Comments {retweets} Retweets {likes} Likes"
    draw.text((body_x, metrics_y), metrics, font=FONT_METRICS, fill=TEXT_COLOR)
    # Crop the canvas down to the rendered content height plus bottom padding.
    end_y = draw.textbbox((body_x, metrics_y), metrics, font=FONT_METRICS)[3] + PADDING
    img = img.crop((0, 0, CANVAS_W, end_y))
    return img
# --------------------------
# GRADIO FUNCTIONS
# --------------------------
def _load_avatar(avatar_url: Optional[str], avatar_file: Optional[Image.Image]) -> Optional[Image.Image]:
    """Resolve the avatar image, best-effort.

    Preference order: uploaded file, then URL download; returns None when
    neither source yields a usable image (all failures are swallowed).
    """
    if avatar_file is not None:
        try:
            return avatar_file.convert("RGB")
        except Exception:
            pass  # fall through to the URL
    if avatar_url:
        try:
            response = requests.get(avatar_url, timeout=10)
            response.raise_for_status()
            return Image.open(io.BytesIO(response.content)).convert("RGB")
        except Exception:
            pass  # network/decode failure — no avatar
    return None
def generate_images(text, name, handle, topk, maxlen, impact, avatar_url, avatar_file):
    """Gradio callback: pick phrases, render one tweet image each, zip them.

    Returns:
        (list of PIL images for the gallery, filesystem path to a ZIP
        containing one PNG per generated image).

    Raises:
        gr.Error: with a user-facing (Portuguese) message on missing
            inputs or when no phrase fits the length limit.
    """
    load_models()
    if not text or not name or not handle:
        raise gr.Error("Preencha: Texto, Nome e @arroba.")
    phrases = pick_best_phrases(text, max_len=int(maxlen), top_k=int(topk), impact_strength=int(impact))
    if not phrases:
        raise gr.Error("Não encontrei frases ≤ limite de caracteres.")
    avatar_img = _load_avatar(avatar_url, avatar_file)
    images = []
    for p in phrases:
        im = render_tweet_image(p, name, handle, avatar_img)
        images.append((p, im))
    # Write a temporary ZIP containing every rendered PNG.
    tmpdir = tempfile.mkdtemp(prefix="tweets_")
    zpath = os.path.join(tmpdir, "tweets.zip")
    with zipfile.ZipFile(zpath, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for i, (p, im) in enumerate(images, 1):
            bio = io.BytesIO()
            im.save(bio, format="PNG")
            # Filename derived from the phrase text, sanitized for the filesystem.
            fname = re.sub(r'[^a-zA-Z0-9_-]+', '_', (p[:30] or f"tweet_{i}")).strip("_")
            zf.writestr(f"{i:02d}_{fname}.png", bio.getvalue())
    # The gallery receives images only (captions dropped).
    pil_list = [im for _, im in images]
    return pil_list, zpath
# Gradio UI: input controls on the left, gallery + ZIP download on the right.
with gr.Blocks(title="Tweet Image Generator") as demo:
    gr.Markdown("# Tweet Image Generator\nGere imagens estilo tweet a partir de um texto longo.")
    with gr.Row():
        with gr.Column():
            text = gr.Textbox(label="Texto de origem", lines=10, placeholder="Cole aqui o texto...")
            name = gr.Textbox(label="Nome", value="Elon Musk")
            handle = gr.Textbox(label="@arroba", value="@elonmusk")
            with gr.Row():
                topk = gr.Slider(1, 6, value=3, step=1, label="Top K (quantas imagens)")
                maxlen = gr.Slider(80, 280, value=240, step=1, label="Máx. caracteres")
                impact = gr.Slider(0, 100, value=50, step=1, label="Força do Impacto (0=leve, 100=forte)")
            avatar_url = gr.Textbox(label="Avatar URL (opcional)")
            avatar_file = gr.Image(type="pil", label="Avatar arquivo (opcional)")
            btn = gr.Button("Gerar imagens")
        with gr.Column():
            gallery = gr.Gallery(label="Imagens geradas", columns=1, height=520)
            zip_out = gr.File(label="Baixar ZIP")
    # Wire the button to the generation callback.
    btn.click(fn=generate_images,
              inputs=[text, name, handle, topk, maxlen, impact, avatar_url, avatar_file],
              outputs=[gallery, zip_out])
if __name__ == "__main__":
    demo.launch()