# -*- coding: utf-8 -*-
"""
Tweet Image Web App — Hugging Face Spaces (Gradio)
--------------------------------------------------
- Turns the desktop app into a Gradio web interface that can run on Spaces.
- Keeps the EXTRACTIVE pipeline (it never invents words) and the phrase
  selection logic.
- Renders "tweet"-style images and offers a ZIP download with all of them.

Running locally:
    pip install -r requirements.txt
    python app.py
    # open http://127.0.0.1:7860

On Hugging Face Spaces:
- Create a Space with SDK = Gradio (Python 3.10+).
- Upload this file, requirements.txt and README.md.
- The Space starts automatically.
"""
import datetime
import io
import logging
import os
import random
import re
import tempfile
import zipfile
from typing import List, Optional, Tuple

# Imaging / NLP
from PIL import Image, ImageDraw, ImageFont, ImageOps
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from keybert import KeyBERT
import requests
import gradio as gr

logger = logging.getLogger(__name__)

# --------------------------
# GENERAL CONFIG (fonts/style)
# --------------------------
CANVAS_W = 1600
PADDING = 80
BG_COLOR = (255, 255, 255)
TEXT_COLOR = (15, 20, 25)
HANDLE_COLOR = (83, 100, 113)
META_COLOR = (83, 100, 113)
DIVIDER_COLOR = (239, 243, 244)

# Font paths for the Linux container used by Spaces (DejaVu).
FONT_NAME_BOLD_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
FONT_NAME_REG_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
FONT_BODY_REG_PATH = FONT_NAME_REG_PATH

FS_NAME = 54
FS_HANDLE = 46
FS_BODY = 60
FS_META = 42
FS_METRICS = 44

AVATAR_SIZE = 140
GUTTER = 32
LINE_SPACING = 10

# Initial canvas height; the canvas grows beyond this when the text needs it.
MIN_CANVAS_H = 1200

DEVICE_POOL = ["Twitter for iPhone", "Twitter for Android", "Twitter Web App"]


# --------------------------
# Font helper
# --------------------------
def font(path, size):
    """Load a TrueType font, falling back to Pillow's built-in default font.

    The fallback keeps the app usable on machines that do not ship the
    DejaVu fonts at the configured paths.
    """
    try:
        return ImageFont.truetype(path, size)
    except (OSError, ImportError):
        # OSError: file missing/unreadable; ImportError: FreeType not built in.
        return ImageFont.load_default()


FONT_NAME_BOLD = font(FONT_NAME_BOLD_PATH, FS_NAME)
FONT_NAME_REG = font(FONT_NAME_REG_PATH, FS_HANDLE)
FONT_BODY = font(FONT_BODY_REG_PATH, FS_BODY)
FONT_META = font(FONT_NAME_REG_PATH, FS_META)
FONT_METRICS = font(FONT_NAME_REG_PATH, FS_METRICS)

# --------------------------
# NLP MODELS (loaded once)
# --------------------------
EMB_MODEL = None
KW_MODEL = None


def load_models():
    """Lazily create the sentence-embedding and KeyBERT models (module globals)."""
    global EMB_MODEL, KW_MODEL
    if EMB_MODEL is None or KW_MODEL is None:
        EMB_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        KW_MODEL = KeyBERT(EMB_MODEL)


STRONG_TOKENS = {
    "como", "por quê", "por que", "aprendi", "nunca", "sempre", "hoje", "resultado",
    "prova", "segredo", "erro", "verdade", "atenção", "cuidado", "evite", "descobri",
    "funciona", "dados", "passo", "ganhar", "perder", "crescer", "acertar",
    "fácil", "difícil", "exemplo", "estratégia", "tática", "processo", "prático"
}
BEGIN_BAD = r'^(e|mas|ou|então|daí|aí|só que)\b'

# Phrase length (in characters) that scores best, and the reference limit it
# was tuned for; the sweet spot is scaled down for smaller limits.
LENGTH_SWEET_SPOT = 160
MMR_REDUNDANCY_WEIGHT = 0.6

# --------------------------
# UTIL: cleaning/normalisation
# --------------------------
# Portuguese stopwords (essentials) + hedges/fillers. Negations and numbers
# are deliberately kept by the rewriter (they are scored before this set).
PT_STOP = {
    'a', 'à', 'às', 'o', 'os', 'as', 'um', 'uma', 'uns', 'umas', 'de', 'do', 'da', 'dos', 'das',
    'no', 'na', 'nos', 'nas', 'em', 'por', 'para', 'pra', 'com', 'sem', 'sobre', 'entre', 'até',
    'após', 'antes', 'desde',
    'que', 'se', 'quando', 'onde', 'como', 'qual', 'quais', 'quanto', 'tanto', 'toda', 'todo',
    'todas', 'todos', 'cada', 'mais', 'menos', 'muito', 'muitos', 'muita', 'muitas',
    'já', 'ainda', 'também', 'só', 'pois', 'porque', 'porquê', 'né', 'tipo', 'basicamente',
    'literalmente', 'na', 'no', 'bem', 'aliás', 'então', 'daí', 'aí', 'talvez', 'acho', 'meio',
    'um pouco', 'depois', 'antes',
    'né?', 'ok', 'ok?', 'certo', 'certo?', 'assim', 'coisa', 'coisas', 'etc'
}
# Single-word subset of PT_STOP handed to KeyBERT. scikit-learn's
# CountVectorizer only accepts 'english' as a *named* stop list, so any other
# language has to be supplied as an explicit list of words.
KW_STOP_WORDS = sorted(w for w in PT_STOP if re.fullmatch(r'\w+', w))

FILLER_PHRASES = [
    'na verdade', 'de verdade', 'de certa forma', 'no final do dia', 'de alguma forma',
    'de um jeito', 'por exemplo', 'para ser sincero', 'pra ser sincero', 'para falar a verdade'
]
# Compiled once; applied in list order, exactly like the per-call re.sub loop.
_FILLER_RES = [re.compile(rf"\b{re.escape(fp)}\b", re.I) for fp in FILLER_PHRASES]
WEAK_ADVERBS = {'talvez', 'acho', 'quase', 'apenas', 'somente', 'só', 'um pouco', 'meio'}
NEGATIONS = {'não', 'nunca', 'jamais', 'sem'}
LEAD_TRIM = re.compile(r'^(e|mas|ou|então|daí|aí|só que|agora|bom|olha)\b\s*', re.I)
MULTI_SPACE = re.compile(r'\s{2,}')
SPACE_PUNCT = re.compile(r'\s+([,.;:!?])')
TOKEN_RE = re.compile(r"\w+|[\.,;:!?()]|[–—-]", re.U)
_WORD_START = re.compile(r'\w')


def safe_tokens(text: str) -> List[str]:
    """Split *text* into word tokens and single punctuation/dash tokens."""
    return TOKEN_RE.findall(text)


def is_number(tok: str) -> bool:
    """Return True when *tok* is digits optionally followed by digits, '.' or ','."""
    return bool(re.fullmatch(r"\d+[\d.,]*", tok))


def _word_weight(tok: str) -> float:
    """Importance weight of a word token: higher means 'keep it first'."""
    low = tok.lower()
    if low in NEGATIONS or is_number(low):
        return 1.2
    if low in STRONG_TOKENS:
        return 1.1
    if low in WEAK_ADVERBS or low in PT_STOP:
        return 0.2
    return 0.9


def impact_rewrite_extractive(text: str, strength: int = 50) -> str:
    """Shorten *text* by dropping low-value words; never adds new words.

    Args:
        text: Source phrase.
        strength: 0..100 (clamped). Higher values keep fewer words: the kept
            fraction goes from 0.75 (strength 0) down to 0.20 (strength 100),
            with a floor of 4 words.

    Returns:
        The rewritten phrase with its first character upper-cased, or an
        empty string when nothing is left.
    """
    strength = max(0, min(100, strength))
    keep_ratio = 1.0 - (0.25 + 0.55 * (strength / 100.0))  # 0.75..0.2

    s = LEAD_TRIM.sub('', text.strip())
    for filler_re in _FILLER_RES:
        s = filler_re.sub('', s)
    s = MULTI_SPACE.sub(' ', s)

    toks = safe_tokens(s)
    word_items: List[Tuple[float, int, str]] = [
        (_word_weight(t), i, t) for i, t in enumerate(toks) if _WORD_START.match(t)
    ]
    target_words = max(4, int(len(word_items) * keep_ratio))

    # Highest weight first; ties resolved by original position.
    word_items.sort(key=lambda item: (-item[0], item[1]))
    keep_set = {i for _, i, _ in word_items[:target_words]}

    out: List[str] = []
    for i, t in enumerate(toks):
        if _WORD_START.match(t):
            if i in keep_set:
                out.append(t)
        elif out and any(j in keep_set for j in range(i + 1, min(i + 3, len(toks)))):
            # Punctuation survives only between kept words: something was
            # already emitted and a kept word follows within two tokens.
            out.append(t)

    sent = ' '.join(out)
    sent = SPACE_PUNCT.sub(r'\1', sent)
    sent = MULTI_SPACE.sub(' ', sent).strip()
    if sent:
        sent = sent[0].upper() + sent[1:]
    return sent


# --------------------------
# CANDIDATE PHRASES
# --------------------------
def split_sentences(text: str):
    """Split on whitespace after . ! ? : ; and on newlines; drop empty pieces."""
    parts = re.split(r'(?<=[\.!\?:;])\s+|\n+', text.strip())
    return [p.strip() for p in parts if p.strip()]


def generate_candidates(parts: List[str], max_len=240):
    """Build candidates from runs of 1, 2 and 3 consecutive sentences.

    Only runs whose joined length is <= *max_len* are kept; duplicates are
    removed while preserving first-seen order.
    """
    cand = []
    for i in range(len(parts)):
        for width in (1, 2, 3):
            window = parts[i:i + width]
            if len(window) < width:
                break
            joined = " ".join(window).strip() if width > 1 else window[0]
            if len(joined) <= max_len:
                cand.append(joined)
    # dict preserves insertion order, so this is an ordered de-duplication.
    return list(dict.fromkeys(cand))


# --------------------------
# SMART SCORING
# --------------------------
def phrase_score(phrase, idx, keyphrases, doc_emb, ph_emb, max_len=240):
    """Score a candidate phrase; higher is better.

    Combines length fit, semantic similarity to the whole document,
    stylistic cues, keyphrase coverage and position, minus penalties for a
    weak opening conjunction or a very short phrase.

    Args:
        phrase: Candidate text.
        idx: Position of the candidate in the candidate list.
        keyphrases: Iterable of ``(keyphrase, score)`` pairs.
        doc_emb: Document embedding, shape (1, d).
        ph_emb: Phrase embedding, shape (1, d).
        max_len: Character limit the candidates were generated with.
    """
    L = len(phrase)
    # The sweet spot is 160 chars for limits >= 240 and scales down (2/3 of
    # the limit) below that. A fixed 160 divided by zero at max_len == 160 and
    # inverted the score for smaller limits.
    center = min(float(LENGTH_SWEET_SPOT), max_len * 2 / 3)
    span = max(1.0, max_len - center)
    len_score = max(0.0, 1 - abs(L - center) / span)

    lo = phrase.lower()
    kp_hits = sum(1.0 for kp, _ in keyphrases if all(w in lo for w in kp.split()))
    kp_score = min(1.0, kp_hits / 3.0)

    rel = float(cosine_similarity(doc_emb, ph_emb)[0][0])
    rel_score = (rel + 1) / 2  # map [-1, 1] to [0, 1]

    words = re.findall(r'\w+', lo, flags=re.UNICODE)
    strong_hits = sum(1 for w in words if w in STRONG_TOKENS)
    punct_hits = len(re.findall(r'[,:;()]', phrase)) + len(re.findall(r'[–—-]', phrase))
    first_person = bool(re.search(r'\b(eu|meu|minha|aprendi|descobri)\b', lo))
    imperative = bool(re.match(r'^(faça|evite|pare|comece|teste|use|mude|foque|aprenda)\b', lo))

    style = 0.2 * strong_hits + 0.05 * punct_hits
    if "?" in phrase:
        style += 0.2
    if "!" in phrase:
        style += 0.1
    if first_person:
        style += 0.15
    if imperative:
        style += 0.2
    style_score = min(1.0, 0.4 + style)

    pen = 0.0
    if re.match(BEGIN_BAD, lo):
        pen += 0.25
    if L < 60:
        pen += 0.2

    pos_score = max(0.5, 1.0 - 0.02 * idx)

    return (1.6 * len_score + 1.6 * rel_score + 1.3 * style_score
            + 1.0 * kp_score + 0.5 * pos_score) - pen


def pick_best_phrases(text: str, max_len=240, top_k=3, impact_strength: int = 50):
    """Select up to *top_k* diverse, high-scoring phrases from *text*.

    Candidates are scored with :func:`phrase_score`, the best ones are
    shortlisted, and the final picks are chosen greedily with an MMR-style
    trade-off between similarity to the document and redundancy with phrases
    already chosen. Each pick is then shortened with
    :func:`impact_rewrite_extractive` and truncated to *max_len*.

    Returns:
        A list of phrases (possibly empty).
    """
    if top_k <= 0:
        return []
    load_models()
    cands = generate_candidates(split_sentences(text), max_len)
    if not cands:
        return []

    # An explicit word list is required here: 'portuguese' is not a stop list
    # name that scikit-learn's CountVectorizer (used by KeyBERT) understands.
    kw = KW_MODEL.extract_keywords(
        text, keyphrase_ngram_range=(1, 3), stop_words=KW_STOP_WORDS, top_n=8
    )
    doc_emb = EMB_MODEL.encode([text], convert_to_numpy=True, normalize_embeddings=True)
    ph_embs = EMB_MODEL.encode(cands, convert_to_numpy=True, normalize_embeddings=True)

    scored = [
        (phrase_score(c, idx, kw, doc_emb, emb.reshape(1, -1), max_len=max_len), c, idx)
        for idx, (c, emb) in enumerate(zip(cands, ph_embs))
    ]
    scored.sort(reverse=True)

    top = scored[:max(12, top_k * 4)]
    shortlist = [c for _, c, _ in top]
    # Reuse the embeddings computed above instead of encoding the shortlist again.
    shortlist_embs = ph_embs[[idx for _, _, idx in top]]

    sim_doc = cosine_similarity(shortlist_embs, doc_emb).reshape(-1)
    pair_sim = cosine_similarity(shortlist_embs)  # one matrix for all pairs

    selected = [int(np.argmax(sim_doc))]
    while len(selected) < min(top_k, len(shortlist)):
        best_i, best_score = None, -1e9
        for i in range(len(shortlist)):
            if i in selected:
                continue
            redundancy = max(0.0, float(pair_sim[i, selected].max()))
            mmr = (1 - MMR_REDUNDANCY_WEIGHT) * sim_doc[i] - MMR_REDUNDANCY_WEIGHT * redundancy
            if mmr > best_score:
                best_score, best_i = mmr, i
        selected.append(best_i)

    final_phrases = []
    for i in selected:
        base = shortlist[i]
        rew = impact_rewrite_extractive(base, strength=impact_strength) or base
        rew = rew.strip()
        if len(rew) > max_len:
            rew = rew[:max_len].rstrip()
        final_phrases.append(rew)
    return final_phrases


# --------------------------
# TWEET RENDERING
# --------------------------
def _wrap_lines(draw, text, fnt, max_width) -> List[str]:
    """Greedy word-wrap of *text* so each line fits in *max_width* pixels.

    A single word wider than *max_width* is placed on a line of its own.
    """
    lines: List[str] = []
    current: List[str] = []
    for word in text.split():
        box = draw.textbbox((0, 0), " ".join(current + [word]), font=fnt)
        if box[2] - box[0] <= max_width or not current:
            # `not current`: an over-wide first word must still start a line,
            # otherwise an empty line would be emitted before it.
            current.append(word)
        else:
            lines.append(" ".join(current))
            current = [word]
    if current:
        lines.append(" ".join(current))
    return lines


def _line_height(draw, fnt) -> int:
    """Uniform line height: ink height of a string with ascender and descender."""
    box = draw.textbbox((0, 0), "Ag", font=fnt)
    return box[3] - box[1]


def draw_wrapped_text(draw, text, font, x, y, max_width, fill):
    """Draw *text* word-wrapped at *max_width*, starting at (x, y).

    Every line advances by the same height plus ``LINE_SPACING`` so spacing
    does not depend on which glyphs a particular line contains.

    Returns:
        The y coordinate just below the last line drawn (``y`` if no text).
    """
    step = _line_height(draw, font) + LINE_SPACING
    cur_y = y
    for ln in _wrap_lines(draw, text, font, max_width):
        draw.text((x, cur_y), ln, font=font, fill=fill)
        cur_y += step
    return cur_y


def circular_avatar(pil_img, size=AVATAR_SIZE):
    """Return a *size* x *size* RGBA image of *pil_img* cropped to a circle.

    When *pil_img* is None a flat grey placeholder is used instead.
    """
    if pil_img is None:
        im = Image.new("RGB", (size, size), (200, 205, 210))
    else:
        im = pil_img
    im = ImageOps.fit(im, (size, size), method=Image.LANCZOS)
    mask = Image.new("L", (size, size), 0)
    ImageDraw.Draw(mask).ellipse((0, 0, size, size), fill=255)
    out = Image.new("RGBA", (size, size), (255, 255, 255, 0))
    out.paste(im, (0, 0), mask)
    return out


def random_meta():
    """Make up tweet metadata: timestamp string, device and engagement counts.

    The timestamp is a random moment within roughly the last 15 days.
    """
    base = datetime.datetime.now() - datetime.timedelta(
        days=random.randint(0, 14),
        hours=random.randint(0, 23),
        minutes=random.randint(0, 59)
    )
    # %I is zero-padded; strip the padding for a "9:05 PM"-style hour.
    hour = base.strftime("%I").lstrip("0") or "0"
    date_str = f"{hour}{base.strftime(':%M %p · %b %d, %Y')}"
    device = random.choice(DEVICE_POOL)
    comments = random.randint(0, 800)
    retweets = random.randint(0, 3500)
    likes = random.randint(0, 15000)
    return date_str, device, comments, retweets, likes


def render_tweet_image(text, name, handle, avatar_img=None) -> Image.Image:
    """Render a tweet-style card for *text* and return it cropped to content.

    The canvas height is derived from the wrapped text so that long bodies
    are never cut off or padded with out-of-canvas pixels.
    """
    x, y = PADDING, PADDING
    nx, ny = x + AVATAR_SIZE + GUTTER, y + 6
    body_x, body_y = nx, ny + FS_NAME + 20
    max_text_w = CANVAS_W - body_x - PADDING

    # Measure the body on a scratch surface before allocating the real canvas.
    scratch = ImageDraw.Draw(Image.new("RGB", (1, 1)))
    n_lines = len(_wrap_lines(scratch, text, FONT_BODY, max_text_w))
    body_h = n_lines * (_line_height(scratch, FONT_BODY) + LINE_SPACING)
    # Generous allowance for the meta line, divider, metrics and bottom padding.
    footer_h = 20 + 2 * FS_META + 24 + 26 + 2 * FS_METRICS + PADDING
    canvas_h = max(MIN_CANVAS_H, body_y + body_h + footer_h)

    img = Image.new("RGB", (CANVAS_W, canvas_h), BG_COLOR)
    draw = ImageDraw.Draw(img)

    avatar = circular_avatar(avatar_img, AVATAR_SIZE)
    img.paste(avatar, (x, y), avatar)

    draw.text((nx, ny), name, font=FONT_NAME_BOLD, fill=TEXT_COLOR)
    name_bbox = draw.textbbox((nx, ny), name, font=FONT_NAME_BOLD)
    name_w = name_bbox[2] - name_bbox[0]
    hx, hy = nx + name_w + 18, ny + (FS_NAME - FS_HANDLE)
    draw.text((hx, hy), handle, font=FONT_NAME_REG, fill=HANDLE_COLOR)

    body_end_y = draw_wrapped_text(draw, text, FONT_BODY, body_x, body_y, max_text_w, TEXT_COLOR)

    date_str, device, comments, retweets, likes = random_meta()
    meta = f"{date_str} · {device}"
    meta_y = body_end_y + 20
    draw.text((body_x, meta_y), meta, font=FONT_META, fill=META_COLOR)
    meta_end_y = draw.textbbox((body_x, meta_y), meta, font=FONT_META)[3]

    div_y = meta_end_y + 24
    draw.line([(PADDING, div_y), (CANVAS_W - PADDING, div_y)], fill=DIVIDER_COLOR, width=2)

    metrics_y = div_y + 26
    metrics = f"{comments} Comments {retweets} Retweets {likes} Likes"
    draw.text((body_x, metrics_y), metrics, font=FONT_METRICS, fill=TEXT_COLOR)

    end_y = draw.textbbox((body_x, metrics_y), metrics, font=FONT_METRICS)[3] + PADDING
    return img.crop((0, 0, CANVAS_W, min(end_y, img.height)))


# --------------------------
# GRADIO FUNCTIONS
# --------------------------
def _load_avatar(avatar_url: Optional[str], avatar_file: Optional[Image.Image]) -> Optional[Image.Image]:
    """Return the avatar as an RGB image, or None when none can be obtained.

    An uploaded file takes precedence over the URL. The avatar is optional,
    so failures are logged and never propagated to the caller.
    """
    if avatar_file is not None:
        try:
            return avatar_file.convert("RGB")
        except Exception:
            logger.warning("Could not use the uploaded avatar image", exc_info=True)

    url = (avatar_url or "").strip()
    if not url:
        return None
    if not url.lower().startswith(("http://", "https://")):
        logger.warning("Ignoring avatar URL with unsupported scheme: %r", url)
        return None
    # NOTE(review): the URL is user-supplied and fetched server-side; on a
    # public deployment consider restricting hosts to avoid SSRF.
    try:
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        return Image.open(io.BytesIO(r.content)).convert("RGB")
    except Exception:
        logger.warning("Could not download avatar from %r", url, exc_info=True)
    return None


def generate_images(text, name, handle, topk, maxlen, impact, avatar_url, avatar_file):
    """Gradio handler: build the tweet images and a ZIP archive of them.

    Returns:
        ``(images, zip_path)`` — the list of PIL images for the gallery and
        the path of a ZIP file containing them as PNGs.

    Raises:
        gr.Error: when a required field is empty or no phrase fits the limit.
    """
    # Validate before doing anything expensive (model loading happens inside
    # pick_best_phrases).
    if not text or not name or not handle:
        raise gr.Error("Preencha: Texto, Nome e @arroba.")

    phrases = pick_best_phrases(
        text, max_len=int(maxlen), top_k=int(topk), impact_strength=int(impact)
    )
    if not phrases:
        raise gr.Error("Não encontrei frases ≤ limite de caracteres.")

    avatar_img = _load_avatar(avatar_url, avatar_file)
    images = [(p, render_tweet_image(p, name, handle, avatar_img)) for p in phrases]

    # Write the ZIP to a temporary directory; the path is handed to Gradio.
    tmpdir = tempfile.mkdtemp(prefix="tweets_")
    zpath = os.path.join(tmpdir, "tweets.zip")
    with zipfile.ZipFile(zpath, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for i, (p, im) in enumerate(images, 1):
            bio = io.BytesIO()
            im.save(bio, format="PNG")
            # Fall back AFTER sanitising, so an all-non-ASCII phrase still
            # gets a meaningful file name.
            fname = re.sub(r'[^a-zA-Z0-9_-]+', '_', p[:30]).strip("_") or f"tweet_{i}"
            zf.writestr(f"{i:02d}_{fname}.png", bio.getvalue())

    # Only the images go to the gallery.
    pil_list = [im for _, im in images]
    return pil_list, zpath


with gr.Blocks(title="Tweet Image Generator") as demo:
    gr.Markdown("# Tweet Image Generator\nGere imagens estilo tweet a partir de um texto longo.")
    with gr.Row():
        with gr.Column():
            text = gr.Textbox(label="Texto de origem", lines=10, placeholder="Cole aqui o texto...")
            name = gr.Textbox(label="Nome", value="Elon Musk")
            handle = gr.Textbox(label="@arroba", value="@elonmusk")
            with gr.Row():
                topk = gr.Slider(1, 6, value=3, step=1, label="Top K (quantas imagens)")
                maxlen = gr.Slider(80, 280, value=240, step=1, label="Máx. caracteres")
                impact = gr.Slider(0, 100, value=50, step=1, label="Força do Impacto (0=leve, 100=forte)")
            avatar_url = gr.Textbox(label="Avatar URL (opcional)")
            avatar_file = gr.Image(type="pil", label="Avatar arquivo (opcional)")
            btn = gr.Button("Gerar imagens")
        with gr.Column():
            gallery = gr.Gallery(label="Imagens geradas", columns=1, height=520)
            zip_out = gr.File(label="Baixar ZIP")

    btn.click(
        fn=generate_images,
        inputs=[text, name, handle, topk, maxlen, impact, avatar_url, avatar_file],
        outputs=[gallery, zip_out],
    )

if __name__ == "__main__":
    demo.launch()