Spaces:
Running
Running
| # -*- coding: utf-8 -*- | |
| """ | |
| Tweet Image Web App — Hugging Face Spaces (Gradio) | |
| -------------------------------------------------- | |
| - Converte o app desktop em uma interface web Gradio para rodar em Spaces. | |
| - Mantém o pipeline EXTRATIVO (não inventa palavras) e a lógica de seleção de frases. | |
| - Gera imagens no estilo "tweet" e permite baixar um ZIP com todas. | |
| Como rodar localmente: | |
| pip install -r requirements.txt | |
| python app.py | |
| # acesse o link http://127.0.0.1:7860 | |
| Em Hugging Face Spaces: | |
| - Crie um Space com SDK = Gradio (Python 3.10+). | |
| - Faça upload deste arquivo, do requirements.txt e README.md. | |
| - O Space inicia automaticamente. | |
| """ | |
| import io, re, os, zipfile, random, datetime, tempfile | |
| from typing import List, Optional, Tuple | |
| # Imaging / NLP | |
| from PIL import Image, ImageDraw, ImageFont, ImageOps | |
| import numpy as np | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from sentence_transformers import SentenceTransformer | |
| from keybert import KeyBERT | |
| import requests | |
| import gradio as gr | |
| # -------------------------- | |
| # CONFIG GERAL (fonts/estilo) | |
| # -------------------------- | |
# --- Canvas geometry (all values in pixels) ---
CANVAS_W = 1600
PADDING = 80
AVATAR_SIZE = 140
GUTTER = 32
LINE_SPACING = 10

# --- Colour palette (RGB tuples) ---
BG_COLOR = (255, 255, 255)
TEXT_COLOR = (15, 20, 25)
HANDLE_COLOR = (83, 100, 113)
META_COLOR = (83, 100, 113)
DIVIDER_COLOR = (239, 243, 244)

# --- Font files ---
# DejaVu paths as laid out on the Linux image used by the Spaces container.
FONT_NAME_BOLD_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
FONT_NAME_REG_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
FONT_BODY_REG_PATH = FONT_NAME_REG_PATH

# --- Font sizes ---
FS_NAME = 54
FS_HANDLE = 46
FS_BODY = 60
FS_META = 42
FS_METRICS = 44

# Client labels; one is picked at random for the metadata line of each tweet.
DEVICE_POOL = ["Twitter for iPhone", "Twitter for Android", "Twitter Web App"]
| # -------------------------- | |
| # Font helper | |
| # -------------------------- | |
def font(path, size):
    """Load the TrueType font at ``path`` at ``size``, with a sized fallback.

    When the font file cannot be loaded, Pillow's built-in default font is
    used instead. The requested size is forwarded to the fallback where the
    installed Pillow supports it, so the layout keeps its proportions instead
    of collapsing to the tiny fixed-size bitmap font.

    Args:
        path: Filesystem path of the ``.ttf`` file.
        size: Requested font size.

    Returns:
        A Pillow font object usable with ``ImageDraw`` text methods.
    """
    try:
        return ImageFont.truetype(path, size)
    except (OSError, ImportError, ValueError):
        # Missing/unreadable file, FreeType unavailable, or invalid size:
        # fall through to the built-in font (best effort, never raises here).
        pass
    try:
        # Newer Pillow releases accept a size for the default font.
        return ImageFont.load_default(size=size)
    except (TypeError, OSError, ImportError, ValueError):
        # Older Pillow (no ``size`` argument) or no FreeType support.
        return ImageFont.load_default()


# Font instances shared by the renderer (loaded once at import time).
FONT_NAME_BOLD = font(FONT_NAME_BOLD_PATH, FS_NAME)
FONT_NAME_REG = font(FONT_NAME_REG_PATH, FS_HANDLE)
FONT_BODY = font(FONT_BODY_REG_PATH, FS_BODY)
FONT_META = font(FONT_NAME_REG_PATH, FS_META)
FONT_METRICS = font(FONT_NAME_REG_PATH, FS_METRICS)
| # -------------------------- | |
| # MODELOS NLP (carregados 1x) | |
| # -------------------------- | |
# Lazily created singletons; populated by load_models().
EMB_MODEL = None
KW_MODEL = None


def load_models():
    """Create the sentence-embedding and keyword models on first use.

    Does nothing when both module-level models already exist; otherwise
    (re)creates both, with the KeyBERT instance wrapping the embedding model.
    """
    global EMB_MODEL, KW_MODEL
    if EMB_MODEL is not None and KW_MODEL is not None:
        return
    EMB_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    KW_MODEL = KeyBERT(EMB_MODEL)
# Words that earn a phrase a style bonus and are favoured when compressing.
STRONG_TOKENS = {
    "como", "por quê", "por que", "aprendi", "nunca", "sempre", "hoje",
    "resultado", "prova", "segredo", "erro", "verdade", "atenção", "cuidado",
    "evite", "descobri", "funciona", "dados", "passo", "ganhar", "perder",
    "crescer", "acertar", "fácil", "difícil", "exemplo", "estratégia",
    "tática", "processo", "prático",
}

# Weak sentence openers; a phrase starting with one of them is penalised.
BEGIN_BAD = r'^(e|mas|ou|então|daí|aí|só que)\b'
| # -------------------------- | |
| # UTIL: limpeza/normalização | |
| # -------------------------- | |
| # stopwords PT (essenciais) + hedges/fillers. Mantemos negações e números. | |
# Portuguese stop words plus hedges/fillers. Negations and numbers are
# handled separately and score higher than anything listed here.
PT_STOP = {
    # articles, prepositions and contractions
    'a', 'à', 'às', 'o', 'os', 'as', 'um', 'uma', 'uns', 'umas',
    'de', 'do', 'da', 'dos', 'das', 'no', 'na', 'nos', 'nas',
    'em', 'por', 'para', 'pra', 'com', 'sem', 'sobre', 'entre',
    'até', 'após', 'antes', 'desde',
    # conjunctions, relatives and quantifiers
    'que', 'se', 'quando', 'onde', 'como', 'qual', 'quais', 'quanto', 'tanto',
    'toda', 'todo', 'todas', 'todos', 'cada',
    'mais', 'menos', 'muito', 'muitos', 'muita', 'muitas',
    # adverbs, hedges and spoken fillers
    'já', 'ainda', 'também', 'só', 'pois', 'porque', 'porquê', 'né', 'tipo',
    'basicamente', 'literalmente', 'bem', 'aliás', 'então', 'daí', 'aí',
    'talvez', 'acho', 'meio', 'um pouco', 'depois',
    # tag questions and vague nouns
    'né?', 'ok', 'ok?', 'certo', 'certo?', 'assim', 'coisa', 'coisas', 'etc',
}

# Multi-word fillers removed verbatim (case-insensitive) before tokenising.
FILLER_PHRASES = [
    'na verdade',
    'de verdade',
    'de certa forma',
    'no final do dia',
    'de alguma forma',
    'de um jeito',
    'por exemplo',
    'para ser sincero',
    'pra ser sincero',
    'para falar a verdade',
]

WEAK_ADVERBS = {'talvez', 'acho', 'quase', 'apenas', 'somente', 'só', 'um pouco', 'meio'}
NEGATIONS = {'não', 'nunca', 'jamais', 'sem'}

# Leading connective (plus trailing whitespace) stripped from a sentence start.
LEAD_TRIM = re.compile(r'^(e|mas|ou|então|daí|aí|só que|agora|bom|olha)\b\s*', re.I)
# Runs of two or more whitespace characters.
MULTI_SPACE = re.compile(r'\s{2,}')
# Whitespace sitting directly before a punctuation mark.
SPACE_PUNCT = re.compile(r'\s+([,.;:!?])')
# Tokens: word runs, single punctuation marks, or a dash.
TOKEN_RE = re.compile(r"\w+|[\.,;:!?()]|[–—-]", re.U)
def safe_tokens(text: str) -> List[str]:
    """Split ``text`` into the word, punctuation and dash tokens of TOKEN_RE."""
    return [match.group(0) for match in TOKEN_RE.finditer(text)]
# A run of digits optionally followed by more digits, dots or commas.
_NUMERIC_TOKEN = re.compile(r"\d+[\d.,]*")


def is_number(tok: str) -> bool:
    """Return True when the whole of ``tok`` looks like a number.

    The token must start with a digit; dots and commas are accepted after
    the first digit (e.g. ``1.234,56``).
    """
    return _NUMERIC_TOKEN.fullmatch(tok) is not None
def impact_rewrite_extractive(text: str, strength: int = 50) -> str:
    """Compress ``text`` by dropping low-value words; never adds new words.

    A leading connective and known filler phrases are removed, then every
    word is scored (negations/numbers > strong tokens > ordinary words >
    stop words/weak adverbs) and only the best-scoring share is kept, in the
    original order. Ties are broken in favour of earlier words.

    Punctuation is kept when a kept word follows within the next two tokens.
    Sentence-final ``.``, ``!`` and ``?`` are also kept: the look-ahead rule
    alone always discarded them, which turned questions into statements.

    Args:
        text: Source sentence(s).
        strength: 0..100 (clamped). Higher values keep fewer words: roughly
            75% of the words at 0 and 20% at 100, never fewer than four.

    Returns:
        The compressed text with its first character upper-cased, or an
        empty string when nothing survives.
    """
    strength = max(0, min(100, strength))
    keep_ratio = 1.0 - (0.25 + 0.55 * (strength / 100.0))  # 0.75 .. 0.20

    cleaned = LEAD_TRIM.sub('', text.strip())
    for filler in FILLER_PHRASES:
        cleaned = re.sub(rf"\b{re.escape(filler)}\b", '', cleaned, flags=re.I)
    cleaned = MULTI_SPACE.sub(' ', cleaned)

    toks = safe_tokens(cleaned)
    # Classify each token once instead of re-running the regex per pass.
    is_word = [re.match(r'\w', t) is not None for t in toks]
    word_idx = [i for i, flag in enumerate(is_word) if flag]

    def word_score(tok: str) -> float:
        low = tok.lower()
        if low in NEGATIONS or is_number(low):
            return 1.2
        if low in STRONG_TOKENS:
            return 1.1
        if low in WEAK_ADVERBS or low in PT_STOP:
            return 0.2
        return 0.9

    target_words = max(4, int(len(word_idx) * keep_ratio))
    # Best score first; earlier position wins ties.
    ranked = sorted(word_idx, key=lambda i: (-word_score(toks[i]), i))
    keep_set = set(ranked[:target_words])
    last_word = word_idx[-1] if word_idx else -1

    out: List[str] = []
    for i, tok in enumerate(toks):
        if is_word[i]:
            if i in keep_set:
                out.append(tok)
            continue
        if not out:
            # Never start the result with punctuation.
            continue
        followed_by_kept = any(
            j in keep_set for j in range(i + 1, min(i + 3, len(toks)))
        )
        closes_sentence = i > last_word and tok in {'.', '!', '?'}
        if followed_by_kept or closes_sentence:
            out.append(tok)

    sent = ' '.join(out)
    sent = SPACE_PUNCT.sub(r'\1', sent)
    sent = MULTI_SPACE.sub(' ', sent).strip()
    if sent:
        sent = sent[0].upper() + sent[1:]
    return sent
| # -------------------------- | |
| # FRASES CANDIDATAS | |
| # -------------------------- | |
# Break after . ! ? : ; when followed by whitespace, or at any run of newlines.
_SENTENCE_BREAK = re.compile(r'(?<=[\.!\?:;])\s+|\n+')


def split_sentences(text: str):
    """Split ``text`` into trimmed, non-empty sentence-like fragments."""
    trimmed = (piece.strip() for piece in _SENTENCE_BREAK.split(text.strip()))
    return [piece for piece in trimmed if piece]
def generate_candidates(parts: List[str], max_len=240):
    """Build candidate phrases from runs of one to three consecutive parts.

    For each start position the single part, the two-part join and the
    three-part join are considered in that order. Candidates longer than
    ``max_len`` characters are discarded and duplicates are removed, keeping
    the first occurrence.
    """
    windows = []
    for start, first in enumerate(parts):
        windows.append(first)
        for width in (2, 3):
            chunk = parts[start:start + width]
            if len(chunk) == width:
                windows.append(" ".join(chunk).strip())
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(w for w in windows if len(w) <= max_len))
| # -------------------------- | |
| # SCORING INTELIGENTE | |
| # -------------------------- | |
def phrase_score(phrase, idx, keyphrases, doc_emb, ph_emb, max_len=240):
    """Score a candidate phrase; higher is better.

    The total combines a length score, cosine relevance to the document,
    a style score, keyphrase coverage and a position score, minus penalties
    for weak openers and very short phrases.

    Args:
        phrase: Candidate text.
        idx: Position of the candidate in the candidate list (earlier
            candidates get a slightly higher position score).
        keyphrases: Iterable of ``(keyphrase, weight)`` pairs.
        doc_emb: Document embedding, shaped for ``cosine_similarity``.
        ph_emb: Phrase embedding, shaped for ``cosine_similarity``.
        max_len: Maximum allowed phrase length in characters.

    Returns:
        The weighted total as a float.
    """
    length = len(phrase)

    # Preferred length is 160 characters. For limits at or below 160 the
    # original ``max_len - 160`` denominator was zero (ZeroDivisionError at
    # max_len == 160) or negative (which inverted the score), so the
    # preferred length is scaled to two thirds of the limit instead.
    center = 160 if max_len > 160 else max_len * 2.0 / 3.0
    span = max(1.0, max_len - center)
    len_score = max(0.0, 1 - abs(length - center) / span)

    lo = phrase.lower()

    # Keyphrase coverage: every word of the keyphrase occurs in the phrase.
    kp_bonus = sum(
        1.0 for kp, _ in keyphrases if all(w in lo for w in kp.split())
    )
    kp_score = min(1.0, kp_bonus / 3.0)

    # Cosine similarity mapped from [-1, 1] to [0, 1].
    rel = float(cosine_similarity(doc_emb, ph_emb)[0][0])
    rel_score = (rel + 1) / 2

    words = re.findall(r'\w+', lo, flags=re.UNICODE)
    strong_hits = sum(1 for w in words if w in STRONG_TOKENS)
    punct_hits = len(re.findall(r'[,:;()]', phrase)) + len(re.findall(r'[–—-]', phrase))
    first_person = bool(re.search(r'\b(eu|meu|minha|aprendi|descobri)\b', lo))
    imperative = bool(re.match(r'^(faça|evite|pare|comece|teste|use|mude|foque|aprenda)\b', lo))

    style = 0.2 * strong_hits + 0.05 * punct_hits
    if "?" in phrase:
        style += 0.2
    if "!" in phrase:
        style += 0.1
    if first_person:
        style += 0.15
    if imperative:
        style += 0.2
    style_score = min(1.0, 0.4 + style)

    pen = 0.0
    if re.match(BEGIN_BAD, lo):
        pen += 0.25
    if length < 60:
        pen += 0.2

    pos_score = max(0.5, 1.0 - 0.02 * idx)

    return (
        1.6 * len_score
        + 1.6 * rel_score
        + 1.3 * style_score
        + 1.0 * kp_score
        + 0.5 * pos_score
    ) - pen
def pick_best_phrases(text: str, max_len=240, top_k=3, impact_strength: int = 50):
    """Select and compress the best tweet-sized phrases from ``text``.

    Steps: split the text into sentences, build 1-3 sentence candidates,
    score them with ``phrase_score``, shortlist the best, pick a diverse
    subset with maximal marginal relevance (MMR), then compress each pick
    with ``impact_rewrite_extractive``.

    Args:
        text: Source text.
        max_len: Maximum length of each returned phrase, in characters.
        top_k: Number of phrases wanted (at least one is returned whenever
            any candidate exists).
        impact_strength: 0..100, forwarded to the extractive rewrite.

    Returns:
        A list of phrases, or an empty list when no candidate fits.
    """
    load_models()
    parts = split_sentences(text)
    cands = generate_candidates(parts, max_len)
    if not cands:
        return []

    # scikit-learn only ships an "english" built-in stop list; the string
    # 'portuguese' is rejected, so keyword extraction either failed or
    # silently produced no keywords. Pass the file's own Portuguese list,
    # limited to single-word entries (the only ones a tokenizer can match).
    stop_words = sorted(w for w in PT_STOP if re.fullmatch(r"\w+", w))
    kw = KW_MODEL.extract_keywords(
        text, keyphrase_ngram_range=(1, 3), stop_words=stop_words, top_n=8
    )

    doc_emb = EMB_MODEL.encode([text], convert_to_numpy=True, normalize_embeddings=True)
    ph_embs = EMB_MODEL.encode(cands, convert_to_numpy=True, normalize_embeddings=True)

    scored = [
        (phrase_score(c, idx, kw, doc_emb, emb.reshape(1, -1), max_len=max_len), c, idx)
        for idx, (c, emb) in enumerate(zip(cands, ph_embs))
    ]
    scored.sort(reverse=True)

    top = scored[:max(12, top_k * 4)]
    shortlist = [c for _, c, _ in top]
    # Reuse the embeddings computed above instead of encoding again.
    shortlist_embs = ph_embs[[idx for _, _, idx in top]]

    sim_doc = cosine_similarity(shortlist_embs, doc_emb).reshape(-1)
    # All pairwise similarities at once, instead of one call per pair.
    pair_sim = cosine_similarity(shortlist_embs)

    # MMR: start from the phrase closest to the document, then repeatedly add
    # the phrase balancing relevance against redundancy with earlier picks.
    diversity = 0.6
    selected = [int(np.argmax(sim_doc))]
    wanted = min(top_k, len(shortlist))
    while len(selected) < wanted:
        best_i, best_score = None, -1e9
        for i in range(len(shortlist)):
            if i in selected:
                continue
            # Redundancy is clamped at zero, as negative similarity should
            # not be rewarded.
            redundancy = max(0.0, max(float(pair_sim[i, j]) for j in selected))
            mmr = (1 - diversity) * sim_doc[i] - diversity * redundancy
            if mmr > best_score:
                best_score, best_i = mmr, i
        selected.append(best_i)

    final_phrases = []
    for i in selected:
        base = shortlist[i]
        # Fall back to the untouched phrase if the rewrite removes everything.
        rew = impact_rewrite_extractive(base, strength=impact_strength) or base
        rew = rew.strip()
        if len(rew) > max_len:
            rew = rew[:max_len].rstrip()
        final_phrases.append(rew)
    return final_phrases
| # -------------------------- | |
| # RENDER DO TWEET | |
| # -------------------------- | |
def draw_wrapped_text(draw, text, font, x, y, max_width, fill):
    """Draw ``text`` word-wrapped to ``max_width`` and return the next y.

    Words are packed greedily onto lines. A single word wider than
    ``max_width`` is placed on a line of its own; previously such a word at
    the start of a line produced a spurious empty line before it.

    Args:
        draw: ``ImageDraw`` object to draw on.
        text: Text to render; any whitespace separates words.
        font: Font used for measuring and drawing.
        x, y: Top-left position of the first line.
        max_width: Maximum line width in pixels.
        fill: Text colour.

    Returns:
        The y coordinate just below the last line, including the trailing
        line spacing.
    """
    lines, current = [], []
    for word in text.split():
        trial = " ".join(current + [word])
        left, _, right, _ = draw.textbbox((0, 0), trial, font=font)
        # An empty line always accepts the word, even if it is too wide,
        # so no blank line is ever emitted.
        if right - left <= max_width or not current:
            current.append(word)
        else:
            lines.append(" ".join(current))
            current = [word]
    if current:
        lines.append(" ".join(current))

    cur_y = y
    for ln in lines:
        draw.text((x, cur_y), ln, font=font, fill=fill)
        bbox = draw.textbbox((x, cur_y), ln, font=font)
        cur_y += (bbox[3] - bbox[1]) + LINE_SPACING
    return cur_y
def circular_avatar(pil_img, size=AVATAR_SIZE):
    """Return a ``size`` x ``size`` RGBA image with the avatar in a circle.

    When ``pil_img`` is None a flat grey placeholder is used. Pixels outside
    the circle are fully transparent.
    """
    source = (
        Image.new("RGB", (size, size), (200, 205, 210))
        if pil_img is None
        else pil_img
    )
    square = ImageOps.fit(source, (size, size), method=Image.LANCZOS)

    # Circular stencil: 255 inside the ellipse, 0 outside.
    stencil = Image.new("L", (size, size), 0)
    ImageDraw.Draw(stencil).ellipse((0, 0, size, size), fill=255)

    canvas = Image.new("RGBA", (size, size), (255, 255, 255, 0))
    canvas.paste(square, (0, 0), stencil)
    return canvas
def random_meta():
    """Make up tweet metadata: timestamp text, client label and counters.

    Returns:
        ``(date_str, device, comments, retweets, likes)`` where the timestamp
        lies up to roughly two weeks in the past and is formatted as
        ``H:MM AM/PM · Mon DD, YYYY`` with no leading zero on the hour.
    """
    age = datetime.timedelta(
        days=random.randint(0, 14),
        hours=random.randint(0, 23),
        minutes=random.randint(0, 59),
    )
    when = datetime.datetime.now() - age

    # Drop the leading zero from the 12-hour clock value.
    hour = when.strftime("%I").lstrip("0") or "0"
    date_str = hour + when.strftime(':%M %p · %b %d, %Y')

    device = random.choice(DEVICE_POOL)
    comments = random.randint(0, 800)
    retweets = random.randint(0, 3500)
    likes = random.randint(0, 15000)
    return date_str, device, comments, retweets, likes
def render_tweet_image(text, name, handle, avatar_img=None) -> Image.Image:
    """Render a tweet-style card and return it cropped to its content.

    Layout, top to bottom: avatar with name and handle, wrapped body text,
    a metadata line (random date and client), a divider and the metrics line.

    The card is first painted on a 1200px-tall canvas. If the content turns
    out taller than that, it is painted again on a canvas of the required
    height; previously the overflow was cropped from outside the canvas and
    came out as a black band.

    Args:
        text: Tweet body.
        name: Display name.
        handle: Account handle, drawn to the right of the name.
        avatar_img: Optional PIL image for the avatar; a placeholder is used
            when None.

    Returns:
        The rendered card as an RGB image.
    """
    avatar = circular_avatar(avatar_img, AVATAR_SIZE)

    # Metadata is generated once so that a repaint shows the same values.
    date_str, device, comments, retweets, likes = random_meta()
    meta = f"{date_str} · {device}"
    metrics = f"{comments} Comments {retweets} Retweets {likes} Likes"

    def paint(canvas_h):
        """Paint the card on a canvas of ``canvas_h``; return (image, end y)."""
        img = Image.new("RGB", (CANVAS_W, canvas_h), BG_COLOR)
        draw = ImageDraw.Draw(img)
        x, y = PADDING, PADDING

        img.paste(avatar, (x, y), avatar)

        # Name, then the handle placed right after it.
        nx, ny = x + AVATAR_SIZE + GUTTER, y + 6
        draw.text((nx, ny), name, font=FONT_NAME_BOLD, fill=TEXT_COLOR)
        name_bbox = draw.textbbox((nx, ny), name, font=FONT_NAME_BOLD)
        name_w = name_bbox[2] - name_bbox[0]
        hx, hy = nx + name_w + 18, ny + (FS_NAME - FS_HANDLE)
        draw.text((hx, hy), handle, font=FONT_NAME_REG, fill=HANDLE_COLOR)

        # Body text, wrapped to the space right of the avatar.
        body_x, body_y = nx, ny + FS_NAME + 20
        max_text_w = CANVAS_W - body_x - PADDING
        body_end_y = draw_wrapped_text(
            draw, text, FONT_BODY, body_x, body_y, max_text_w, TEXT_COLOR
        )

        meta_y = body_end_y + 20
        draw.text((body_x, meta_y), meta, font=FONT_META, fill=META_COLOR)
        meta_end_y = draw.textbbox((body_x, meta_y), meta, font=FONT_META)[3]

        div_y = meta_end_y + 24
        draw.line(
            [(PADDING, div_y), (CANVAS_W - PADDING, div_y)],
            fill=DIVIDER_COLOR,
            width=2,
        )

        metrics_y = div_y + 26
        draw.text((body_x, metrics_y), metrics, font=FONT_METRICS, fill=TEXT_COLOR)
        bottom = draw.textbbox((body_x, metrics_y), metrics, font=FONT_METRICS)[3]
        return img, bottom + PADDING

    img, end_y = paint(1200)
    if end_y > img.height:
        # Content did not fit: repaint on a canvas of exactly the needed height.
        img, end_y = paint(end_y)
    return img.crop((0, 0, CANVAS_W, end_y))
| # -------------------------- | |
| # FUNÇÕES GRADIO | |
| # -------------------------- | |
def _load_avatar(avatar_url: Optional[str], avatar_file: Optional[Image.Image]) -> Optional[Image.Image]:
    """Return the avatar as an RGB image, or None when none can be loaded.

    The uploaded image takes precedence; the URL is tried only when there is
    no upload or the upload cannot be converted. Loading is best effort:
    failures are logged and never raised, so the caller falls back to the
    placeholder avatar.

    Args:
        avatar_url: Optional image URL; surrounding whitespace is ignored.
        avatar_file: Optional PIL image provided by the upload widget.

    Returns:
        An RGB PIL image, or None.
    """
    import logging  # local import: the module does not import logging

    log = logging.getLogger(__name__)

    if avatar_file is not None:
        try:
            return avatar_file.convert("RGB")
        except Exception:
            # Best effort: keep going and try the URL instead.
            log.warning("Could not use the uploaded avatar", exc_info=True)

    url = (avatar_url or "").strip()
    if url:
        # NOTE(review): this fetches a user-supplied URL from the server.
        # Consider restricting the allowed hosts and the response size.
        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()
            return Image.open(io.BytesIO(r.content)).convert("RGB")
        except Exception:
            log.warning("Could not load avatar from %r", url, exc_info=True)
    return None
def generate_images(text, name, handle, topk, maxlen, impact, avatar_url, avatar_file):
    """Gradio handler: build the tweet images and a ZIP containing them.

    Args:
        text: Source text to extract phrases from.
        name: Display name shown on the card.
        handle: Account handle shown on the card.
        topk: Number of images wanted.
        maxlen: Maximum characters per phrase.
        impact: Compression strength, 0..100.
        avatar_url: Optional avatar image URL.
        avatar_file: Optional uploaded avatar (PIL image).

    Returns:
        ``(images, zip_path)``: the PIL images for the gallery and the path
        of a ZIP file holding one PNG per image.

    Raises:
        gr.Error: When a required field is missing or no phrase fits the
            character limit.
    """
    # Validate before any heavy work; the models are loaded on demand by
    # pick_best_phrases, so invalid input no longer triggers a model load.
    # Whitespace-only text counts as missing.
    if not text or not text.strip() or not name or not handle:
        raise gr.Error("Preencha: Texto, Nome e @arroba.")

    phrases = pick_best_phrases(
        text, max_len=int(maxlen), top_k=int(topk), impact_strength=int(impact)
    )
    if not phrases:
        raise gr.Error("Não encontrei frases ≤ limite de caracteres.")

    avatar_img = _load_avatar(avatar_url, avatar_file)
    images = [(p, render_tweet_image(p, name, handle, avatar_img)) for p in phrases]

    # The ZIP lives in a fresh temporary directory so Gradio can serve it.
    tmpdir = tempfile.mkdtemp(prefix="tweets_")
    zpath = os.path.join(tmpdir, "tweets.zip")
    with zipfile.ZipFile(zpath, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for i, (phrase, im) in enumerate(images, 1):
            buf = io.BytesIO()
            im.save(buf, format="PNG")
            # Sanitise first, then fall back: a phrase made only of
            # disallowed characters used to yield a name like "01_.png".
            stem = re.sub(r'[^a-zA-Z0-9_-]+', '_', phrase[:30]).strip("_") or f"tweet_{i}"
            zf.writestr(f"{i:02d}_{stem}.png", buf.getvalue())

    # The gallery only needs the images.
    pil_list = [im for _, im in images]
    return pil_list, zpath
# Two-column page: inputs on the left, results on the right.
with gr.Blocks(title="Tweet Image Generator") as demo:
    gr.Markdown("# Tweet Image Generator\nGere imagens estilo tweet a partir de um texto longo.")

    with gr.Row():
        # Left column: source text, identity, tuning sliders and avatar.
        with gr.Column():
            text = gr.Textbox(label="Texto de origem", lines=10, placeholder="Cole aqui o texto...")
            name = gr.Textbox(label="Nome", value="Elon Musk")
            handle = gr.Textbox(label="@arroba", value="@elonmusk")
            with gr.Row():
                topk = gr.Slider(1, 6, value=3, step=1, label="Top K (quantas imagens)")
                maxlen = gr.Slider(80, 280, value=240, step=1, label="Máx. caracteres")
                impact = gr.Slider(0, 100, value=50, step=1, label="Força do Impacto (0=leve, 100=forte)")
            avatar_url = gr.Textbox(label="Avatar URL (opcional)")
            avatar_file = gr.Image(type="pil", label="Avatar arquivo (opcional)")
            btn = gr.Button("Gerar imagens")

        # Right column: generated images and the downloadable archive.
        with gr.Column():
            gallery = gr.Gallery(label="Imagens geradas", columns=1, height=520)
            zip_out = gr.File(label="Baixar ZIP")

    # The button runs the whole pipeline and fills both outputs.
    btn.click(
        fn=generate_images,
        inputs=[text, name, handle, topk, maxlen, impact, avatar_url, avatar_file],
        outputs=[gallery, zip_out],
    )


if __name__ == "__main__":
    demo.launch()