Spaces:

leicam
/

twettermaker

Sleeping

App Files Files Community

twettermaker / app.py

leicam

Update app.py

67cb20e verified 3 months ago

raw

history blame contribute delete

16.6 kB

	# -*- coding: utf-8 -*-
	"""
	Tweet Image Web App — Hugging Face Spaces (Gradio)
	--------------------------------------------------
	- Converte o app desktop em uma interface web Gradio para rodar em Spaces.
	- Mantém o pipeline EXTRATIVO (não inventa palavras) e a lógica de seleção de frases.
	- Gera imagens no estilo "tweet" e permite baixar um ZIP com todas.

	Como rodar localmente:
	pip install -r requirements.txt
	python app.py
	# acesse o link http://127.0.0.1:7860

	Em Hugging Face Spaces:
	- Crie um Space com SDK = Gradio (Python 3.10+).
	- Faça upload deste arquivo, do requirements.txt e README.md.
	- O Space inicia automaticamente.
	"""

	import io, re, os, zipfile, random, datetime, tempfile
	from typing import List, Optional, Tuple

	# Imaging / NLP
	from PIL import Image, ImageDraw, ImageFont, ImageOps
	import numpy as np
	from sklearn.metrics.pairwise import cosine_similarity
	from sentence_transformers import SentenceTransformer
	from keybert import KeyBERT
	import requests

	import gradio as gr

	# ----------------------------
	# GENERAL CONFIG (fonts/style)
	# ----------------------------
	# Canvas width and outer margin, in pixels.
	CANVAS_W = 1600
	PADDING = 80

	# RGB color triples used when drawing the card.
	BG_COLOR = (255, 255, 255)
	TEXT_COLOR = (15, 20, 25)
	HANDLE_COLOR = (83, 100, 113)
	META_COLOR = (83, 100, 113)
	DIVIDER_COLOR = (239, 243, 244)

	# Font files as laid out in the Linux container used by Spaces (DejaVu).
	FONT_NAME_BOLD_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
	FONT_NAME_REG_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
	FONT_BODY_REG_PATH = FONT_NAME_REG_PATH

	# Font sizes for each text role.
	FS_NAME = 54
	FS_HANDLE = 46
	FS_BODY = 60
	FS_META = 42
	FS_METRICS = 44

	# Avatar edge length, avatar-to-text gap and extra space between wrapped lines.
	AVATAR_SIZE = 140
	GUTTER = 32
	LINE_SPACING = 10

	# Client labels; one is picked at random for each rendered tweet.
	DEVICE_POOL = [
	    "Twitter for iPhone",
	    "Twitter for Android",
	    "Twitter Web App",
	]

	# --------------------------
	# Font helper
	# --------------------------
	def font(path, size):
	    """Load a TrueType font, falling back to Pillow's built-in font.

	    Args:
	        path: Path of the font file to load.
	        size: Requested font size.

	    Returns:
	        The font loaded from ``path`` when that succeeds. Otherwise Pillow's
	        default font, at ``size`` when the installed Pillow can scale it and
	        at its fixed bitmap size when it cannot.
	    """
	    try:
	        return ImageFont.truetype(path, size)
	    except Exception:
	        # Deliberate best-effort: any failure to load the file (for example a
	        # host without the DejaVu fonts) degrades to the built-in font.
	        pass
	    try:
	        # Newer Pillow releases accept a size here; without it the fallback is
	        # a small fixed-size bitmap font that is unreadable on a 1600 px canvas.
	        return ImageFont.load_default(size=size)
	    except (TypeError, ImportError, OSError):
	        # Older Pillow (no ``size`` argument) or no scalable default available.
	        return ImageFont.load_default()

	# Font objects built once at import time, one per text role on the card.
	(
	    FONT_NAME_BOLD,
	    FONT_NAME_REG,
	    FONT_BODY,
	    FONT_META,
	    FONT_METRICS,
	) = (
	    font(face_path, px)
	    for face_path, px in (
	        (FONT_NAME_BOLD_PATH, FS_NAME),
	        (FONT_NAME_REG_PATH, FS_HANDLE),
	        (FONT_BODY_REG_PATH, FS_BODY),
	        (FONT_NAME_REG_PATH, FS_META),
	        (FONT_NAME_REG_PATH, FS_METRICS),
	    )
	)

	# --------------------------
	# NLP MODELS (loaded once)
	# --------------------------
	EMB_MODEL = None
	KW_MODEL = None


	def load_models():
	    """Create the module-level embedding and keyword models if either is unset."""
	    global EMB_MODEL, KW_MODEL
	    if EMB_MODEL is not None and KW_MODEL is not None:
	        return
	    EMB_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
	    # KeyBERT is built on top of the same embedding model instance.
	    KW_MODEL = KeyBERT(EMB_MODEL)

	# Lowercased words that earn a bonus when scoring phrases and rewriting them.
	# NOTE(review): lookups are done per single token, so the two-word entries
	# ("por quê", "por que") look unreachable -- confirm before relying on them.
	STRONG_TOKENS = {
	    "como", "por quê", "por que", "aprendi", "nunca", "sempre", "hoje", "resultado",
	    "prova", "segredo", "erro", "verdade", "atenção", "cuidado", "evite", "descobri",
	    "funciona", "dados", "passo", "ganhar", "perder", "crescer", "acertar",
	    "fácil", "difícil", "exemplo", "estratégia", "tática", "processo", "prático",
	}
	# Weak sentence openers, matched at the start of a lowercased phrase.
	# The alternatives must be separated by a bare "|": an escaped "\|" matches a
	# literal pipe character and the pattern would never match real text.
	BEGIN_BAD = r'^(e|mas|ou|então|daí|aí|só que)\b'

	# --------------------------
	# UTIL: cleanup/normalization
	# --------------------------

	# Essential Portuguese stopwords plus hedges/fillers. Negations and numbers
	# are handled separately so that they are kept.
	PT_STOP = {
	    # articles, prepositions and contractions
	    'a', 'à', 'às', 'o', 'os', 'as', 'um', 'uma', 'uns', 'umas',
	    'de', 'do', 'da', 'dos', 'das', 'no', 'na', 'nos', 'nas',
	    'em', 'por', 'para', 'pra', 'com', 'sem', 'sobre', 'entre',
	    'até', 'após', 'antes', 'desde',
	    # relatives, interrogatives and quantifiers
	    'que', 'se', 'quando', 'onde', 'como', 'qual', 'quais', 'quanto', 'tanto',
	    'toda', 'todo', 'todas', 'todos', 'cada', 'mais', 'menos',
	    'muito', 'muitos', 'muita', 'muitas',
	    # adverbs, connectives and hedges
	    'já', 'ainda', 'também', 'só', 'pois', 'porque', 'porquê', 'né', 'tipo',
	    'basicamente', 'literalmente', 'bem', 'aliás', 'então', 'daí', 'aí',
	    'talvez', 'acho', 'meio', 'um pouco', 'depois',
	    # spoken fillers
	    'né?', 'ok', 'ok?', 'certo', 'certo?', 'assim', 'coisa', 'coisas', 'etc',
	}

	# Multi-word fillers removed from a phrase before it is tokenized.
	FILLER_PHRASES = [
	    'na verdade',
	    'de verdade',
	    'de certa forma',
	    'no final do dia',
	    'de alguma forma',
	    'de um jeito',
	    'por exemplo',
	    'para ser sincero',
	    'pra ser sincero',
	    'para falar a verdade',
	]

	# Low-weight hedging words and high-weight negations.
	WEAK_ADVERBS = {'talvez', 'acho', 'quase', 'apenas', 'somente', 'só', 'um pouco', 'meio'}
	NEGATIONS = {'não', 'nunca', 'jamais', 'sem'}

	# Leading connective (and the whitespace after it) stripped from a phrase.
	# Alternatives use a bare "|"; an escaped "\|" would match a literal pipe.
	LEAD_TRIM = re.compile(r'^(e|mas|ou|então|daí|aí|só que|agora|bom|olha)\b\s*', re.I)
	# Runs of two or more whitespace characters.
	MULTI_SPACE = re.compile(r'\s{2,}')
	# Whitespace sitting directly before a punctuation mark.
	SPACE_PUNCT = re.compile(r'\s+([,.;:!?])')

	# Tokens are word runs, single punctuation marks, or a single dash.
	TOKEN_RE = re.compile(r"\w+|[\.,;:!?()]|[–—-]", re.U)

	def safe_tokens(text: str) -> List[str]:
	    """Return every non-overlapping TOKEN_RE match in ``text``, in order."""
	    return [found.group(0) for found in TOKEN_RE.finditer(text)]

	def is_number(tok: str) -> bool:
	    """Tell whether ``tok`` is a digit followed only by digits, dots or commas."""
	    numeric = re.fullmatch(r"\d+[\d.,]*", tok)
	    return numeric is not None

	def impact_rewrite_extractive(text: str, strength: int = 50) -> str:
	    """Shorten ``text`` by dropping its lowest-weight words; no words are added.

	    Args:
	        text: Phrase to condense.
	        strength: Clamped to 0..100. Higher values keep fewer words
	            (about 75% of the words at 0, about 20% at 100).

	    Returns:
	        The kept words in their original order, with some punctuation
	        retained, spacing normalized and the first character upper-cased.
	        May be an empty string.
	    """
	    strength = max(0, min(100, strength))
	    keep_ratio = 1.0 - (0.25 + 0.55 * (strength / 100.0))  # 0.75..0.2

	    # Drop a leading connective and any filler phrases, then tidy the spacing.
	    cleaned = LEAD_TRIM.sub('', text.strip())
	    for filler in FILLER_PHRASES:
	        cleaned = re.sub(rf"\b{re.escape(filler)}\b", '', cleaned, flags=re.I)
	    cleaned = MULTI_SPACE.sub(' ', cleaned)

	    tokens = safe_tokens(cleaned)
	    is_word = [re.match(r'\w', tok) is not None for tok in tokens]

	    def weight(tok: str) -> float:
	        """Importance of a word token; the first matching rule wins."""
	        low = tok.lower()
	        if low in NEGATIONS or is_number(low):
	            return 1.2
	        if low in STRONG_TOKENS:
	            return 1.1
	        if low in WEAK_ADVERBS or low in PT_STOP:
	            return 0.2
	        return 0.9

	    word_positions = [pos for pos, flag in enumerate(is_word) if flag]
	    # Always aim for at least four words, however strong the compression.
	    target_words = max(4, int(len(word_positions) * keep_ratio))

	    # Highest weight first; among equal weights the earlier word wins.
	    ranked = sorted(word_positions, key=lambda pos: (-weight(tokens[pos]), pos))
	    kept = set(ranked[:target_words])

	    pieces: List[str] = []
	    total = len(tokens)
	    for pos, tok in enumerate(tokens):
	        if is_word[pos]:
	            if pos in kept:
	                pieces.append(tok)
	            continue
	        # Punctuation survives only after some output exists and when a
	        # kept word sits within the next two tokens.
	        lookahead = range(pos + 1, min(pos + 3, total))
	        if pieces and any(nxt in kept for nxt in lookahead):
	            pieces.append(tok)

	    sentence = SPACE_PUNCT.sub(r'\1', ' '.join(pieces))
	    sentence = MULTI_SPACE.sub(' ', sentence).strip()

	    if not sentence:
	        return sentence
	    return sentence[0].upper() + sentence[1:]

	# --------------------------
	# CANDIDATE PHRASES
	# --------------------------
	def split_sentences(text: str):
	    """Split ``text`` into trimmed, non-empty sentence-like parts.

	    A split happens at whitespace that follows one of ``. ! ? : ;`` and at
	    any run of newlines. The terminating punctuation stays with its part.

	    Args:
	        text: Source text.

	    Returns:
	        List of parts in document order; empty when ``text`` is blank.
	    """
	    # The two alternatives need a bare "|"; with an escaped "\|" the pattern
	    # requires a literal pipe and the text is never split.
	    parts = re.split(r'(?<=[\.!\?:;])\s+|\n+', text.strip())
	    return [piece for piece in (p.strip() for p in parts) if piece]

	def generate_candidates(parts: List[str], max_len=240):
	    """Build candidate phrases from runs of one, two or three adjacent parts.

	    Args:
	        parts: Sentence-like strings in document order.
	        max_len: Candidates longer than this many characters are skipped.

	    Returns:
	        Candidates ordered by starting part and then by run length, with
	        repeated strings reduced to their first occurrence.
	    """
	    total = len(parts)
	    windows = []
	    for start in range(total):
	        for width in (1, 2, 3):
	            stop = start + width
	            if stop > total:
	                break
	            if width == 1:
	                # A single part is used exactly as given.
	                chunk = parts[start]
	            else:
	                chunk = " ".join(parts[start:stop]).strip()
	            if len(chunk) <= max_len:
	                windows.append(chunk)
	    # dict keys keep insertion order, so this is an order-preserving dedup.
	    return list(dict.fromkeys(windows))

	# --------------------------
	# SMART SCORING
	# --------------------------
	def phrase_score(phrase, idx, keyphrases, doc_emb, ph_emb, max_len=240):
	    """Score one candidate phrase; higher is better.

	    The total is a weighted sum of length fit, similarity to the document,
	    style cues, keyphrase coverage and position, minus penalties for a weak
	    opener and for phrases under 60 characters.

	    Args:
	        phrase: Candidate text.
	        idx: Position of the candidate in the candidate list; later
	            candidates get a smaller position score (floored at 0.5).
	        keyphrases: Iterable of ``(keyphrase, weight)`` pairs; only the
	            keyphrase text is used.
	        doc_emb: Document embedding, as accepted by ``cosine_similarity``.
	        ph_emb: Phrase embedding, as accepted by ``cosine_similarity``.
	        max_len: Maximum allowed phrase length in characters.

	    Returns:
	        The score as a float.
	    """
	    length = len(phrase)
	    # The preferred length is 160 characters. When the limit is 160 or less
	    # that target is unreachable and the original divisor (max_len - 160)
	    # became zero or negative, so the target moves to two thirds of the limit.
	    center = 160.0 if max_len > 160 else max_len * 2.0 / 3.0
	    span = max(max_len - center, 1.0)
	    len_score = max(0.0, 1 - abs(length - center) / span)

	    lo = phrase.lower()
	    # A keyphrase counts when each of its words occurs somewhere in the phrase.
	    kp_bonus = sum(1.0 for kp, _ in keyphrases if all(w in lo for w in kp.split()))
	    kp_score = min(1.0, kp_bonus / 3.0)

	    rel = float(cosine_similarity(doc_emb, ph_emb)[0][0])
	    rel_score = (rel + 1) / 2  # map [-1, 1] onto [0, 1]

	    words = re.findall(r'\w+', lo, flags=re.UNICODE)
	    strong_hits = sum(1 for w in words if w in STRONG_TOKENS)
	    punct_hits = len(re.findall(r'[,:;()]', phrase)) + len(re.findall(r'[–—-]', phrase))
	    first_person = bool(re.search(r'\b(eu|meu|minha|aprendi|descobri)\b', lo))
	    imperative = bool(re.match(r'^(faça|evite|pare|comece|teste|use|mude|foque|aprenda)\b', lo))

	    style = 0.2 * strong_hits + 0.05 * punct_hits
	    if "?" in phrase:
	        style += 0.2
	    if "!" in phrase:
	        style += 0.1
	    if first_person:
	        style += 0.15
	    if imperative:
	        style += 0.2
	    style_score = min(1.0, 0.4 + style)

	    pen = 0.0
	    if re.match(BEGIN_BAD, lo):
	        pen += 0.25
	    if length < 60:
	        pen += 0.2

	    pos_score = max(0.5, 1.0 - 0.02 * idx)

	    total = (
	        1.6 * len_score
	        + 1.6 * rel_score
	        + 1.3 * style_score
	        + 1.0 * kp_score
	        + 0.5 * pos_score
	    ) - pen
	    return total

	def pick_best_phrases(text: str, max_len=240, top_k=3, impact_strength: int = 50):
	    """Pick up to ``top_k`` diverse, high-scoring phrases from ``text``.

	    Candidates are scored with ``phrase_score``, the best ones are
	    shortlisted, and the final picks balance similarity to the document
	    against similarity to phrases already chosen. Each pick is then
	    condensed with ``impact_rewrite_extractive``.

	    Args:
	        text: Source text.
	        max_len: Maximum length, in characters, of a candidate and of a result.
	        top_k: Number of phrases wanted; at least one is returned whenever
	            any candidate exists.
	        impact_strength: Passed to ``impact_rewrite_extractive``.

	    Returns:
	        List of phrases; empty when no candidate fits ``max_len``.
	    """
	    load_models()
	    parts = split_sentences(text)
	    cands = generate_candidates(parts, max_len)
	    if not cands:
	        return []

	    # scikit-learn's vectorizer takes 'english' or an explicit list as stop
	    # words; 'portuguese' is not a built-in list. Use this file's own
	    # Portuguese list, limited to single alphabetic words.
	    stop_words = sorted(w for w in PT_STOP if w.isalpha())
	    kw = KW_MODEL.extract_keywords(
	        text, keyphrase_ngram_range=(1, 3), stop_words=stop_words, top_n=8
	    )

	    doc_emb = EMB_MODEL.encode([text], convert_to_numpy=True, normalize_embeddings=True)
	    ph_embs = EMB_MODEL.encode(cands, convert_to_numpy=True, normalize_embeddings=True)

	    scored = [
	        (phrase_score(c, idx, kw, doc_emb, emb.reshape(1, -1), max_len=max_len), c, idx)
	        for idx, (c, emb) in enumerate(zip(cands, ph_embs))
	    ]
	    scored.sort(reverse=True)

	    top = scored[:max(12, top_k * 4)]
	    shortlist = [c for _, c, _ in top]
	    # Reuse the embeddings computed above instead of encoding again.
	    shortlist_embs = ph_embs[[idx for _, _, idx in top]]
	    sim_doc = cosine_similarity(shortlist_embs, doc_emb).reshape(-1)
	    # One pairwise matrix replaces a similarity call per (candidate, pick) pair.
	    sim_pair = cosine_similarity(shortlist_embs)

	    # Greedy selection: start from the phrase closest to the document, then
	    # repeatedly add the one with the best relevance-minus-redundancy score.
	    redundancy_weight = 0.6
	    selected = [int(np.argmax(sim_doc))]
	    wanted = min(top_k, len(shortlist))
	    while len(selected) < wanted:
	        best_i, best_score = None, -1e9
	        for i in range(len(shortlist)):
	            if i in selected:
	                continue
	            # Redundancy is floored at zero, as a dissimilar pick is no bonus.
	            sim_to_selected = max(0.0, max(sim_pair[i, j] for j in selected))
	            mmr = (1 - redundancy_weight) * sim_doc[i] - redundancy_weight * sim_to_selected
	            if mmr > best_score:
	                best_score, best_i = mmr, i
	        selected.append(best_i)

	    final_phrases = []
	    for i in selected:
	        base = shortlist[i]
	        # Fall back to the untouched phrase when the rewrite comes back empty.
	        rew = impact_rewrite_extractive(base, strength=impact_strength) or base
	        rew = rew.strip()
	        if len(rew) > max_len:
	            rew = rew[:max_len].rstrip()
	        final_phrases.append(rew)

	    return final_phrases

	# --------------------------
	# TWEET RENDERING
	# --------------------------
	def draw_wrapped_text(draw, text, font, x, y, max_width, fill):
	    """Draw ``text`` word-wrapped to ``max_width`` and return the next free y.

	    Args:
	        draw: Drawing object providing ``textbbox`` and ``text``.
	        text: Text to draw; it is split on whitespace, so line breaks in the
	            input are not preserved.
	        font: Font passed through to ``draw``.
	        x: Left coordinate of every line.
	        y: Top coordinate of the first line.
	        max_width: Maximum line width in pixels.
	        fill: Text color.

	    Returns:
	        The y coordinate just below the last line, including LINE_SPACING.
	    """
	    lines, line = [], []
	    for word in text.split():
	        trial = " ".join(line + [word])
	        box = draw.textbbox((0, 0), trial, font=font)
	        # A word wider than max_width still needs a line of its own; without
	        # the "not line" check an empty line was emitted before it.
	        if box[2] - box[0] <= max_width or not line:
	            line.append(word)
	        else:
	            lines.append(" ".join(line))
	            line = [word]
	    if line:
	        lines.append(" ".join(line))

	    cur_y = y
	    for ln in lines:
	        draw.text((x, cur_y), ln, font=font, fill=fill)
	        bbox = draw.textbbox((x, cur_y), ln, font=font)
	        # Advance by the measured height of this particular line.
	        cur_y += (bbox[3] - bbox[1]) + LINE_SPACING
	    return cur_y

	def circular_avatar(pil_img, size=AVATAR_SIZE):
	    """Return a ``size`` x ``size`` RGBA image with the avatar inside a circle.

	    Args:
	        pil_img: Source image, or None to use a flat gray placeholder.
	        size: Edge length of the result in pixels.

	    Returns:
	        RGBA image that is fully transparent outside the circle.
	    """
	    dims = (size, size)
	    if pil_img is not None:
	        source = pil_img
	    else:
	        source = Image.new("RGB", dims, (200, 205, 210))
	    fitted = ImageOps.fit(source, dims, method=Image.LANCZOS)

	    # White-on-black disc used as the paste mask.
	    disc = Image.new("L", dims, 0)
	    ImageDraw.Draw(disc).ellipse((0, 0, size, size), fill=255)

	    result = Image.new("RGBA", dims, (255, 255, 255, 0))
	    result.paste(fitted, (0, 0), disc)
	    return result

	def random_meta():
	    """Make up the timestamp, client label and engagement counts for a tweet.

	    Returns:
	        Tuple ``(date_str, device, comments, retweets, likes)``. The
	        timestamp lies between now and roughly two weeks ago.
	    """
	    age = datetime.timedelta(
	        days=random.randint(0, 14),
	        hours=random.randint(0, 23),
	        minutes=random.randint(0, 59),
	    )
	    when = datetime.datetime.now() - age
	    # 12-hour clock without the leading zero.
	    hour = when.strftime("%I").lstrip("0") or "0"
	    date_str = hour + when.strftime(':%M %p · %b %d, %Y')
	    return (
	        date_str,
	        random.choice(DEVICE_POOL),
	        random.randint(0, 800),
	        random.randint(0, 3500),
	        random.randint(0, 15000),
	    )

	def _paint_tweet(canvas_h, text, name, handle, avatar, meta, metrics):
	    """Draw the card on a new canvas of height ``canvas_h``.

	    Returns:
	        Tuple ``(image, end_y)`` where ``end_y`` is the bottom of the content
	        including the bottom padding; it may exceed ``canvas_h``.
	    """
	    img = Image.new("RGB", (CANVAS_W, canvas_h), BG_COLOR)
	    draw = ImageDraw.Draw(img)
	    x, y = PADDING, PADDING

	    # The avatar doubles as its own mask so its transparent corners stay clear.
	    img.paste(avatar, (x, y), avatar)

	    # Display name, with the handle to its right.
	    nx, ny = x + AVATAR_SIZE + GUTTER, y + 6
	    draw.text((nx, ny), name, font=FONT_NAME_BOLD, fill=TEXT_COLOR)
	    name_bbox = draw.textbbox((nx, ny), name, font=FONT_NAME_BOLD)
	    name_w = name_bbox[2] - name_bbox[0]

	    hx, hy = nx + name_w + 18, ny + (FS_NAME - FS_HANDLE)
	    draw.text((hx, hy), handle, font=FONT_NAME_REG, fill=HANDLE_COLOR)

	    # Body text, wrapped to the space left of the right padding.
	    body_x, body_y = nx, ny + FS_NAME + 20
	    max_text_w = CANVAS_W - body_x - PADDING
	    body_end_y = draw_wrapped_text(draw, text, FONT_BODY, body_x, body_y, max_text_w, TEXT_COLOR)

	    meta_y = body_end_y + 20
	    draw.text((body_x, meta_y), meta, font=FONT_META, fill=META_COLOR)
	    meta_end_y = draw.textbbox((body_x, meta_y), meta, font=FONT_META)[3]

	    div_y = meta_end_y + 24
	    draw.line([(PADDING, div_y), (CANVAS_W - PADDING, div_y)], fill=DIVIDER_COLOR, width=2)

	    metrics_y = div_y + 26
	    draw.text((body_x, metrics_y), metrics, font=FONT_METRICS, fill=TEXT_COLOR)
	    end_y = draw.textbbox((body_x, metrics_y), metrics, font=FONT_METRICS)[3] + PADDING
	    return img, end_y


	def render_tweet_image(text, name, handle, avatar_img=None) -> Image.Image:
	    """Render ``text`` as a tweet-style card.

	    Args:
	        text: Tweet body.
	        name: Display name.
	        handle: Handle drawn next to the name.
	        avatar_img: Optional avatar image; a placeholder is used when None.

	    Returns:
	        Image of width CANVAS_W, cropped to the height of the content.
	    """
	    avatar = circular_avatar(avatar_img, AVATAR_SIZE)

	    # Generated once so that a second pass shows exactly the same values.
	    date_str, device, comments, retweets, likes = random_meta()
	    meta = f"{date_str} · {device}"
	    metrics = f"{comments} Comments {retweets} Retweets {likes} Likes"

	    img, end_y = _paint_tweet(1200, text, name, handle, avatar, meta, metrics)
	    if end_y > img.height:
	        # Content taller than the first canvas would be lost and the crop
	        # below would pad the overflow with black, so draw again on a canvas
	        # that is tall enough.
	        img, end_y = _paint_tweet(int(end_y) + 1, text, name, handle, avatar, meta, metrics)

	    return img.crop((0, 0, CANVAS_W, end_y))

	# --------------------------
	# GRADIO FUNCTIONS
	# --------------------------
	def _load_avatar(avatar_url: Optional[str], avatar_file: Optional[Image.Image]) -> Optional[Image.Image]:
	    """Return the avatar as an RGB image, preferring the uploaded file.

	    Loading is best-effort: a failing source is logged and skipped.

	    Args:
	        avatar_url: Optional URL to download the avatar from.
	        avatar_file: Optional image that was uploaded directly.

	    Returns:
	        The RGB image, or None when neither source yields one.
	    """
	    import logging  # local import: used only for the failure reports below

	    log = logging.getLogger(__name__)

	    if avatar_file is not None:
	        try:
	            return avatar_file.convert("RGB")
	        except Exception:
	            # Fall through to the URL, but leave a trace of what went wrong.
	            log.warning("Could not use the uploaded avatar image", exc_info=True)

	    # Pasted URLs often carry stray spaces or a trailing newline.
	    url = (avatar_url or "").strip()
	    if url:
	        try:
	            r = requests.get(url, timeout=10)
	            r.raise_for_status()
	            return Image.open(io.BytesIO(r.content)).convert("RGB")
	        except Exception:
	            log.warning("Could not load avatar from URL %r", url, exc_info=True)
	    return None

	def generate_images(text, name, handle, topk, maxlen, impact, avatar_url, avatar_file):
	    """Gradio callback: build the tweet images and a ZIP containing them.

	    Args:
	        text: Source text to extract phrases from.
	        name: Display name drawn on each image.
	        handle: Handle drawn on each image.
	        topk: Number of images wanted.
	        maxlen: Maximum phrase length in characters.
	        impact: Rewrite strength, 0..100.
	        avatar_url: Optional avatar URL.
	        avatar_file: Optional uploaded avatar image.

	    Returns:
	        Tuple ``(images, zip_path)``: the rendered images and the path of a
	        ZIP file holding one PNG per image.

	    Raises:
	        gr.Error: When a required field is blank or no phrase fits the limit.
	    """
	    # Validate first so that bad input never triggers the model load.
	    if not all((value or "").strip() for value in (text, name, handle)):
	        raise gr.Error("Preencha: Texto, Nome e @arroba.")
	    load_models()

	    phrases = pick_best_phrases(text, max_len=int(maxlen), top_k=int(topk), impact_strength=int(impact))
	    if not phrases:
	        raise gr.Error("Não encontrei frases ≤ limite de caracteres.")

	    avatar_img = _load_avatar(avatar_url, avatar_file)
	    images = [(p, render_tweet_image(p, name, handle, avatar_img)) for p in phrases]

	    # The ZIP goes in a fresh temporary directory; it is left on disk so the
	    # returned path stays valid after this function returns.
	    tmpdir = tempfile.mkdtemp(prefix="tweets_")
	    zpath = os.path.join(tmpdir, "tweets.zip")
	    with zipfile.ZipFile(zpath, "w", compression=zipfile.ZIP_DEFLATED) as zf:
	        for i, (p, im) in enumerate(images, 1):
	            bio = io.BytesIO()
	            im.save(bio, format="PNG")
	            # Apply the fallback after sanitizing: a phrase whose first 30
	            # characters are all outside the allowed set sanitizes to "".
	            stem = re.sub(r'[^a-zA-Z0-9_-]+', '_', p[:30]).strip("_") or f"tweet_{i}"
	            zf.writestr(f"{i:02d}_{stem}.png", bio.getvalue())

	    # The gallery takes the images only.
	    pil_list = [im for _, im in images]
	    return pil_list, zpath

	# Gradio UI: inputs in the left column, results in the right column.
	# NOTE(review): the nesting below is reconstructed from the widget order;
	# in particular, confirm which sliders belong inside the inner row.
	with gr.Blocks(title="Tweet Image Generator") as demo:
	    gr.Markdown("# Tweet Image Generator\nGere imagens estilo tweet a partir de um texto longo.")
	    with gr.Row():
	        with gr.Column():
	            text = gr.Textbox(
	                label="Texto de origem",
	                lines=10,
	                placeholder="Cole aqui o texto...",
	            )
	            name = gr.Textbox(label="Nome", value="Elon Musk")
	            handle = gr.Textbox(label="@arroba", value="@elonmusk")
	            with gr.Row():
	                topk = gr.Slider(1, 6, value=3, step=1, label="Top K (quantas imagens)")
	                maxlen = gr.Slider(80, 280, value=240, step=1, label="Máx. caracteres")
	            impact = gr.Slider(
	                0, 100, value=50, step=1,
	                label="Força do Impacto (0=leve, 100=forte)",
	            )
	            avatar_url = gr.Textbox(label="Avatar URL (opcional)")
	            avatar_file = gr.Image(type="pil", label="Avatar arquivo (opcional)")
	            btn = gr.Button("Gerar imagens")
	        with gr.Column():
	            gallery = gr.Gallery(label="Imagens geradas", columns=1, height=520)
	            zip_out = gr.File(label="Baixar ZIP")

	    # The button runs the whole pipeline and fills the gallery and the ZIP link.
	    btn.click(
	        fn=generate_images,
	        inputs=[text, name, handle, topk, maxlen, impact, avatar_url, avatar_file],
	        outputs=[gallery, zip_out],
	    )

	# Start the local server only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()