VNEWS

Running

App Files Files Community

VNEWS / ai_patch.py

bep40

Squash main

4b6e868 about 13 hours ago

Raw

History Blame Contribute Delete

37.2 kB

	import os
	import re
	import time
	import random
	import json
	import html as html_lib
	import subprocess
	import requests
	import hashlib
	import ai_ext as base
	from ai_ext import app
	from fastapi import Request
	from fastapi.responses import JSONResponse, HTMLResponse, FileResponse
	from bs4 import BeautifulSoup
	from urllib.parse import quote_plus

	try:
	from PIL import Image, ImageDraw, ImageFont
	except Exception:
	Image = ImageDraw = ImageFont = None


	def _clean(s):
	s = html_lib.unescape(s or "")
	s = re.sub(r"[ \t]+", " ", s)
	s = re.sub(r"\n{3,}", "\n\n", s)
	return s.strip()


	def _norm(s):
	s = s.lower()
	s = re.sub(r"[^\wÀ-ỹ\s]", " ", s)
	s = re.sub(r"\s+", " ", s).strip()
	return s


	def _similar(a, b):
	    """Return True when *a* and *b* share most of their distinct words.

	    Both strings are normalised with ``_norm`` and split into word sets. The
	    overlap is measured against the smaller set and must reach 0.72. If
	    either side has no words the result is False.
	    """
	    words_a = set(_norm(a).split())
	    words_b = set(_norm(b).split())
	    if not (words_a and words_b):
	        return False
	    smaller = min(len(words_a), len(words_b))
	    shared = len(words_a.intersection(words_b))
	    return shared / max(1, smaller) >= 0.72


	def _dedupe_units(units, max_units=25):
	    """Return cleaned units with exact duplicates removed, in original order.

	    Each unit has its leading list marker stripped and is cleaned with
	    ``_clean``. Units shorter than 18 characters are dropped. Two units are
	    duplicates only when their ``_norm`` keys are equal; merely similar units
	    are all kept. Collection stops once *max_units* units have been kept.

	    NOTE: the marker pattern also removes leading digits, dots and closing
	    parentheses, so a unit that begins with a number loses that number.
	    """
	    kept = []
	    seen_keys = set()
	    for raw_unit in units:
	        unit = _clean(re.sub(r"^[-•*\d\.\)\s]+", "", raw_unit))
	        key = _norm(unit) if len(unit) >= 18 else None
	        if key is None or key in seen_keys:
	            continue
	        seen_keys.add(key)
	        kept.append(unit)
	        if len(kept) >= max_units:
	            break
	    return kept


	def _postprocess_ai_text(text, max_units=20):
	    """Turn raw model output into an optional title plus a "• " bullet list.

	    Steps:
	      * clean the text and split it into non-empty lines;
	      * drop short (< 80 chars) lines that start with a boilerplate prefix;
	      * keep every remaining line of at least 18 characters as one whole
	        bullet (marker stripped), then de-duplicate with ``_dedupe_units``;
	      * treat the first kept line as a title when it is at most 90 characters
	        and does not start with a bullet marker, and remove bullets that are
	        ``_similar`` to it.

	    Returns the cleaned text unchanged when it is empty, the first 900
	    characters of it when no bullet survives, and otherwise the title (if
	    any), a blank line, and the bullets.
	    """
	    text = _clean(text)
	    if not text:
	        return text
	    boilerplate = (
	        "dưới đây", "sau đây", "bài viết", "tôi sẽ", "mình sẽ",
	        "tóm tắt bài", "tiêu đề:", "sapo:", "nội dung:", "kết luận:"
	    )

	    def _is_boilerplate(candidate):
	        lowered = candidate.lower().strip()
	        return len(candidate) < 80 and any(lowered.startswith(p) for p in boilerplate)

	    kept_lines = []
	    for piece in re.split(r"\n+", text):
	        piece = _clean(piece)
	        if piece and not _is_boilerplate(piece):
	            kept_lines.append(piece)

	    # Each sufficiently long line stays one complete bullet; it is never split.
	    stripped = [
	        _clean(re.sub(r"^[-•*\d\.\)\s]+", "", ln))
	        for ln in kept_lines
	        if len(ln) >= 18
	    ]
	    units = _dedupe_units(stripped, max_units=max_units)
	    if not units:
	        return text[:900]

	    title = ""
	    if kept_lines:
	        first = kept_lines[0]
	        if len(first) <= 90 and not first.startswith(("-", "•", "*")):
	            title = first
	            units = [u for u in units if not _similar(u, title)]
	    body = "\n".join("• " + u for u in units[:max_units])
	    if not title:
	        return body
	    return (title + "\n\n" + body).strip()


	def _fallback_summary_from_prompt(prompt, max_units=6):
	    """Build an extractive bullet summary from the source text embedded in *prompt*.

	    Used when no model produced output. The text after the first recognised
	    content marker (if any) is taken as the source, URLs are removed and
	    whitespace collapsed. Sentences of 45-260 characters become bullets,
	    de-duplicated by ``_dedupe_units``.

	    Args:
	        prompt: The prompt that was sent to the model; may be ``None``.
	        max_units: Maximum number of bullets.

	    Returns:
	        A "• "-prefixed bullet list. If no sentence qualifies, a single bullet
	        holding the source text, cut at a word boundary only when it exceeds
	        700 characters. If there is no text at all, a fixed Vietnamese notice.
	    """
	    text = prompt or ""
	    for marker in ["Nội dung nguồn:", "Nội dung bài:", "Nội dung gốc:", "Nội dung:", "Nguồn/bối cảnh internet:"]:
	        if marker in text:
	            text = text.split(marker, 1)[1]
	            break
	    text = re.sub(r"https?://\S+", "", text)
	    text = re.sub(r"\s+", " ", text).strip()
	    sentences = re.split(r"(?<=[\.\!\?])\s+(?=[A-ZÀ-Ỹ0-9])", text)
	    candidates = [s for s in (_clean(part) for part in sentences) if 45 <= len(s) <= 260]
	    units = _dedupe_units(candidates, max_units=max_units)
	    if units:
	        return "\n".join("• " + u for u in units)
	    if text:
	        # Trim back to a word boundary only when the text was actually cut;
	        # otherwise a short text would lose its final word.
	        if len(text) > 700:
	            text = text[:700].rsplit(" ", 1)[0]
	        return "• " + text
	    return "• Không có đủ nội dung nguồn để tóm tắt."


	def _source_line(sources):
	names = []
	for s in (sources or [])[:5]:
	via = s.get("via") or base._domain(s.get("url", "")) or s.get("title", "")
	if via and via not in names:
	names.append(via)
	return "Nguồn tham khảo: " + ", ".join(names[:5]) if names else "Nguồn tham khảo: tổng hợp internet"


	def _make_summary_prompt(title, raw, source_hint=""):
	"""Build the Vietnamese summarisation prompt sent to the model for one article.

	The prompt tells the model to summarise only (not rewrite the whole
	article), to avoid repetition and outside information, to keep important
	figures/people/dates, and to produce at most 5 one-sentence bullets.

	Args:
	title: Source article title, inserted verbatim.
	raw: Source article text; only its first 14000 characters are included.
	source_hint: Source name shown on the "Nguồn:" line.

	Returns:
	The formatted prompt string.
	"""
	return f"""Bạn là biên tập viên tóm tắt tin tức tiếng Việt.

	NHIỆM VỤ BẮT BUỘC:
	- Chỉ TÓM TẮT nội dung chính, KHÔNG viết lại toàn bộ bài.
	- Không lặp lại cùng một ý, cùng một câu, cùng một chi tiết.
	- Không thêm thông tin ngoài nguồn.
	- Tối đa 5 gạch đầu dòng, mỗi gạch đầu dòng 1 câu ngắn.
	- Nếu bài có số liệu/nhân vật/thời điểm quan trọng thì giữ lại.
	- Không viết phần mở bài dài, không viết văn kể lại.

	Tiêu đề nguồn: {title}
	Nguồn: {source_hint}

	Nội dung nguồn:
	{raw[:14000]}
	"""


	def _direct_news_rss(topic, limit=10):
	    """Query the Google News RSS search feed (vi / VN edition) for *topic*.

	    Returns a list of ``{"title", "url", "via", "excerpt"}`` dicts built from
	    at most *limit* feed items; the excerpt is the item title. Items without
	    a title or link are skipped. This is best-effort: any exception ends the
	    collection and whatever was gathered so far is returned.
	    """
	    results = []
	    try:
	        feed_url = "https://news.google.com/rss/search?q=" + quote_plus(topic) + "&hl=vi&gl=VN&ceid=VN:vi"
	        resp = requests.get(feed_url, headers=base.HEADERS, timeout=15)
	        resp.encoding = "utf-8"
	        feed = BeautifulSoup(resp.text, "xml")
	        for item in feed.find_all("item")[:limit]:
	            title_tag = item.find("title")
	            link_tag = item.find("link")
	            source_tag = item.find("source")
	            title = title_tag.get_text(" ", strip=True) if title_tag else ""
	            link = link_tag.get_text(strip=True) if link_tag else ""
	            # Without a <source> element, fall back to the link's domain.
	            via = source_tag.get_text(" ", strip=True) if source_tag else base._domain(link)
	            if not (title and link):
	                continue
	            results.append({"title": title, "url": link, "via": via, "excerpt": title})
	    except Exception:
	        pass
	    return results


	def _topic_source_articles(topic, limit=5):
	    """Collect up to *limit* source articles for *topic*, one per Wall AI post.

	    Sources come from ``base.web_context``; when that raises or yields
	    nothing, ``_direct_news_rss`` is used instead. Each distinct http(s) URL
	    (at most ``limit * 3`` candidates) is scraped with ``base.scrape_any_url``
	    and kept only when its summary plus text is at least 180 characters.
	    Scrape failures are skipped. If nothing could be scraped, entries are
	    built from the source listing itself (title/excerpt) with an image URL
	    from ``base.pollinations_image_url``.

	    Returns:
	        A list of dicts with keys ``title``, ``url``, ``raw``, ``image``,
	        ``via`` and ``source``.
	    """
	    try:
	        _unused_ctx, found = base.web_context(topic, limit=limit)
	    except Exception:
	        found = []
	    if not found:
	        found = _direct_news_rss(topic, limit=10)

	    articles = []
	    visited = set()
	    for entry in (found or [])[:limit * 3]:
	        link = entry.get("url") or ""
	        if link in visited or not link.startswith("http"):
	            continue
	        visited.add(link)
	        try:
	            page = base.scrape_any_url(link)
	            body_text = (page.get("summary", "") + "\n" + page.get("text", "")).strip()
	            if len(body_text) < 180:
	                continue
	            headline = page.get("title") or entry.get("title") or link
	            outlet = page.get("via") or entry.get("via") or base._domain(link)
	            articles.append({
	                "title": headline,
	                "url": link,
	                "raw": body_text,
	                "image": page.get("image") or "",
	                "via": outlet,
	                "source": {"title": headline, "url": link, "excerpt": body_text[:700], "via": outlet}
	            })
	            if len(articles) >= limit:
	                break
	        except Exception:
	            continue

	    if articles:
	        return articles[:limit]

	    # Nothing could be scraped: fall back to the listing's own title/excerpt.
	    for entry in (found or _direct_news_rss(topic, 6))[:limit]:
	        headline = entry.get("title") or topic
	        snippet = entry.get("excerpt") or entry.get("description") or entry.get("content") or headline
	        link = entry.get("url", "")
	        outlet = entry.get("via") or base._domain(link)
	        articles.append({
	            "title": headline,
	            "url": link,
	            "raw": snippet,
	            "image": base.pollinations_image_url(headline),
	            "via": outlet,
	            "source": {"title": headline, "url": link, "excerpt": snippet[:700], "via": outlet}
	        })
	    return articles[:limit]


	async def qwen_generate_resilient(prompt: str, image_url=None, max_tokens: int = 1200):
	    """Generate text for *prompt*, degrading gracefully when Qwen is unavailable.

	    Attempts, in order:
	      1. ``base._original_qwen_generate`` (the pre-patch ``base.qwen_generate``),
	         when it has been stashed on the module.
	      2. The Hugging Face router chat-completions endpoint, trying each model
	         in turn, when ``base._hf_token()`` returns a token.
	      3. ``_fallback_summary_from_prompt``, an extractive summary of the
	         prompt's own source text.

	    ``base.LAST_QWEN_ERROR`` is cleared on success and set to the last
	    collected errors when the extractive fallback is used.

	    Args:
	        prompt: Full text prompt.
	        image_url: Optional image URL; only sent to models whose name contains "VL".
	        max_tokens: Requested completion budget; capped at 1400 for router calls.

	    Returns:
	        The generated (or extracted) text.
	    """
	    import asyncio
	    import functools

	    errors = []
	    token = base._hf_token()
	    try:
	        original = getattr(base, "_original_qwen_generate", None)
	        if original:
	            txt = await original(prompt, image_url=image_url, max_tokens=max_tokens)
	            if txt:
	                base.LAST_QWEN_ERROR = ""
	                return txt
	            if getattr(base, "LAST_QWEN_ERROR", ""):
	                errors.append("sdk: " + str(base.LAST_QWEN_ERROR)[:260])
	    except Exception as e:
	        errors.append(f"sdk: {type(e).__name__}: {str(e)[:260]}")
	    if token:
	        # Candidate models in preference order; the env override goes first
	        # and duplicates/empties are skipped.
	        models = []
	        for m in [
	            os.getenv("QWEN_VL_MODEL", ""),
	            "Qwen/Qwen2.5-VL-7B-Instruct",
	            "Qwen/Qwen2.5-VL-3B-Instruct",
	            "Qwen/Qwen2.5-7B-Instruct",
	            "Qwen/Qwen2.5-3B-Instruct",
	            "Qwen/Qwen2.5-1.5B-Instruct",
	        ]:
	            if m and m not in models:
	                models.append(m)
	        headers = {"Authorization": "Bearer " + token, "Content-Type": "application/json"}
	        loop = asyncio.get_running_loop()
	        for model in models:
	            try:
	                is_vl = "VL" in model and bool(image_url)
	                user_content = ([{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}] if is_vl else prompt)
	                payload = {
	                    "model": model,
	                    "messages": [
	                        {"role": "system", "content": "Bạn là biên tập viên AI tiếng Việt. Chỉ tóm tắt súc tích nội dung nguồn, không viết lại toàn bài, không lặp ý, không bịa chi tiết."},
	                        {"role": "user", "content": user_content},
	                    ],
	                    "max_tokens": min(int(max_tokens or 900), 1400),
	                    "temperature": 0.35,
	                    "top_p": 0.85,
	                }
	                # requests is blocking; run it in the default executor so a
	                # slow model (timeout is 95 s) does not stall the event loop.
	                send = functools.partial(
	                    requests.post,
	                    "https://router.huggingface.co/v1/chat/completions",
	                    headers=headers, json=payload, timeout=95,
	                )
	                r = await loop.run_in_executor(None, send)
	                if r.status_code >= 300:
	                    errors.append(f"{model}: HTTP {r.status_code} {r.text[:180]}")
	                    continue
	                j = r.json()
	                # An empty "choices" list is reported as an empty response
	                # rather than surfacing as an IndexError.
	                choices = j.get("choices") or [{}]
	                txt = ((choices[0].get("message") or {}).get("content") or "").strip()
	                if txt:
	                    base.LAST_QWEN_ERROR = ""
	                    return txt
	                errors.append(f"{model}: empty response")
	            except Exception as e:
	                errors.append(f"{model}: {type(e).__name__}: {str(e)[:220]}")
	    else:
	        errors.append("missing HF_TOKEN")
	    base.LAST_QWEN_ERROR = " | ".join(errors[-6:]) or "Qwen unavailable; used extractive fallback"
	    print("[qwen resilient fallback]", base.LAST_QWEN_ERROR)
	    return _fallback_summary_from_prompt(prompt, max_units=12)


	# Install the resilient wrapper in place of base.qwen_generate. The original
	# implementation is stashed on the module (guarded by hasattr) so that
	# qwen_generate_resilient can still call it as its first attempt.
	# NOTE(review): indentation was lost in this listing; presumably only the
	# stash is guarded and the rebinding on the last line always runs. Confirm.
	if not hasattr(base, "_original_qwen_generate"):
	base._original_qwen_generate = base.qwen_generate
	base.qwen_generate = qwen_generate_resilient


	@app.get('/api/wall')
	def compat_wall():
	    """Return the first 80 stored AI wall posts as ``{'posts': [...]}``."""
	    wall = base._load_ai_wall()
	    return JSONResponse({'posts': wall[:80]})


	# (path, HTTP method) pairs whose existing handlers are removed here so the
	# replacements registered further down in this module take effect.
	_PATCHED_PATHS = {
	    ('/api/topic_post', 'POST'),
	    ('/api/url_wall', 'POST'),
	    ('/api/rewrite_share', 'POST'),
	    ('/api/ai/short/{post_id}', 'POST'),
	}
	# A route's ``methods`` attribute may be missing or None; treat both as
	# "no methods" instead of raising TypeError on the ``in`` test.
	app.router.routes = [
	    r for r in app.router.routes
	    if not any(
	        getattr(r, 'path', None) == p and m in (getattr(r, 'methods', None) or ())
	        for p, m in _PATCHED_PATHS
	    )
	]


	@app.post('/api/topic_post')
	async def compat_topic_post(request: Request):
	    """Create one Wall AI post per source article found for a topic.

	    Expects a JSON object with a ``topic`` field. Up to 4 source articles are
	    collected, each is summarised separately by ``base.qwen_generate``,
	    post-processed, given a source line if it lacks one, and turned into a
	    post. The new posts are stored ahead of the existing wall.

	    Returns:
	        ``{'post': first_new_post, 'posts': new_posts, 'count': n}``; HTTP 400
	        when the topic is missing (including a malformed or non-object JSON
	        body); HTTP 422 when no source article could be obtained.
	    """
	    # A malformed or non-object body is treated like an empty one, so the
	    # caller gets the 400 below instead of an unhandled server error.
	    try:
	        body = await request.json()
	    except Exception:
	        body = {}
	    if not isinstance(body, dict):
	        body = {}
	    topic = base._clean_text(body.get('topic', ''))
	    if not topic:
	        return JSONResponse({'error': 'missing topic'}, status_code=400)
	    articles = _topic_source_articles(topic, limit=4)
	    if not articles:
	        return JSONResponse({'error': 'Không lấy được bài viết nguồn cho chủ đề này.'}, status_code=422)
	    new_posts = []
	    posts = base._load_ai_wall()
	    for art in articles:
	        prompt = f"""Tóm tắt RIÊNG bài viết nguồn sau để đăng Tường AI.

	Chủ đề lọc: {topic}
	Tiêu đề bài nguồn: {art['title']}
	Nguồn: {art['via']}

	Yêu cầu bắt buộc:
	- Tóm tắt nội dung trong BÀI VIẾT này, không chỉ tiêu đề.
	- Không trộn với bài khác.
	- Không viết lại toàn bộ bài.
	- Không lặp ý.
	- 4-6 gạch đầu dòng, mỗi dòng 1 câu rõ ràng.
	- Giữ số liệu/nhân vật/thời điểm quan trọng nếu có.

	Nội dung bài:
	{art['raw'][:14000]}"""
	        text = await base.qwen_generate(prompt, image_url=art.get('image') or None, max_tokens=1500)
	        text = _postprocess_ai_text(text, max_units=20)
	        src = [art['source']]
	        if 'Nguồn tham khảo:' not in text:
	            text += "\n\n" + _source_line(src)
	        post = base.make_post(art['title'], text, art.get('image') or base.pollinations_image_url(art['title']), art.get('url') or '', 'topic_article', sources=src)
	        new_posts.append(post)
	    posts = new_posts + posts
	    base._save_ai_wall(posts)
	    return JSONResponse({'post': new_posts[0], 'posts': new_posts, 'count': len(new_posts)})


	@app.post('/api/url_wall')
	async def compat_url_wall(request: Request):
	    """Scrape a URL, summarise it, and put the summary at the top of the AI wall.

	    Expects a JSON object with a ``url`` field starting with ``http``.

	    Returns:
	        ``{'post': post}`` on success; HTTP 400 when the URL is missing
	        (including a malformed or non-object JSON body); HTTP 422 when the
	        page cannot be scraped or has fewer than 120 characters of content.
	    """
	    # A malformed or non-object body is treated like an empty one, so the
	    # caller gets the 400 below instead of an unhandled server error.
	    try:
	        body = await request.json()
	    except Exception:
	        body = {}
	    if not isinstance(body, dict):
	        body = {}
	    url = base._clean_text(body.get('url', ''))
	    if not url.startswith('http'):
	        return JSONResponse({'error': 'missing url'}, status_code=400)
	    try:
	        data = base.scrape_any_url(url)
	    except Exception as e:
	        return JSONResponse({'error': 'Không scrape được URL: ' + str(e)[:180]}, status_code=422)
	    # ``or ''`` keeps the concatenation safe if a field is present but None.
	    raw = ((data.get('summary') or '') + '\n' + (data.get('text') or '')).strip()
	    if len(raw) < 120:
	        return JSONResponse({'error': 'URL không có đủ nội dung để tóm tắt'}, status_code=422)
	    prompt = _make_summary_prompt(data.get('title', ''), raw, data.get('via', '') or base._domain(url))
	    text = await base.qwen_generate(prompt, image_url=data.get('image') or None, max_tokens=1500)
	    text = _postprocess_ai_text(text, max_units=20)
	    src = [{'title': data.get('title'), 'url': url, 'excerpt': raw[:500], 'via': data.get('via') or base._domain(url)}]
	    if 'Nguồn tham khảo:' not in text:
	        text += "\n\n" + _source_line(src)
	    post = base.make_post(data.get('title') or 'Bài viết', text, data.get('image') or '', url, 'url', sources=src)
	    posts = base._load_ai_wall()
	    posts.insert(0, post)
	    base._save_ai_wall(posts)
	    return JSONResponse({'post': post})


	def _is_relevant_image(img_url, title, text):
	"""Check if an image is relevant to the article content."""
	if not img_url:
	return False
	skip_patterns = ['pixel', 'analytics', 'tracking', '1x1.gif', 'spacer.gif',
	'logo', 'icon', 'avatar', 'emoji', 'smiley', 'sprite',
	'advertisement', 'ad-banner', 'sponsored', 'banner-ads']
	img_lower = img_url.lower()
	for p in skip_patterns:
	if p in img_lower:
	return False
	if not any(img_lower.endswith(ext) for ext in ['.jpg', '.jpeg', '.png', '.webp', '.gif']):
	return False
	return True


	def _filter_relevant_images(images, title, text, max_images=8):
	    """Return at most *max_images* distinct images that pass ``_is_relevant_image``.

	    The original order is preserved and only the first occurrence of each
	    image is considered. An empty or missing *images* yields ``[]``.
	    """
	    if not images:
	        return []
	    # dict.fromkeys keeps first-seen order while dropping repeats.
	    distinct = list(dict.fromkeys(images))
	    accepted = [candidate for candidate in distinct if _is_relevant_image(candidate, title, text)]
	    return accepted[:max_images]


	def _extract_key_points_for_slides(paragraphs, max_points=12):
	    """Collect up to *max_points* sentences from *paragraphs* for use as slide text.

	    Every sentence of every paragraph is considered, not only the first.
	    Sentences shorter than 30 characters are skipped, as is any sentence
	    whose first 60 characters already occur inside an accepted point. A
	    trailing period is added when a sentence lacks terminal punctuation.
	    """
	    # A sentence ends at . ! or ? followed by whitespace and an uppercase
	    # (including Vietnamese) letter or a digit.
	    boundary = re.compile(r'(?<=[.!?])\s+(?=[A-ZÀ-Ỹ0-9])')
	    collected = []
	    for para in paragraphs:
	        if len(collected) >= max_points:
	            break
	        para = _clean(para)
	        if not para:
	            continue
	        for chunk in boundary.split(para):
	            chunk = chunk.strip()
	            if not chunk:
	                continue
	            if len(collected) >= max_points:
	                break
	            candidate = _clean(chunk)
	            if len(candidate) < 30:
	                continue
	            head = candidate[:60]
	            if any(head in earlier for earlier in collected):
	                continue
	            if not candidate.endswith(('.', '!', '?')):
	                candidate += '.'
	            collected.append(candidate)
	    return collected


	def _scrape_article_images(url):
	    """Fetch an article page and return its title, paragraphs and relevant images.

	    The main content block is the first matching selector that contains at
	    least two ``<p>`` elements, else ``<body>``. Paragraphs longer than 40
	    characters are kept. Image candidates are the ``og:image`` followed by
	    every ``<img>`` (directly or inside ``<figure>``) in the block, then
	    filtered by ``_filter_relevant_images``. Scheme-relative URLs get
	    ``https:`` and other relative URLs are resolved against *url*, so the
	    returned image URLs are absolute.

	    Returns:
	        ``{'title', 'paragraphs', 'images', 'og_img'}``, or ``None`` when
	        fetching or parsing fails.
	    """
	    from urllib.parse import urljoin

	    def _absolute(src):
	        # Make an image reference usable outside the page it came from.
	        if src.startswith('//'):
	            return 'https:' + src
	        if src.startswith(('http://', 'https://')):
	            return src
	        return urljoin(url, src)

	    try:
	        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
	                   "Accept-Language": "vi-VN,vi;q=0.9,en;q=0.8"}
	        r = requests.get(url, headers=headers, timeout=15, allow_redirects=True)
	        r.encoding = 'utf-8'
	        soup = BeautifulSoup(r.text, 'lxml')
	        # Remove non-content elements before looking for text and images.
	        for tag in soup.find_all(['script', 'style', 'nav', 'footer', 'aside', 'form']):
	            tag.decompose()
	        h1 = soup.find('h1')
	        ogt = soup.find('meta', property='og:title')
	        title = (h1.get_text(strip=True) if h1 else '') or (ogt.get('content', '') if ogt else '')
	        ogi = soup.find('meta', property='og:image')
	        og_img = ogi.get('content', '') if ogi else ''
	        if og_img:
	            og_img = _absolute(og_img)
	        block = None
	        for sel in ['article', '.singular-content', '.detail-content', '.fck_detail', '.content-detail', '.knc-content', 'main', '.cms-body', '.article__body']:
	            el = soup.select_one(sel)
	            if el and len(el.find_all('p')) >= 2:
	                block = el
	                break
	        if not block:
	            block = soup.body or soup
	        paragraphs = []
	        all_images = []
	        seen_imgs = set()
	        if og_img and og_img not in seen_imgs:
	            all_images.append(og_img)
	            seen_imgs.add(og_img)
	        for el in block.find_all(['p', 'h2', 'h3', 'figure', 'img'], recursive=True):
	            if el.name == 'p':
	                t = _clean(el.get_text(strip=True))
	                if t and len(t) > 40:
	                    paragraphs.append(t)
	            elif el.name in ('figure', 'img'):
	                im = el if el.name == 'img' else el.find('img')
	                if im:
	                    # Lazy-loading attributes take precedence over src.
	                    src = im.get('data-src') or im.get('src') or im.get('data-original') or ''
	                    if src and 'base64' not in src:
	                        src = _absolute(src)
	                        if src not in seen_imgs:
	                            all_images.append(src)
	                            seen_imgs.add(src)
	        relevant_images = _filter_relevant_images(all_images, title, ' '.join(paragraphs[:5]))
	        return {'title': _clean(title), 'paragraphs': paragraphs, 'images': relevant_images, 'og_img': og_img}
	    except Exception:
	        return None


	@app.post('/api/rewrite_share')
	async def compat_rewrite_share(request: Request):
	    """Summarise an article URL into a wall post and build image slides for it.

	    Expects a JSON object with a ``url`` field starting with ``http``. The
	    summary post is stored at the top of the AI wall. Slides pair up to 12
	    key sentences from the page with its relevant images; when there are
	    more sentences than images the last image is reused.

	    Returns:
	        ``{'post': post, 'slides': [...]}`` on success; HTTP 400 when the URL
	        is missing (including a malformed or non-object JSON body); HTTP 422
	        when the page cannot be read or has fewer than 120 characters.
	    """
	    # A malformed or non-object body is treated like an empty one, so the
	    # caller gets the 400 below instead of an unhandled server error.
	    try:
	        body = await request.json()
	    except Exception:
	        body = {}
	    if not isinstance(body, dict):
	        body = {}
	    url = base._clean_text(body.get('url', ''))
	    if not url.startswith('http'):
	        return JSONResponse({'error': 'missing url'}, status_code=400)
	    try:
	        data = base.scrape_any_url(url)
	    except Exception as e:
	        return JSONResponse({'error': 'Không đọc được bài viết: ' + str(e)[:180]}, status_code=422)
	    # ``or ''`` keeps the concatenation safe if a field is present but None.
	    raw = ((data.get('summary') or '') + '\n' + (data.get('text') or '')).strip()
	    if len(raw) < 120:
	        return JSONResponse({'error': 'Bài viết không đủ nội dung để tóm tắt'}, status_code=422)
	    prompt = _make_summary_prompt(data.get('title', ''), raw, data.get('via', '') or base._domain(url))
	    text = await base.qwen_generate(prompt, image_url=data.get('image') or None, max_tokens=1500)
	    text = _postprocess_ai_text(text, max_units=20)
	    src = [{'title': data.get('title'), 'url': url, 'excerpt': raw[:500], 'via': data.get('via') or base._domain(url)}]
	    if 'Nguồn tham khảo:' not in text:
	        text += "\n\n" + _source_line(src)
	    post = base.make_post(data.get('title') or 'Bài viết', text, data.get('image') or '', url, 'summary', sources=src)
	    posts = base._load_ai_wall()
	    posts.insert(0, post)
	    base._save_ai_wall(posts)

	    # Generate slides with relevant images only.
	    slides = []
	    page_data = _scrape_article_images(url)
	    if page_data and page_data.get('paragraphs'):
	        key_points = _extract_key_points_for_slides(page_data['paragraphs'], max_points=12)
	        if key_points:
	            relevant_imgs = page_data.get('images', [])
	            if not relevant_imgs and page_data.get('og_img'):
	                relevant_imgs = [page_data['og_img']]
	            for i, point in enumerate(key_points):
	                img = relevant_imgs[i] if i < len(relevant_imgs) else (relevant_imgs[-1] if relevant_imgs else '')
	                slides.append({'text': point, 'image': img, 'index': i + 1})

	    return JSONResponse({'post': post, 'slides': slides})


	def _emotion_script(text, emotion):
	    """Return *text* cleaned for speech.

	    No emotion-specific prefix is added, so the spoken content stays exactly
	    the cleaned text. *emotion* is accepted but not used in this function.
	    """
	    return _clean(text)


	def _tts_script_smart(post, emotion):
	    """Build a plain, one-sentence-per-line narration script for *post*.

	    The text comes from ``base._short_script`` when available, else from the
	    post's text or title. Leading bullet markers are removed, line breaks are
	    turned into sentence breaks, and each sentence ends its own line. No
	    emotion prefix is added; *emotion* is accepted but unused.

	    Scripts longer than 3000 characters are truncated, then cut back to the
	    last sentence-ending punctuation when that lies beyond character 700.
	    """
	    raw = base._short_script(post) if hasattr(base, '_short_script') else _clean(post.get('text', '') or post.get('title', ''))
	    # The listed patterns had lost their "*" characters: the first was an
	    # unterminated character class and the second could not match a bare newline.
	    raw = re.sub(r"^[•\-\*]\s*", "", raw, flags=re.M)
	    raw = re.sub(r"\s*\n\s*", ". ", raw)
	    raw = re.sub(r"([\.\!\?])\s*", r"\1\n", raw)
	    raw = re.sub(r"\n{2,}", "\n", raw).strip()
	    if len(raw) > 3000:
	        raw = raw[:3000]
	        # Avoid ending mid-sentence, but never shrink below ~700 characters.
	        cut = max(raw.rfind("."), raw.rfind("!"), raw.rfind("?"))
	        if cut > 700:
	            raw = raw[:cut + 1]
	    return raw


	def _split_subtitle_sentences(script):
	    """Split *script* into at most 12 subtitle-sized sentences.

	    Each non-empty line is split after ``.``, ``!`` or ``?`` followed by
	    whitespace. Only pieces of 8 to 140 characters (after cleaning) are
	    kept; shorter or longer pieces are discarded.
	    """
	    pieces = []
	    for raw_line in script.splitlines():
	        line = _clean(raw_line)
	        if not line:
	            continue
	        candidates = (_clean(part) for part in re.split(r"(?<=[\.\!\?])\s+", line))
	        pieces.extend(c for c in candidates if 8 <= len(c) <= 140)
	    return pieces[:12]


	def _srt_time(sec):
	ms = int((sec - int(sec)) * 1000)
	sec = int(sec)
	h = sec // 3600
	m = (sec % 3600) // 60
	s = sec % 60
	return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"


	def _write_srt(script, path, total_duration=30):
	    """Write *script* to *path* as an SRT subtitle file with evenly sized cues.

	    Cues come from ``_split_subtitle_sentences``; if that yields nothing, the
	    first 120 characters of the script form a single cue. Each cue lasts
	    ``total_duration / cue_count`` seconds, clamped to the range 2.2-5.0. The
	    first cue starts at 0.3 s and consecutive cues are separated by 0.15 s.
	    """
	    cues = _split_subtitle_sentences(script) or [script[:120]]
	    per_cue = max(2.2, min(5.0, total_duration / max(1, len(cues))))
	    clock = 0.3
	    with open(path, 'w', encoding='utf-8') as srt:
	        for number, caption in enumerate(cues, 1):
	            begin, finish = clock, clock + per_cue
	            clock = finish + 0.15
	            srt.write(f"{number}\n{_srt_time(begin)} --> {_srt_time(finish)}\n{caption}\n\n")


	def _wrap_text_px(draw, text, font, max_width, max_lines):
	    """Greedily wrap *text* into lines no wider than *max_width* pixels.

	    Width is measured with ``draw.textbbox``; if that call fails, 20 pixels
	    per character is assumed. At most *max_lines* lines are returned, and
	    text that does not fit within them is dropped.
	    """
	    wrapped = []
	    current = ""
	    for word in _clean(text).split():
	        candidate = (current + " " + word).strip()
	        try:
	            px = draw.textbbox((0, 0), candidate, font=font)[2]
	        except Exception:
	            px = len(candidate) * 20
	        if px <= max_width:
	            current = candidate
	            continue
	        # The word does not fit: close the current line and start a new one.
	        if current:
	            wrapped.append(current)
	        current = word
	        if len(wrapped) >= max_lines:
	            break
	    if current and len(wrapped) < max_lines:
	        wrapped.append(current)
	    return wrapped


	def _make_short_frame_full(post, img_path, out_path):
	    """Render a single 1080x1920 frame for *post* and save it to *out_path*.

	    The image at *img_path* is scaled and centre-cropped to 1080x760 and
	    pasted at the top. If it cannot be loaded, the plain dark background is
	    kept. Below it are a brand label, the post title (up to 4 wrapped lines)
	    and the post text with its "Nguồn tham khảo:" tail removed (up to 14
	    wrapped lines, stopping once the cursor passes y=1640). When Pillow is
	    unavailable the work is delegated to ``base._make_short_frame``.
	    """
	    if Image is None:
	        return base._make_short_frame(post, img_path, out_path)
	    W, H = 1080, 1920
	    canvas = Image.new("RGB", (W, H), (14, 14, 14))
	    try:
	        photo = Image.open(img_path).convert("RGB")
	        box_w, box_h = 1080, 760
	        photo_ratio = photo.width / photo.height
	        # Scale so the photo covers the box, then crop the centre.
	        if photo_ratio > box_w / box_h:
	            scaled_h = box_h
	            scaled_w = int(scaled_h * photo_ratio)
	        else:
	            scaled_w = box_w
	            scaled_h = int(scaled_w / photo_ratio)
	        photo = photo.resize((scaled_w, scaled_h))
	        x0 = (scaled_w - box_w) // 2
	        y0 = (scaled_h - box_h) // 2
	        photo = photo.crop((x0, y0, x0 + box_w, y0 + box_h))
	        canvas.paste(photo, (0, 0))
	    except Exception:
	        pass
	    draw = ImageDraw.Draw(canvas)
	    try:
	        font_title = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 54)
	        font_body = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 38)
	        font_label = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 30)
	    except Exception:
	        font_title = font_body = font_label = None
	    # The text panel starts at y=720 and overlaps the photo's bottom 40 px.
	    draw.rectangle((0, 720, W, H), fill=(14, 14, 14))
	    margin = 48
	    text_width = W - margin * 2
	    draw.text((margin, 770), "VNEWS · Tường AI", fill=(92, 184, 122), font=font_label)
	    cursor_y = 830
	    for title_line in _wrap_text_px(draw, post.get("title", ""), font_title, text_width, 4):
	        draw.text((margin, cursor_y), title_line, fill=(255, 255, 255), font=font_title)
	        cursor_y += 66
	    cursor_y += 18
	    summary = re.sub(r"Nguồn tham khảo:.*", "", post.get("text", ""), flags=re.S).strip()
	    for body_line in _wrap_text_px(draw, summary, font_body, text_width, 14):
	        draw.text((margin, cursor_y), body_line, fill=(220, 220, 220), font=font_body)
	        cursor_y += 50
	        if cursor_y > 1640:
	            break
	    canvas.save(out_path, quality=92)




	def _summary_segments_from_post(post, max_segments=25):
	    """Split a post's summary into the segments narrated one per scene.

	    The post text (or title) is cleaned, a leading "Bản tin AI viết lại:"
	    label and everything from "Nguồn tham khảo:" onward are removed. Each
	    remaining line of at least 18 characters, with its list marker stripped
	    and heading-like lines skipped, is a segment. If fewer than 3 lines
	    qualify, the text is split into sentences of at least 25 characters
	    instead. Segments are de-duplicated with ``_dedupe_units``.

	    Returns:
	        Up to *max_segments* segments; if none qualify, a one-element list
	        with the post title (or a default label when the title is empty).
	    """
	    raw = _clean(post.get('text') or post.get('title') or '')
	    raw = re.sub(r'^Bản tin AI viết lại:\s*', '', raw, flags=re.I)
	    raw = re.sub(r'Nguồn tham khảo:.*$', '', raw, flags=re.I | re.S).strip()
	    lines = []
	    for ln in raw.splitlines():
	        ln = _clean(re.sub(r'^[•\-\*\d\.\)\s]+', '', ln))
	        if not ln:
	            continue
	        if ln.lower().startswith(('điểm chính', 'tiêu đề', 'sapo', 'nguồn tham khảo')):
	            continue
	        if len(ln) >= 18:
	            lines.append(ln)
	    if len(lines) < 3:
	        # Too few bullet lines: treat the text as prose and split by sentence.
	        lines = []
	        for s in re.split(r'(?<=[\.\!\?])\s+', raw):
	            s = _clean(s)
	            if len(s) >= 25:
	                lines.append(s)
	    segs = _dedupe_units(lines, max_units=max_segments)
	    if segs:
	        return segs[:max_segments]
	    # ``or`` also covers a title key that exists but is None or empty.
	    return [post.get('title') or 'Bản tin VNEWS']


	def _make_scene_frame(post, segment, idx, total, img_path, out_path, emotion='neutral'):
	    """Render the 1080x1920 frame for one narrated segment and save it to *out_path*.

	    Layout: the post image as a darkened full-canvas backdrop with a 720 px
	    hero crop on top, a dark text panel from y=680, a row of progress dots
	    (the current one highlighted), a brand label, a "Đoạn i/n" badge, the
	    segment text, and the post title under a divider. If the image cannot be
	    loaded the plain background is used. When Pillow is unavailable the work
	    is delegated to ``_make_short_frame_full``.

	    Args:
	        post: Post dict; only ``title`` is read here.
	        segment: Text of this scene.
	        idx: Zero-based index of the scene.
	        total: Total number of scenes (number of progress dots).
	        img_path: Path of the downloaded post image.
	        out_path: Destination image path.
	        emotion: Accepted for interface compatibility; not used for drawing.
	    """
	    if Image is None:
	        return _make_short_frame_full(post, img_path, out_path)
	    W, H = 1080, 1920
	    bg = Image.new('RGB', (W, H), (10, 10, 10))
	    try:
	        im = Image.open(img_path).convert('RGB')
	        ratio = im.width / max(1, im.height)

	        def _cover(width, height):
	            # Scale the image to cover width x height, then crop its centre.
	            if ratio > width / height:
	                nh = height
	                nw = int(nh * ratio)
	            else:
	                nw = width
	                nh = int(nw / ratio)
	            left = (nw - width) // 2
	            top = (nh - height) // 2
	            return im.resize((nw, nh)).crop((left, top, left + width, top + height))

	        bg.paste(_cover(W, H), (0, 0))
	        bg = Image.blend(bg, Image.new('RGB', (W, H), (0, 0, 0)), 0.50)
	        bg.paste(_cover(W, 720), (0, 0))
	    except Exception:
	        pass
	    draw = ImageDraw.Draw(bg)
	    try:
	        font_brand = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', 34)
	        font_small = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', 28)
	        font_seg = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', 58)
	        font_title = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', 34)
	    except Exception:
	        font_brand = font_small = font_seg = font_title = None
	    draw.rectangle((0, 680, W, H), fill=(12, 12, 12))
	    # Progress dots: 24 px wide, 38 px apart, one per scene.
	    dot_x = 48
	    dot_y = 742
	    for i in range(total):
	        fill = (92, 184, 122) if i == idx else (70, 70, 70)
	        x0 = dot_x + i * 38
	        draw.rounded_rectangle((x0, dot_y, x0 + 24, dot_y + 10), radius=5, fill=fill)
	    draw.text((48, 780), 'VNEWS AI SHORT', fill=(110, 231, 143), font=font_brand)
	    draw.rounded_rectangle((48, 834, 260, 880), radius=20, fill=(28, 70, 45))
	    draw.text((66, 842), f'Đoạn {idx+1}/{total}', fill=(235, 235, 235), font=font_small)
	    y = 940
	    maxw = W - 96
	    # Up to 18 wrapped lines so a whole key point can be shown; drawing stops
	    # once the cursor passes y=1500.
	    for ln in _wrap_text_px(draw, segment, font_seg, maxw, 18):
	        draw.text((48, y), ln, fill=(255, 255, 255), font=font_seg)
	        y += 74
	        if y > 1500:
	            break
	    y2 = 1640
	    draw.line((48, y2 - 22, W - 48, y2 - 22), fill=(70, 70, 70), width=2)
	    for ln in _wrap_text_px(draw, post.get('title', ''), font_title, maxw, 3):
	        draw.text((48, y2), ln, fill=(220, 220, 220), font=font_title)
	        y2 += 46
	    bg.save(out_path, quality=92)


	def _estimate_audio_duration(path, fallback=15.0):
	"""Estimate audio duration with 15s minimum per segment for complete bullet reading."""
	try:
	pr=subprocess.run(['ffprobe','-v','error','-show_entries','format=duration','-of','default=noprint_wrappers=1:no_key=1',path], stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=20)
	return max(12.0, float((pr.stdout or b'').decode().strip() or fallback))
	except Exception:
	return fallback


	# Lower-cased ``voice`` request value -> edge-tts voice name. Unknown values
	# fall back to 'vi-VN-HoaiMyNeural' at lookup time.
	_EDGE_VOICES = {
	    # Vietnamese
	    'vi-vn-hoaimyneural': 'vi-VN-HoaiMyNeural',
	    'vi-vn-namminhneural': 'vi-VN-NamMinhNeural',
	    'hoaimy': 'vi-VN-HoaiMyNeural',
	    'namminh': 'vi-VN-NamMinhNeural',
	    'nam': 'vi-VN-NamMinhNeural',
	    'male': 'vi-VN-NamMinhNeural',
	    'nu': 'vi-VN-HoaiMyNeural',
	    'female': 'vi-VN-HoaiMyNeural',
	    'mien-nam': 'vi-VN-HoaiMyNeural',
	    # English - multilingual
	    'en-us-andrewmultilingualneural': 'en-US-AndrewMultilingualNeural',
	    'en-au-williammultilingualneural': 'en-AU-WilliamMultilingualNeural',
	    'andrew': 'en-US-AndrewMultilingualNeural',
	    'en_andrew': 'en-US-AndrewMultilingualNeural',
	    'jenny': 'en-US-AndrewMultilingualNeural',
	    'en_jenny': 'en-US-AndrewMultilingualNeural',
	    # Portuguese - multilingual (every alias maps to Thalita)
	    'pt-br-thalitamultilingualneural': 'pt-BR-ThalitaMultilingualNeural',
	    'thalita': 'pt-BR-ThalitaMultilingualNeural',
	    'pt_thalita': 'pt-BR-ThalitaMultilingualNeural',
	    'pt_br_thalita': 'pt-BR-ThalitaMultilingualNeural',
	    'pt': 'pt-BR-ThalitaMultilingualNeural',
	    'pt_francisco': 'pt-BR-ThalitaMultilingualNeural',
	    # French - multilingual
	    'fr-fr-viviennemultilingualneural': 'fr-FR-VivienneMultilingualNeural',
	    'fr-fr-remymultilingualneural': 'fr-FR-RemyMultilingualNeural',
	    'denise': 'fr-FR-VivienneMultilingualNeural',
	    'fr': 'fr-FR-VivienneMultilingualNeural',
	    'fr_denise': 'fr-FR-VivienneMultilingualNeural',
	    # German - multilingual
	    'de-de-seraphinamultilingualneural': 'de-DE-SeraphinaMultilingualNeural',
	    'de-de-florianmultilingualneural': 'de-DE-FlorianMultilingualNeural',
	    'katja': 'de-DE-SeraphinaMultilingualNeural',
	    'de': 'de-DE-SeraphinaMultilingualNeural',
	    'de_katja': 'de-DE-SeraphinaMultilingualNeural',
	    # Korean - multilingual (every alias maps to Hyunsu). The misspelled
	    # 'hyusu' key is kept for existing callers; the correctly spelled id
	    # is accepted as well.
	    'ko-kr-hyusumultilingualneural': 'ko-KR-HyunsuMultilingualNeural',
	    'ko-kr-hyunsumultilingualneural': 'ko-KR-HyunsuMultilingualNeural',
	    'ko-kr-hyunsuneural': 'ko-KR-HyunsuMultilingualNeural',
	    'sunhee': 'ko-KR-HyunsuMultilingualNeural',
	    'ko': 'ko-KR-HyunsuMultilingualNeural',
	    'ko_sunhee': 'ko-KR-HyunsuMultilingualNeural',
	    # Italian - multilingual
	    'it-it-giuseppemultilingualneural': 'it-IT-GiuseppeMultilingualNeural',
	    # Spanish aliases, kept for backward compatibility (English multilingual voice)
	    'ela': 'en-US-AndrewMultilingualNeural',
	    'es_ela': 'en-US-AndrewMultilingualNeural',
	    'es': 'en-US-AndrewMultilingualNeural',
	    'es_carlos': 'en-US-AndrewMultilingualNeural',
	    # Japanese aliases, kept for backward compatibility
	    'nanami': 'en-US-AndrewMultilingualNeural',
	    'ja': 'en-US-AndrewMultilingualNeural',
	    'ja_nanami': 'en-US-AndrewMultilingualNeural',
	    # Chinese aliases, kept for backward compatibility
	    'xiaochen': 'en-US-AndrewMultilingualNeural',
	    'zh': 'en-US-AndrewMultilingualNeural',
	    'zh_xiaochen': 'en-US-AndrewMultilingualNeural',
	}


	def _finish_short(post, posts, post_id, suffix, voice, emotion, speed, segments):
	    """Record the short's metadata on *post*, save the wall, and build the response."""
	    post['video'] = '/api/ai/short-file/' + post_id + suffix
	    post['short_voice'] = voice
	    post['short_emotion'] = emotion
	    post['short_speed'] = speed
	    post['short_segments'] = segments
	    post['short_subtitles'] = False
	    base._save_ai_wall(posts)
	    return JSONResponse({'video': post['video'], 'voice': voice, 'emotion': emotion, 'speed': speed, 'subtitles': False, 'segments': segments})


	@app.post('/api/ai/short/{post_id}')
	async def patched_ai_short(post_id: str, request: Request):
	    """Render (or reuse) a narrated vertical video for a wall post.

	    Optional JSON body fields: ``voice`` (default ``'nu'``), ``emotion``
	    (default ``'neutral'``) and ``speed`` (default 1.0, clamped to
	    0.85-1.35; a non-numeric value falls back to 1.0).

	    The post summary is split into segments. Each segment gets its own frame
	    and narration (edge-tts first, gTTS as fallback), the narration is
	    tempo-adjusted with ffmpeg, and the per-segment clips are concatenated.
	    The output file name includes voice, emotion, speed and a hash of the
	    segments, so an identical request reuses the existing file.

	    Returns:
	        JSON with ``video``, ``voice``, ``emotion``, ``speed``, ``subtitles``
	        and ``segments``; HTTP 404 when the post does not exist; HTTP 503
	        when gTTS is unavailable; HTTP 500 when rendering fails.
	    """
	    import sys

	    try:
	        body = await request.json()
	    except Exception:
	        body = {}
	    if not isinstance(body, dict):
	        body = {}
	    voice = str(body.get('voice', 'nu')).strip().lower()
	    emotion = str(body.get('emotion', 'neutral')).strip().lower()
	    try:
	        speed = float(body.get('speed', 1.0) or 1.0)
	    except (TypeError, ValueError):
	        speed = 1.0
	    speed = max(0.85, min(1.35, speed))

	    posts = base._load_ai_wall()
	    post = next((p for p in posts if str(p.get('id')) == str(post_id)), None)
	    if not post:
	        return JSONResponse({'error': 'post not found'}, status_code=404)

	    segments = _summary_segments_from_post(post, max_segments=25)
	    seg_hash = hashlib.md5(('|'.join(segments) + voice + emotion + str(speed)).encode('utf-8')).hexdigest()[:8]
	    os.makedirs(base.SHORTS_DIR, exist_ok=True)
	    suffix = f"_{voice}_{emotion}_{str(speed).replace('.', 'p')}_{seg_hash}_scenes_nosub"
	    out_mp4 = os.path.join(base.SHORTS_DIR, base._safe_name(post_id + suffix) + '.mp4')
	    if os.path.exists(out_mp4):
	        # Same content and settings were rendered before: reuse the file.
	        return _finish_short(post, posts, post_id, suffix, voice, emotion, speed, segments)
	    if base.gTTS is None:
	        return JSONResponse({'error': 'gTTS chưa sẵn sàng'}, status_code=503)

	    work = os.path.join(base.SHORTS_DIR, base._safe_name(post_id + suffix))
	    os.makedirs(work, exist_ok=True)
	    img = os.path.join(work, 'image.jpg')
	    try:
	        base._download_image(post.get('img'), post.get('title', 'AI news'), img)
	        edge_voice = _EDGE_VOICES.get(voice, 'vi-VN-HoaiMyNeural')
	        part_files = []
	        for idx, seg in enumerate(segments):
	            frame = os.path.join(work, f'frame_{idx:02d}.jpg')
	            aud = os.path.join(work, f'voice_{idx:02d}.mp3')
	            aud_fast = os.path.join(work, f'voice_{idx:02d}_fast.mp3')
	            part = os.path.join(work, f'part_{idx:02d}.mp4')
	            _make_scene_frame(post, seg, idx, len(segments), img, frame, emotion=emotion)
	            spoken = _emotion_script(seg, emotion)
	            try:
	                # Use the running interpreter so edge_tts is resolved from
	                # the same environment as this application.
	                subprocess.run([sys.executable, '-m', 'edge_tts', '--voice', edge_voice, '--text', spoken, '--write-media', aud], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=120)
	            except Exception:
	                tld = 'com.vn' if voice in ('nu', 'female', 'mien-nam', 'hoaimy') else 'com'
	                try:
	                    base.gTTS(spoken, lang='vi', tld=tld, slow=False).save(aud)
	                except TypeError:
	                    # Retry without ``tld`` if the constructor rejects it.
	                    base.gTTS(spoken, lang='vi', slow=False).save(aud)
	            subprocess.run(['ffmpeg', '-y', '-i', aud, '-filter:a', f'atempo={speed}', '-vn', aud_fast], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=90)
	            dur = _estimate_audio_duration(aud_fast, fallback=15.0) + 0.35
	            subprocess.run(['ffmpeg', '-y', '-loop', '1', '-t', str(dur), '-i', frame, '-i', aud_fast, '-shortest', '-c:v', 'libx264', '-tune', 'stillimage', '-pix_fmt', 'yuv420p', '-c:a', 'aac', '-b:a', '128k', part], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=150)
	            part_files.append(part)
	        concat = os.path.join(work, 'concat.txt')
	        with open(concat, 'w', encoding='utf-8') as f:
	            for p in part_files:
	                # Escape single quotes for the concat list's quoting rules.
	                f.write("file '" + p.replace("'", "'\\''") + "'\n")
	        subprocess.run(['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', concat, '-c', 'copy', out_mp4], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=180)
	        return _finish_short(post, posts, post_id, suffix, voice, emotion, speed, segments)
	    except Exception as e:
	        return JSONResponse({'error': 'Không tạo được shorts: ' + str(e)[:220]}, status_code=500)


	@app.get('/api/ai/short-file/{file_id}')
	def patched_ai_short_file(file_id: str):
	    """Serve a rendered short as ``video/mp4``, or 404 when the file does not exist.

	    The file is looked up in ``base.SHORTS_DIR`` under the sanitised
	    *file_id* with an ``.mp4`` extension.
	    """
	    video_path = os.path.join(base.SHORTS_DIR, base._safe_name(file_id) + '.mp4')
	    if os.path.exists(video_path):
	        return FileResponse(video_path, media_type='video/mp4', filename=f'vnews-ai-{file_id}.mp4')
	    return JSONResponse({'error': 'not found'}, status_code=404)


	@app.get('/api/ai_shorts')
	def api_ai_shorts():
	    """Return the first 80 wall posts that have a ``video`` value."""
	    with_video = []
	    for entry in base._load_ai_wall():
	        if entry.get('video'):
	            with_video.append(entry)
	    return JSONResponse({'posts': with_video[:80]})


	# Drop any existing GET handler for "/". A route's ``methods`` attribute may
	# be missing or None; treat both as "no methods" instead of raising
	# TypeError on the ``in`` test.
	app.router.routes = [
	    r for r in app.router.routes
	    if not (getattr(r, 'path', None) == '/' and 'GET' in (getattr(r, 'methods', None) or ()))
	]