Fix ai_patch syntax/self import and stabilize segmented shorts
Browse files- ai_patch.py +175 -208
ai_patch.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
-
import os, re,
|
| 2 |
-
import
|
| 3 |
-
|
| 4 |
-
from ai_ext import app
|
| 5 |
from fastapi import Request
|
| 6 |
from fastapi.responses import JSONResponse, HTMLResponse, FileResponse
|
|
|
|
|
|
|
| 7 |
|
| 8 |
try:
|
| 9 |
from PIL import Image, ImageDraw, ImageFont
|
|
@@ -21,68 +22,66 @@ def _norm(s):
|
|
| 21 |
return re.sub(r"\s+", " ", s).strip()
|
| 22 |
|
| 23 |
|
| 24 |
-
def _similar(a,
|
| 25 |
-
ta,
|
| 26 |
-
if not ta or not tb:
|
| 27 |
-
return len(ta
|
| 28 |
|
| 29 |
|
| 30 |
-
def _dedupe_units(units,
|
| 31 |
-
out
|
| 32 |
for u in units:
|
| 33 |
-
u
|
| 34 |
-
if len(u)
|
| 35 |
-
nu
|
| 36 |
-
if nu in seen or any(_similar(u,
|
| 37 |
-
seen.add(nu);
|
| 38 |
-
if len(out)
|
| 39 |
return out
|
| 40 |
|
| 41 |
|
| 42 |
-
def
|
| 43 |
-
text
|
| 44 |
-
if not text:
|
| 45 |
-
|
| 46 |
-
for line in re.split(r"\n+",
|
| 47 |
-
line
|
| 48 |
-
if not line:
|
| 49 |
-
low
|
| 50 |
-
if any(low.startswith(p) and len(line)
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
for marker in ["Nội dung nguồn:", "Nội dung gốc:", "Nội dung:", "Nguồn/bối cảnh internet:"]:
|
| 68 |
if marker in text:
|
| 69 |
-
text
|
| 70 |
-
text
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
return "\n".join("• " + u for u in units) if units else ("• " + text[:700].rsplit(" ", 1)[0] if text else "• Không có đủ nội dung nguồn để tóm tắt.")
|
| 75 |
|
| 76 |
|
| 77 |
def _source_line(sources):
|
| 78 |
names=[]
|
| 79 |
for s in (sources or [])[:5]:
|
| 80 |
via=s.get("via") or base._domain(s.get("url","")) or s.get("title","")
|
| 81 |
-
if via and via not in names:
|
| 82 |
return "Nguồn tham khảo: "+", ".join(names[:5]) if names else "Nguồn tham khảo: tổng hợp internet"
|
| 83 |
|
| 84 |
|
| 85 |
-
def _make_summary_prompt(title,
|
| 86 |
return f"""Bạn là biên tập viên tóm tắt tin tức tiếng Việt.
|
| 87 |
|
| 88 |
NHIỆM VỤ BẮT BUỘC:
|
|
@@ -90,7 +89,6 @@ NHIỆM VỤ BẮT BUỘC:
|
|
| 90 |
- Không lặp lại cùng một ý, cùng một câu, cùng một chi tiết.
|
| 91 |
- Không thêm thông tin ngoài nguồn.
|
| 92 |
- Tối đa 5 gạch đầu dòng, mỗi gạch đầu dòng 1 câu ngắn.
|
| 93 |
-
- Nếu bài có số liệu/nhân vật/thời điểm quan trọng thì giữ lại.
|
| 94 |
|
| 95 |
Tiêu đề nguồn: {title}
|
| 96 |
Nguồn: {source_hint}
|
|
@@ -99,79 +97,70 @@ Nội dung nguồn:
|
|
| 99 |
{raw[:14000]}"""
|
| 100 |
|
| 101 |
|
| 102 |
-
def _rich_web_context(topic,
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
except Exception:
|
| 107 |
-
ctx, sources = "", []
|
| 108 |
-
rich, rich_sources, seen = [], [], set()
|
| 109 |
for s in (sources or [])[:limit*2]:
|
| 110 |
-
url
|
| 111 |
-
if not url.startswith("http") or url in seen:
|
| 112 |
seen.add(url)
|
| 113 |
try:
|
| 114 |
-
data
|
| 115 |
-
raw
|
| 116 |
-
if len(raw)
|
| 117 |
-
title
|
| 118 |
-
via
|
| 119 |
rich.append(f"### {title} ({via})\n{raw[:2400]}")
|
| 120 |
rich_sources.append({"title":title,"url":url,"excerpt":raw[:700],"via":via})
|
| 121 |
-
if len(rich)
|
| 122 |
-
except Exception:
|
| 123 |
-
|
| 124 |
-
if
|
| 125 |
-
|
| 126 |
-
return f"Chủ đề cần tóm tắt: {topic}. Không truy xuất được nguồn đầy đủ, hãy tạo bản tóm tắt định hướng ngắn.", []
|
| 127 |
|
| 128 |
|
| 129 |
-
async def
|
| 130 |
-
token
|
| 131 |
try:
|
| 132 |
-
|
| 133 |
-
if
|
| 134 |
-
txt
|
| 135 |
-
if txt:
|
| 136 |
-
|
| 137 |
-
except Exception as e: errors.append(f"sdk: {type(e).__name__}: {str(e)[:220]}")
|
| 138 |
if token:
|
| 139 |
-
models=[]
|
| 140 |
-
for m in [os.getenv("QWEN_VL_MODEL",""),"Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen2.5-VL-3B-Instruct","Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-3B-Instruct","Qwen/Qwen2.5-1.5B-Instruct"]:
|
| 141 |
-
if m and m not in models: models.append(m)
|
| 142 |
headers={"Authorization":"Bearer "+token,"Content-Type":"application/json"}
|
| 143 |
-
for model in
|
|
|
|
| 144 |
try:
|
| 145 |
-
is_vl
|
| 146 |
-
|
| 147 |
-
payload={"model":model,"messages":[{"role":"system","content":"Bạn là biên tập viên AI tiếng Việt. Chỉ tóm tắt súc tích nội dung nguồn, không viết lại toàn bài, không lặp ý
|
| 148 |
r=requests.post("https://router.huggingface.co/v1/chat/completions",headers=headers,json=payload,timeout=95)
|
| 149 |
-
if r.status_code>=300:
|
| 150 |
txt=(r.json().get("choices",[{}])[0].get("message",{}).get("content") or "").strip()
|
| 151 |
-
if txt:
|
| 152 |
-
except Exception as e:
|
| 153 |
-
base.LAST_QWEN_ERROR
|
| 154 |
-
return _fallback_summary(prompt,
|
| 155 |
|
| 156 |
|
| 157 |
-
if not hasattr(base,
|
| 158 |
-
base._original_qwen_generate
|
| 159 |
-
base.qwen_generate
|
| 160 |
|
| 161 |
|
| 162 |
-
_PATCHED={('/api/topic_post','POST'),('/api/url_wall','POST'),('/api/rewrite_share','POST'),('/api/ai/short/{post_id}','POST')}
|
| 163 |
app.router.routes=[r for r in app.router.routes if not any(getattr(r,'path',None)==p and m in getattr(r,'methods',set()) for p,m in _PATCHED)]
|
| 164 |
|
| 165 |
-
|
| 166 |
@app.get('/api/wall')
|
| 167 |
-
def compat_wall():
|
| 168 |
-
|
| 169 |
|
| 170 |
@app.post('/api/topic_post')
|
| 171 |
-
async def
|
| 172 |
-
body=await request.json();
|
| 173 |
-
if not topic:
|
| 174 |
-
ctx,
|
| 175 |
image=base.pollinations_image_url(topic)
|
| 176 |
prompt=f"""Tóm tắt tổng hợp chủ đề để đăng Tường AI.
|
| 177 |
|
|
@@ -188,179 +177,157 @@ Yêu cầu:
|
|
| 188 |
Nguồn/bối cảnh internet:
|
| 189 |
{ctx}"""
|
| 190 |
text=await base.qwen_generate(prompt,image_url=image,max_tokens=950)
|
| 191 |
-
text=
|
| 192 |
-
if 'Nguồn tham khảo:' not in text:
|
| 193 |
post=base.make_post('Tổng hợp: '+topic,text,image,'','topic',sources=sources[:5])
|
| 194 |
-
posts=base._load_ai_wall();
|
| 195 |
return JSONResponse({'post':post})
|
| 196 |
|
| 197 |
-
|
| 198 |
@app.post('/api/url_wall')
|
| 199 |
-
async def
|
| 200 |
-
body=await request.json();
|
| 201 |
-
if not url.startswith('http'):
|
| 202 |
-
try:
|
| 203 |
-
except Exception as e:
|
| 204 |
raw=(data.get('summary','')+'\n'+data.get('text','')).strip()
|
| 205 |
-
if len(raw)<120:
|
| 206 |
-
prompt=_make_summary_prompt(data.get('title',''),
|
| 207 |
text=await base.qwen_generate(prompt,image_url=data.get('image') or None,max_tokens=850)
|
| 208 |
-
text=
|
| 209 |
src=[{'title':data.get('title'),'url':url,'excerpt':raw[:500],'via':data.get('via') or base._domain(url)}]
|
| 210 |
-
if 'Nguồn tham khảo:' not in text:
|
| 211 |
post=base.make_post(data.get('title') or 'Bài viết',text,data.get('image') or '',url,'url',sources=src)
|
| 212 |
-
posts=base._load_ai_wall();
|
| 213 |
return JSONResponse({'post':post})
|
| 214 |
|
| 215 |
-
|
| 216 |
@app.post('/api/rewrite_share')
|
| 217 |
-
async def
|
| 218 |
-
body=await request.json();
|
| 219 |
-
if not url.startswith('http'):
|
| 220 |
-
try:
|
| 221 |
-
except Exception as e:
|
| 222 |
raw=(data.get('summary','')+'\n'+data.get('text','')).strip()
|
| 223 |
-
if len(raw)<120:
|
| 224 |
-
prompt=_make_summary_prompt(data.get('title',''),
|
| 225 |
text=await base.qwen_generate(prompt,image_url=data.get('image') or None,max_tokens=850)
|
| 226 |
-
text=
|
| 227 |
src=[{'title':data.get('title'),'url':url,'excerpt':raw[:500],'via':data.get('via') or base._domain(url)}]
|
| 228 |
-
if 'Nguồn tham khảo:' not in text:
|
| 229 |
post=base.make_post(data.get('title') or 'Bài viết',text,data.get('image') or '',url,'summary',sources=src)
|
| 230 |
-
posts=base._load_ai_wall();
|
| 231 |
return JSONResponse({'post':post})
|
| 232 |
|
| 233 |
|
| 234 |
def split_segments(post):
|
| 235 |
-
title=_clean(post.get('title',''))
|
| 236 |
-
text=re.sub(r'Nguồn tham khảo:.*','',post.get('text',''),flags=re.S).strip()
|
| 237 |
lines=[]
|
| 238 |
-
if title:
|
| 239 |
-
for line in re.split(r'\n+',
|
| 240 |
line=_clean(re.sub(r'^[•\-*]\s*','',line))
|
| 241 |
-
if len(line)>8:
|
| 242 |
-
|
| 243 |
-
segs=[]; cur=''
|
| 244 |
for line in lines:
|
| 245 |
-
if len(cur)+len(line)<190:
|
| 246 |
-
cur=(cur+' '+line).strip()
|
| 247 |
else:
|
| 248 |
-
if cur:
|
| 249 |
cur=line
|
| 250 |
-
if cur:
|
| 251 |
return segs[:14]
|
| 252 |
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
words=_clean(text).split(); lines=[]; cur=''
|
| 256 |
for w in words:
|
| 257 |
test=(cur+' '+w).strip()
|
| 258 |
-
try:
|
| 259 |
-
except Exception:
|
| 260 |
-
if width<=max_width:
|
| 261 |
else:
|
| 262 |
-
if cur:
|
| 263 |
cur=w
|
| 264 |
-
if len(lines)>=max_lines:
|
| 265 |
-
if cur and len(lines)<max_lines:
|
| 266 |
return lines
|
| 267 |
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
W,H=1080,1920
|
| 272 |
-
bg=Image.new('RGB',(W,H),(14,14,14))
|
| 273 |
try:
|
| 274 |
-
im=Image.open(img_path).convert('RGB')
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
bg.paste(im.crop((left,top,left+target[0],top+target[1])),(0,0))
|
| 280 |
-
except Exception: pass
|
| 281 |
draw=ImageDraw.Draw(bg)
|
| 282 |
try:
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
except Exception: ft=fb=fl=None
|
| 287 |
-
draw.rectangle((0,720,W,H),fill=(14,14,14))
|
| 288 |
-
margin=48; maxw=W-margin*2
|
| 289 |
draw.text((margin,770),f'VNEWS · Short AI {idx}/{total}',fill=(92,184,122),font=fl)
|
| 290 |
y=840
|
| 291 |
-
for ln in wrap_text(draw,
|
| 292 |
-
draw.text((margin,y),ln,fill=(242,242,242),font=fb)
|
| 293 |
-
y
|
| 294 |
-
if y>1650: break
|
| 295 |
bg.save(out_path,quality=92)
|
| 296 |
|
| 297 |
-
|
| 298 |
-
def make_tts(text, voice, out_path):
|
| 299 |
edge_voice={'nam':'vi-VN-NamMinhNeural','male':'vi-VN-NamMinhNeural','nu':'vi-VN-HoaiMyNeural','female':'vi-VN-HoaiMyNeural','mien-nam':'vi-VN-HoaiMyNeural'}.get(voice,'vi-VN-HoaiMyNeural')
|
| 300 |
-
try:
|
| 301 |
-
subprocess.run(['python','-m','edge_tts','--voice',edge_voice,'--text',text,'--write-media',out_path],check=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,timeout=160)
|
| 302 |
except Exception:
|
| 303 |
tld='com.vn' if voice in ('nu','female','mien-nam') else 'com'
|
| 304 |
-
try:
|
| 305 |
-
except TypeError:
|
| 306 |
-
|
| 307 |
|
| 308 |
@app.post('/api/ai/short/{post_id}')
|
| 309 |
-
async def
|
| 310 |
-
try:
|
| 311 |
-
except Exception:
|
| 312 |
-
voice=str(body.get('voice','nu')).lower().strip();
|
| 313 |
-
posts=base._load_ai_wall();
|
| 314 |
-
if not post:
|
| 315 |
-
os.makedirs(base.SHORTS_DIR,exist_ok=True)
|
| 316 |
-
suffix=f'_{voice}_{emotion}_{str(speed).replace(".","p")}_segments'
|
| 317 |
out_mp4=os.path.join(base.SHORTS_DIR,base._safe_name(post_id+suffix)+'.mp4')
|
| 318 |
-
if os.path.exists(out_mp4):
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
img=os.path.join(work,'image.jpg'); base._download_image(post.get('img'),post.get('title','AI news'),img)
|
| 322 |
segs=split_segments(post)
|
| 323 |
-
if not segs:
|
| 324 |
clips=[]
|
| 325 |
try:
|
| 326 |
for i,seg in enumerate(segs,1):
|
| 327 |
-
frame=os.path.join(work,f'frame_{i:02d}.jpg');
|
| 328 |
prefix={'urgent':'Tin nhanh.','warm':'Câu chuyện đáng chú ý.','serious':'Bản tin nghiêm túc.','energetic':'Cập nhật nổi bật.'}.get(emotion,'')
|
| 329 |
spoken=(prefix+' '+seg).strip() if i==1 and prefix else seg
|
| 330 |
-
make_segment_frame(post,
|
| 331 |
-
make_tts(spoken, voice, aud)
|
| 332 |
subprocess.run(['ffmpeg','-y','-i',aud,'-filter:a',f'atempo={speed}','-vn',audf],check=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,timeout=120)
|
| 333 |
subprocess.run(['ffmpeg','-y','-loop','1','-i',frame,'-i',audf,'-shortest','-c:v','libx264','-tune','stillimage','-pix_fmt','yuv420p','-c:a','aac','-b:a','128k','-vf','scale=1080:1920',clip],check=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,timeout=180)
|
| 334 |
clips.append(clip)
|
| 335 |
listfile=os.path.join(work,'list.txt')
|
| 336 |
with open(listfile,'w',encoding='utf-8') as f:
|
| 337 |
-
for c in clips:
|
| 338 |
-
".format(c.replace("'","'\\''")))
|
| 339 |
subprocess.run(['ffmpeg','-y','-f','concat','-safe','0','-i',listfile,'-c','copy',out_mp4],check=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,timeout=240)
|
| 340 |
-
post['video']='/api/ai/short-file/'+post_id+suffix;
|
| 341 |
-
base._save_ai_wall(posts)
|
| 342 |
return JSONResponse({'video':post['video'],'segments':len(segs),'speed':speed,'subtitles':False})
|
| 343 |
-
except Exception as e:
|
| 344 |
-
return JSONResponse({'error':'Không tạo được shorts: '+str(e)[:180]},status_code=500)
|
| 345 |
-
|
| 346 |
|
| 347 |
@app.get('/api/ai/short-file/{file_id}')
|
| 348 |
-
def
|
| 349 |
path=os.path.join(base.SHORTS_DIR,base._safe_name(file_id)+'.mp4')
|
| 350 |
-
if not os.path.exists(path):
|
| 351 |
return FileResponse(path,media_type='video/mp4',filename=f'vnews-ai-{file_id}.mp4')
|
| 352 |
|
| 353 |
-
|
| 354 |
-
# Keep previous UI patch, but override topic alert/progress wording.
|
| 355 |
app.router.routes=[r for r in app.router.routes if not (getattr(r,'path',None)=='/' and 'GET' in getattr(r,'methods',set()))]
|
| 356 |
-
|
| 357 |
-
async def index_patched():
|
| 358 |
-
with open('/app/static/index.html','r',encoding='utf-8') as f: html=f.read()
|
| 359 |
-
inject=PATCH_INJECT+r'''
|
| 360 |
<script>
|
| 361 |
(function(){
|
|
|
|
| 362 |
window.createTopicPost=function(){let inp=document.getElementById('ai-topic-input');let topic=(inp&&inp.value||'').trim();if(!topic)return alert('Nhập chủ đề trước');fetch('/api/topic_post',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({topic})}).then(r=>r.json().then(j=>({ok:r.ok,j}))).then(({ok,j})=>{if(ok&&j.post){window.location.reload();alert('Đã tổng hợp nội dung các nguồn thành 1 bản tóm tắt trên Tường AI');}else alert(j.error||'Lỗi tạo bài')}).catch(e=>alert(e.message||'Lỗi tạo bài'));};
|
| 363 |
})();
|
| 364 |
</script>
|
| 365 |
'''
|
| 366 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, re, html as html_lib, subprocess, requests
|
| 2 |
+
from urllib.parse import quote_plus
|
| 3 |
+
from bs4 import BeautifulSoup
|
|
|
|
| 4 |
from fastapi import Request
|
| 5 |
from fastapi.responses import JSONResponse, HTMLResponse, FileResponse
|
| 6 |
+
import ai_ext as base
|
| 7 |
+
from ai_ext import app
|
| 8 |
|
| 9 |
try:
|
| 10 |
from PIL import Image, ImageDraw, ImageFont
|
|
|
|
| 22 |
return re.sub(r"\s+", " ", s).strip()
|
| 23 |
|
| 24 |
|
| 25 |
+
def _similar(a, b):
    """Return True when two strings share most of their words.

    Both inputs are normalised with ``_norm`` and split on whitespace into
    token sets.  The score is ``|A & B| / min(|A|, |B|)``; a score of 0.72
    or more counts as similar.  Returns False when either side yields no
    tokens.
    """
    tokens_a = set(_norm(a).split())
    tokens_b = set(_norm(b).split())
    if not tokens_a or not tokens_b:
        return False
    # Both sets are non-empty here, so the divisor is always at least 1.
    smaller = min(len(tokens_a), len(tokens_b))
    return len(tokens_a & tokens_b) / smaller >= 0.72
|
| 29 |
|
| 30 |
|
| 31 |
+
def _dedupe_units(units, max_units=7):
    """Return up to ``max_units`` distinct, cleaned text units.

    Each unit has its leading list marker removed and is passed through
    ``_clean``.  Units shorter than 18 characters are dropped, as are units
    whose normalised form was already seen or that ``_similar`` reports as
    close to a unit already kept.  Input order is preserved.

    Args:
        units: Iterable of candidate strings (lines or sentences).
        max_units: Stop once this many units have been kept.

    Returns:
        List of kept units.
    """
    # Strip bullets ("-", "•", "*") and enumerators ("1.", "2)") only.
    # An enumerator must be followed by whitespace so that real leading
    # numbers such as "2024 ..." or "3.5 ..." are not eaten.
    leading_marker = re.compile(r"^\s*(?:[-•*]+\s*|\d{1,3}[.)]\s+)+")
    out = []
    seen = set()
    for u in units:
        u = _clean(leading_marker.sub("", u))
        if len(u) < 18:
            continue
        nu = _norm(u)
        if nu in seen or any(_similar(u, o) for o in out):
            continue
        seen.add(nu)
        out.append(u)
        if len(out) >= max_units:
            break
    return out
|
| 41 |
|
| 42 |
|
| 43 |
+
def _postprocess(text, max_units=7):
    """Turn raw model output into an optional title plus a bullet list.

    Steps:
      1. Clean the text and split it into non-empty lines.
      2. Drop short (< 80 chars) lead-in lines that start with a known
         boilerplate prefix.
      3. Split lines longer than 260 characters into sentences.
      4. De-duplicate with ``_dedupe_units``.
      5. Treat the first line as a title when it is at most 90 characters
         and does not start with a bullet; bullets similar to the title are
         removed.

    Args:
        text: Raw text returned by the generator.
        max_units: Maximum number of bullets in the result.

    Returns:
        ``"<title>\\n\\n• ...\\n• ..."`` when a title was detected, otherwise
        only the bullet lines.  Falls back to the first 900 characters of
        the cleaned text when no unit survives de-duplication, and returns
        the cleaned value unchanged when it is empty.
    """
    # NOTE(review): splitting on "\n" below assumes _clean() keeps newlines;
    # _clean is defined outside this block -- confirm.
    text = _clean(text)
    if not text:
        return text

    boilerplate = ("dưới đây", "sau đây", "tôi sẽ", "tiêu đề:", "sapo:", "nội dung:")
    lines = []
    for line in re.split(r"\n+", text):
        line = _clean(line)
        if not line:
            continue
        if len(line) < 80 and line.lower().startswith(boilerplate):
            continue
        lines.append(line)

    units = []
    for line in lines:
        if len(line) > 260:
            units.extend(re.split(r"(?<=[\.\!\?])\s+(?=[A-ZÀ-Ỹ0-9])", line))
        else:
            units.append(line)

    title = ""
    if lines and len(lines[0]) <= 90 and not lines[0].startswith(("-", "•", "*")):
        title = lines[0]

    # The title line is itself a candidate unit and is filtered out below;
    # allow one extra slot so it does not use up one of the bullets.
    units = _dedupe_units(units, max_units + 1 if title else max_units)
    if not units:
        return text[:900]
    if title:
        units = [u for u in units if not _similar(u, title)]

    body = "\n".join("• " + u for u in units[:max_units])
    return (title + "\n\n" + body).strip() if title else body
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _fallback_summary(prompt, max_units=6):
    """Build an extractive bullet summary directly from a prompt.

    The source text is whatever follows the first matching section marker
    (markers are tried in list order); with no marker the whole prompt is
    used.  URLs are removed, the text is split into sentences, and sentences
    of 45-260 characters are de-duplicated into at most ``max_units``
    bullets.

    Returns:
        Bullet lines joined by newlines.  When no sentence qualifies, a
        single bullet holding at most 700 characters of the text, or a fixed
        "not enough content" bullet when the text is empty.
    """
    text = prompt or ""
    for marker in ("Nội dung nguồn:", "Nội dung gốc:", "Nội dung:", "Nguồn/bối cảnh internet:"):
        if marker in text:
            text = text.split(marker, 1)[1]
            break
    text = _clean(re.sub(r"https?://\S+", "", text))

    sentences = [_clean(s) for s in re.split(r"(?<=[\.\!\?])\s+(?=[A-ZÀ-Ỹ0-9])", text)]
    units = _dedupe_units([s for s in sentences if 45 <= len(s) <= 260], max_units)
    if units:
        return "\n".join("• " + u for u in units)
    if not text:
        return "• Không có đủ nội dung nguồn để tóm tắt."

    excerpt = text
    if len(text) > 700:
        # Only trim back to a word boundary when the text was actually cut;
        # short text must keep its last word.
        excerpt = text[:700].rsplit(" ", 1)[0]
    return "• " + excerpt
|
|
|
|
| 74 |
|
| 75 |
|
| 76 |
def _source_line(sources):
|
| 77 |
names=[]
|
| 78 |
for s in (sources or [])[:5]:
|
| 79 |
via=s.get("via") or base._domain(s.get("url","")) or s.get("title","")
|
| 80 |
+
if via and via not in names:names.append(via)
|
| 81 |
return "Nguồn tham khảo: "+", ".join(names[:5]) if names else "Nguồn tham khảo: tổng hợp internet"
|
| 82 |
|
| 83 |
|
| 84 |
+
def _make_summary_prompt(title,raw,source_hint=""):
|
| 85 |
return f"""Bạn là biên tập viên tóm tắt tin tức tiếng Việt.
|
| 86 |
|
| 87 |
NHIỆM VỤ BẮT BUỘC:
|
|
|
|
| 89 |
- Không lặp lại cùng một ý, cùng một câu, cùng một chi tiết.
|
| 90 |
- Không thêm thông tin ngoài nguồn.
|
| 91 |
- Tối đa 5 gạch đầu dòng, mỗi gạch đầu dòng 1 câu ngắn.
|
|
|
|
| 92 |
|
| 93 |
Tiêu đề nguồn: {title}
|
| 94 |
Nguồn: {source_hint}
|
|
|
|
| 97 |
{raw[:14000]}"""
|
| 98 |
|
| 99 |
|
| 100 |
+
def _rich_web_context(topic, limit=5):
    """Collect full-text context for a topic from web search results.

    Calls ``base.web_context`` for search results, then scrapes up to
    ``limit * 2`` of the returned URLs with ``base.scrape_any_url`` and keeps
    the first ``limit`` pages that yield at least 180 characters of text.

    Args:
        topic: Topic string to search for.
        limit: Maximum number of scraped sources to keep.

    Returns:
        ``(context, sources)``.  Preference order: scraped pages (each as a
        ``### title (via)`` section with up to 2400 chars), then the context
        from ``base.web_context``, then a fixed instruction sentence with an
        empty source list.  ``sources`` is always a list.
    """
    try:
        ctx, sources = base.web_context(topic, limit=limit)
    except Exception:
        ctx, sources = "", []
    # Callers slice the returned sources; never hand back None.
    sources = list(sources or [])

    rich = []
    rich_sources = []
    seen = set()
    for s in sources[:limit * 2]:
        if not isinstance(s, dict):
            continue
        url = s.get("url") or ""
        if not url.startswith("http") or url in seen:
            continue
        seen.add(url)
        try:
            data = base.scrape_any_url(url)
            # Tolerate a None summary/text so a usable page is not discarded.
            raw = ((data.get("summary") or "") + "\n" + (data.get("text") or "")).strip()
            if len(raw) < 180:
                continue
            title = data.get("title") or s.get("title") or url
            via = data.get("via") or s.get("via") or base._domain(url)
            rich.append(f"### {title} ({via})\n{raw[:2400]}")
            rich_sources.append({"title": title, "url": url, "excerpt": raw[:700], "via": via})
            if len(rich) >= limit:
                break
        except Exception:
            # Best effort: a source that fails to scrape is skipped.
            continue

    if rich:
        return "\n\n".join(rich), rich_sources
    if ctx:
        return ctx, sources
    return f"Chủ đề cần tóm tắt: {topic}. Không truy xuất được nguồn đầy đủ, hãy tạo bản tóm tắt định hướng ngắn.", []
|
|
|
|
| 121 |
|
| 122 |
|
| 123 |
+
async def qwen_resilient(prompt, image_url=None, max_tokens=1200):
    """Generate text with layered fallbacks.

    Order of attempts:
      1. ``base._original_qwen_generate`` (the generator that was installed
         before this module replaced ``base.qwen_generate``), if present.
      2. The Hugging Face router chat-completions endpoint, trying each
         candidate model in turn (only when ``base._hf_token()`` is truthy).
      3. ``_fallback_summary(prompt, 6)``, an extractive summary.

    ``base.LAST_QWEN_ERROR`` is cleared on success and set to the last five
    collected error notes (or ``"fallback"``) when step 3 is used.

    Args:
        prompt: User prompt text.
        image_url: Optional image URL; sent only to models whose name
            contains ``"VL"``.
        max_tokens: Requested completion budget; capped at 1400 for the
            router call.

    Returns:
        The generated (or fallback) text.
    """
    import asyncio
    import functools

    token = base._hf_token()
    errors = []
    try:
        orig = getattr(base, "_original_qwen_generate", None)
        # Guard against the saved "original" being this very function.
        if orig and orig is not qwen_resilient:
            txt = await orig(prompt, image_url=image_url, max_tokens=max_tokens)
            if txt:
                base.LAST_QWEN_ERROR = ""
                return txt
    except Exception as e:
        errors.append(f"sdk:{str(e)[:160]}")

    if token:
        headers = {"Authorization": "Bearer " + token, "Content-Type": "application/json"}
        candidates = [
            os.getenv("QWEN_VL_MODEL", ""),
            "Qwen/Qwen2.5-VL-7B-Instruct",
            "Qwen/Qwen2.5-VL-3B-Instruct",
            "Qwen/Qwen2.5-7B-Instruct",
            "Qwen/Qwen2.5-3B-Instruct",
            "Qwen/Qwen2.5-1.5B-Instruct",
        ]
        # Drop blanks and duplicates (the env override may repeat a default)
        # so the same failing model is not called twice.
        models = [m for m in dict.fromkeys(candidates) if m]
        loop = asyncio.get_running_loop()
        for model in models:
            try:
                is_vl = "VL" in model and bool(image_url)
                if is_vl:
                    content = [
                        {"type": "image_url", "image_url": {"url": image_url}},
                        {"type": "text", "text": prompt},
                    ]
                else:
                    content = prompt
                payload = {
                    "model": model,
                    "messages": [
                        {"role": "system", "content": "Bạn là biên tập viên AI tiếng Việt. Chỉ tóm tắt súc tích nội dung nguồn, không viết lại toàn bài, không lặp ý."},
                        {"role": "user", "content": content},
                    ],
                    "max_tokens": min(int(max_tokens or 900), 1400),
                    "temperature": 0.35,
                    "top_p": 0.85,
                }
                # requests is blocking; run it in the default executor so a
                # slow model (timeout is 95 s) does not stall the event loop.
                send = functools.partial(
                    requests.post,
                    "https://router.huggingface.co/v1/chat/completions",
                    headers=headers,
                    json=payload,
                    timeout=95,
                )
                r = await loop.run_in_executor(None, send)
                if r.status_code >= 300:
                    errors.append(f"{model}:{r.status_code}")
                    continue
                txt = (r.json().get("choices", [{}])[0].get("message", {}).get("content") or "").strip()
                if txt:
                    base.LAST_QWEN_ERROR = ""
                    return txt
            except Exception as e:
                errors.append(f"{model}:{str(e)[:120]}")

    base.LAST_QWEN_ERROR = " | ".join(errors[-5:]) or "fallback"
    return _fallback_summary(prompt, 6)
|
| 146 |
|
| 147 |
|
| 148 |
+
# Remember the generator that was in place before this module loaded.  The
# hasattr guard keeps a re-import from overwriting the saved original with
# qwen_resilient itself.
if not hasattr(base, "_original_qwen_generate"):
    base._original_qwen_generate = base.qwen_generate
# NOTE(review): indentation was lost in the copy under review; this
# assignment is assumed to be unconditional -- confirm against the real file.
base.qwen_generate = qwen_resilient


# (path, method) pairs that this module re-registers below.  Existing routes
# matching any pair are removed first so the new handlers are the only ones.
_PATCHED = {
    ('/api/topic_post', 'POST'),
    ('/api/url_wall', 'POST'),
    ('/api/rewrite_share', 'POST'),
    ('/api/ai/short/{post_id}', 'POST'),
    ('/api/ai/short-file/{file_id}', 'GET'),
}
app.router.routes = [
    r for r in app.router.routes
    if not any(
        # ``methods`` can be missing or None on some route types; treat both
        # as "no methods" instead of raising on ``m in None``.
        getattr(r, 'path', None) == p and m in (getattr(r, 'methods', None) or ())
        for p, m in _PATCHED
    )
]
|
| 155 |
|
|
|
|
| 156 |
@app.get('/api/wall')
def compat_wall():
    """Return the first 80 posts of the AI wall as ``{"posts": [...]}``."""
    # An empty/None store yields an empty list instead of failing on the slice.
    posts = base._load_ai_wall() or []
    return JSONResponse({'posts': posts[:80]})
|
|
|
|
| 158 |
|
| 159 |
@app.post('/api/topic_post')
|
| 160 |
+
async def topic_post(request:Request):
|
| 161 |
+
body=await request.json();topic=base._clean_text(body.get('topic',''))
|
| 162 |
+
if not topic:return JSONResponse({'error':'missing topic'},status_code=400)
|
| 163 |
+
ctx,sources=_rich_web_context(topic,5)
|
| 164 |
image=base.pollinations_image_url(topic)
|
| 165 |
prompt=f"""Tóm tắt tổng hợp chủ đề để đăng Tường AI.
|
| 166 |
|
|
|
|
| 177 |
Nguồn/bối cảnh internet:
|
| 178 |
{ctx}"""
|
| 179 |
text=await base.qwen_generate(prompt,image_url=image,max_tokens=950)
|
| 180 |
+
text=_postprocess(text,7)
|
| 181 |
+
if 'Nguồn tham khảo:' not in text:text+='\n\n'+_source_line(sources)
|
| 182 |
post=base.make_post('Tổng hợp: '+topic,text,image,'','topic',sources=sources[:5])
|
| 183 |
+
posts=base._load_ai_wall();posts.insert(0,post);base._save_ai_wall(posts)
|
| 184 |
return JSONResponse({'post':post})
|
| 185 |
|
|
|
|
| 186 |
async def _summarize_url_to_wall(request, kind, scrape_error, short_error):
    """Shared body of the URL-summarising routes.

    Reads ``{"url": ...}`` from the request, scrapes it with
    ``base.scrape_any_url``, asks ``base.qwen_generate`` for a summary,
    post-processes it, appends a source line when the text has none, builds
    a post with ``base.make_post`` and inserts it at the front of the wall.

    Args:
        request: Incoming request carrying a JSON object body.
        kind: Post kind passed to ``base.make_post``.
        scrape_error: Message prefix used when scraping raises.
        short_error: Message used when under 120 chars of text were found.

    Returns:
        ``JSONResponse`` with ``{"post": ...}``; 400 when the URL is missing
        or is not http(s); 422 when scraping fails or yields too little text.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        # A JSON array/string body has no "url"; answer 400, not 500.
        body = {}
    url = base._clean_text(body.get('url') or '')
    if not url.startswith(('http://', 'https://')):
        return JSONResponse({'error': 'missing url'}, status_code=400)

    # NOTE(review): the URL is caller-supplied and fetched server-side;
    # confirm base.scrape_any_url restricts internal/private hosts.
    # NOTE(review): this call is synchronous and blocks the event loop while
    # the page downloads -- consider off-loading once thread-safety of the
    # scraper is confirmed.
    try:
        data = base.scrape_any_url(url)
    except Exception as e:
        return JSONResponse({'error': scrape_error + str(e)[:180]}, status_code=422)

    # Either field may be missing or None; concatenate defensively.
    raw = ((data.get('summary') or '') + '\n' + (data.get('text') or '')).strip()
    if len(raw) < 120:
        return JSONResponse({'error': short_error}, status_code=422)

    via = data.get('via') or base._domain(url)
    prompt = _make_summary_prompt(data.get('title') or '', raw, via)
    text = await base.qwen_generate(prompt, image_url=data.get('image') or None, max_tokens=850)
    text = _postprocess(text, 6)
    src = [{'title': data.get('title'), 'url': url, 'excerpt': raw[:500], 'via': via}]
    if 'Nguồn tham khảo:' not in text:
        text += '\n\n' + _source_line(src)

    post = base.make_post(data.get('title') or 'Bài viết', text, data.get('image') or '', url, kind, sources=src)
    posts = base._load_ai_wall()
    posts.insert(0, post)
    base._save_ai_wall(posts)
    return JSONResponse({'post': post})


@app.post('/api/url_wall')
async def url_wall(request: Request):
    """Summarise the page at the posted URL and add it to the wall as kind ``url``."""
    return await _summarize_url_to_wall(
        request,
        'url',
        'Không scrape được URL: ',
        'URL không có đủ nội dung để tóm tắt',
    )


@app.post('/api/rewrite_share')
async def rewrite_share(request: Request):
    """Summarise the article at the posted URL and add it to the wall as kind ``summary``."""
    return await _summarize_url_to_wall(
        request,
        'summary',
        'Không đọc được bài viết: ',
        'Bài viết không đủ nội dung để tóm tắt',
    )
|
| 219 |
|
| 220 |
|
| 221 |
def split_segments(post, max_chars=190, max_segments=14):
    """Split a post into short narration segments.

    The post title (if any) comes first, followed by each line of the post
    text with its leading bullet removed; everything from
    ``"Nguồn tham khảo:"`` onward is discarded and lines of 8 characters or
    fewer are skipped.  Consecutive lines are packed into one segment while
    the combined length stays below ``max_chars``; a single line longer than
    that becomes a segment on its own.

    Args:
        post: Post dict; ``title`` and ``text`` are read.
        max_chars: Packing threshold per segment (default 190).
        max_segments: Maximum number of segments returned (default 14).

    Returns:
        List of segment strings, at most ``max_segments`` long.
    """
    # ``or ''`` also covers keys that are present but None.
    title = _clean(post.get('title') or '')
    text = re.sub(r'Nguồn tham khảo:.*', '', post.get('text') or '', flags=re.S).strip()

    lines = []
    if title:
        lines.append(title)
    for line in re.split(r'\n+', text):
        line = _clean(re.sub(r'^[•\-*]\s*', '', line))
        if len(line) > 8:
            lines.append(line)

    segs = []
    cur = ''
    for line in lines:
        if len(cur) + len(line) < max_chars:
            cur = (cur + ' ' + line).strip()
        else:
            if cur:
                segs.append(cur)
            cur = line
    if cur:
        segs.append(cur)
    return segs[:max_segments]
|
| 236 |
|
| 237 |
+
def wrap_text(draw, text, font, max_width, max_lines):
    """Greedy word-wrap ``text`` to a pixel width.

    Words are appended to the current line while the rendered width, as
    measured by ``draw.textbbox``, stays within ``max_width``.  Wrapping
    stops once ``max_lines`` lines have been produced; remaining words are
    dropped.

    Args:
        draw: Drawing object providing ``textbbox``.
        text: Text to wrap; cleaned with ``_clean`` and split on whitespace.
        font: Font passed to ``textbbox`` (may be None).
        max_width: Maximum line width in pixels.
        max_lines: Maximum number of lines to return.

    Returns:
        List of line strings.
    """
    words = _clean(text).split()
    lines = []
    cur = ''
    for w in words:
        test = (cur + ' ' + w).strip()
        try:
            width = draw.textbbox((0, 0), test, font=font)[2]
        except Exception:
            # Measurement unavailable: fall back to 20 px per character.
            width = len(test) * 20
        if width <= max_width:
            cur = test
        else:
            if cur:
                lines.append(cur)
            cur = w
        if len(lines) >= max_lines:
            break
    if cur and len(lines) < max_lines:
        lines.append(cur)
    return lines
|
| 250 |
|
| 251 |
+
def make_segment_frame(post, segment, idx, total, img_path, out_path):
    """Render one 1080x1920 frame for a short-video segment.

    Layout: the post image, scaled to cover and centre-cropped to 1080x760,
    sits at the top; a dark panel starts at y=720 and carries a
    ``"VNEWS · Short AI idx/total"`` label followed by the wrapped segment
    text.  The frame is saved to ``out_path`` with quality 92.

    When ``Image`` is None the work is delegated to
    ``base._make_short_frame(post, img_path, out_path)``.

    Args:
        post: Post dict (only forwarded to the fallback renderer).
        segment: Text shown on this frame.
        idx: 1-based index of the segment.
        total: Total number of segments.
        img_path: Path of the background image.
        out_path: Destination path of the rendered frame.
    """
    if Image is None:
        return base._make_short_frame(post, img_path, out_path)

    W, H = 1080, 1920
    bg = Image.new('RGB', (W, H), (14, 14, 14))
    try:
        # Context manager releases the file handle; convert() returns an
        # independent in-memory image that stays valid afterwards.
        with Image.open(img_path) as src:
            im = src.convert('RGB')
        target = (1080, 760)
        ratio = im.width / im.height
        tr = target[0] / target[1]
        if ratio > tr:
            # Wider than the slot: match height, crop the sides.
            nh = target[1]
            nw = int(nh * ratio)
        else:
            # Taller than the slot: match width, crop top and bottom.
            nw = target[0]
            nh = int(nw / ratio)
        im = im.resize((nw, nh))
        left = (nw - target[0]) // 2
        top = (nh - target[1]) // 2
        bg.paste(im.crop((left, top, left + target[0], top + target[1])), (0, 0))
    except Exception:
        # Best effort: without a usable image the frame keeps its plain
        # dark background.
        pass

    draw = ImageDraw.Draw(bg)
    try:
        fb = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', 44)
        fl = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', 28)
    except Exception:
        # Fonts not installed: None is passed on to draw.text / wrap_text.
        fb = fl = None

    draw.rectangle((0, 720, W, H), fill=(14, 14, 14))
    margin = 48
    maxw = W - margin * 2
    draw.text((margin, 770), f'VNEWS · Short AI {idx}/{total}', fill=(92, 184, 122), font=fl)
    y = 840
    for ln in wrap_text(draw, segment, fb, maxw, 16):
        draw.text((margin, y), ln, fill=(242, 242, 242), font=fb)
        y += 58
        if y > 1650:
            break
    bg.save(out_path, quality=92)
|
| 271 |
|
| 272 |
+
def make_tts(text, voice, out_path):
    """Synthesise Vietnamese speech for ``text`` into ``out_path``.

    First runs the ``edge_tts`` module as a subprocess with the neural voice
    mapped from ``voice`` (unknown values use ``vi-VN-HoaiMyNeural``).  If
    that fails for any reason, falls back to ``base.gTTS``; a ``TypeError``
    from the ``tld`` argument triggers a retry without it.

    Args:
        text: Text to speak.
        voice: Voice key such as ``'nam'``, ``'nu'``, ``'male'``,
            ``'female'`` or ``'mien-nam'``.
        out_path: Path of the audio file to write.

    Raises:
        Whatever the gTTS fallback raises when it also fails.
    """
    import sys

    edge_voice = {
        'nam': 'vi-VN-NamMinhNeural',
        'male': 'vi-VN-NamMinhNeural',
        'nu': 'vi-VN-HoaiMyNeural',
        'female': 'vi-VN-HoaiMyNeural',
        'mien-nam': 'vi-VN-HoaiMyNeural',
    }.get(voice, 'vi-VN-HoaiMyNeural')
    # Use the interpreter running this app so edge_tts is looked up in the
    # same environment; a bare "python" on PATH may be a different install.
    python_bin = sys.executable or 'python'
    try:
        subprocess.run(
            [python_bin, '-m', 'edge_tts', '--voice', edge_voice, '--text', text, '--write-media', out_path],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=160,
        )
    except Exception:
        tld = 'com.vn' if voice in ('nu', 'female', 'mien-nam') else 'com'
        try:
            base.gTTS(text, lang='vi', tld=tld, slow=False).save(out_path)
        except TypeError:
            # gTTS build without the ``tld`` keyword.
            base.gTTS(text, lang='vi', slow=False).save(out_path)
|
|
|
|
| 279 |
|
| 280 |
@app.post('/api/ai/short/{post_id}')
async def segmented_short(post_id: str, request: Request):
    """Render (or reuse) a segmented vertical short video for a wall post.

    Optional JSON body: ``voice`` (default ``'nu'``), ``emotion`` (default
    ``'neutral'``) and ``speed`` (default 1.2, clamped to 0.85-1.35).  One
    clip is built per text segment (frame + TTS + tempo change), the clips
    are concatenated with ffmpeg, and the post is updated with the video URL
    and render settings.

    Returns:
        200 with ``video`` (plus ``segments``, ``speed``, ``subtitles`` on a
        fresh render); 404 when the post does not exist; 422 when the post
        has no usable text; 500 with an ``error`` message when rendering
        fails.
    """
    import math
    import shutil

    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        body = {}

    # voice/emotion end up in a file name and in the stored video URL, so
    # keep only characters that are safe in both.
    voice = re.sub(r'[^a-z0-9_-]', '', str(body.get('voice', 'nu')).lower().strip()) or 'nu'
    emotion = re.sub(r'[^a-z0-9_-]', '', str(body.get('emotion', 'neutral')).lower().strip()) or 'neutral'
    try:
        speed = float(body.get('speed') or 1.2)
    except (TypeError, ValueError):
        speed = 1.2
    if not math.isfinite(speed):
        speed = 1.2
    speed = max(0.85, min(1.35, speed))

    posts = base._load_ai_wall()
    post = next((p for p in posts if str(p.get('id')) == str(post_id)), None)
    if not post:
        return JSONResponse({'error': 'post not found'}, status_code=404)

    os.makedirs(base.SHORTS_DIR, exist_ok=True)
    suffix = f'_{voice}_{emotion}_{str(speed).replace(".", "p")}_segments'
    out_mp4 = os.path.join(base.SHORTS_DIR, base._safe_name(post_id + suffix) + '.mp4')
    if os.path.exists(out_mp4):
        # Same post and settings were rendered before: reuse the file.
        post['video'] = '/api/ai/short-file/' + post_id + suffix
        base._save_ai_wall(posts)
        return JSONResponse({'video': post['video']})

    work = os.path.join(base.SHORTS_DIR, base._safe_name(post_id + suffix))
    prefix = {
        'urgent': 'Tin nhanh.',
        'warm': 'Câu chuyện đáng chú ý.',
        'serious': 'Bản tin nghiêm túc.',
        'energetic': 'Cập nhật nổi bật.',
    }.get(emotion, '')
    # NOTE(review): the TTS/ffmpeg calls below are synchronous and hold the
    # event loop for the whole render; consider a worker once the helpers'
    # thread-safety is confirmed.
    try:
        os.makedirs(work, exist_ok=True)
        img = os.path.join(work, 'image.jpg')
        base._download_image(post.get('img'), post.get('title', 'AI news'), img)
        segs = split_segments(post)
        if not segs:
            return JSONResponse({'error': 'Không có nội dung để tạo short'}, status_code=422)

        clips = []
        for i, seg in enumerate(segs, 1):
            frame = os.path.join(work, f'frame_{i:02d}.jpg')
            aud = os.path.join(work, f'aud_{i:02d}.mp3')
            audf = os.path.join(work, f'audf_{i:02d}.mp3')
            clip = os.path.join(work, f'clip_{i:02d}.mp4')
            # Only the first segment gets the spoken mood prefix.
            spoken = (prefix + ' ' + seg).strip() if i == 1 and prefix else seg
            make_segment_frame(post, seg, i, len(segs), img, frame)
            make_tts(spoken, voice, aud)
            subprocess.run(['ffmpeg', '-y', '-i', aud, '-filter:a', f'atempo={speed}', '-vn', audf], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=120)
            subprocess.run(['ffmpeg', '-y', '-loop', '1', '-i', frame, '-i', audf, '-shortest', '-c:v', 'libx264', '-tune', 'stillimage', '-pix_fmt', 'yuv420p', '-c:a', 'aac', '-b:a', '128k', '-vf', 'scale=1080:1920', clip], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=180)
            clips.append(clip)

        listfile = os.path.join(work, 'list.txt')
        with open(listfile, 'w', encoding='utf-8') as f:
            for c in clips:
                # The concat demuxer resolves relative entries against the
                # list file's directory, so always write absolute paths.
                f.write("file '{}'\n".format(os.path.abspath(c).replace("'", "'\\''")))

        # Concatenate into the work dir and publish with an atomic rename so
        # a failed run never leaves a partial file that the cache check
        # above would later serve.
        tmp_mp4 = os.path.join(work, 'final.mp4')
        subprocess.run(['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', listfile, '-c', 'copy', tmp_mp4], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=240)
        os.replace(tmp_mp4, out_mp4)

        post['video'] = '/api/ai/short-file/' + post_id + suffix
        post['short_voice'] = voice
        post['short_emotion'] = emotion
        post['short_speed'] = speed
        post['short_subtitles'] = False
        post['short_segments'] = len(segs)
        base._save_ai_wall(posts)
        return JSONResponse({'video': post['video'], 'segments': len(segs), 'speed': speed, 'subtitles': False})
    except Exception as e:
        return JSONResponse({'error': 'Không tạo được shorts: ' + str(e)[:180]}, status_code=500)
    finally:
        # Frames, audio and per-segment clips are intermediates only.
        shutil.rmtree(work, ignore_errors=True)
|
|
|
|
|
|
|
| 311 |
|
| 312 |
@app.get('/api/ai/short-file/{file_id}')
def short_file(file_id: str):
    """Serve a rendered short video by id.

    The id is passed through ``base._safe_name`` before being joined to
    ``base.SHORTS_DIR``.

    Returns:
        The mp4 as a ``FileResponse``, or a 404 JSON error when no such file
        exists.
    """
    safe_id = base._safe_name(file_id)
    path = os.path.join(base.SHORTS_DIR, safe_id + '.mp4')
    # isfile() also rejects a directory that happens to carry this name.
    if not os.path.isfile(path):
        return JSONResponse({'error': 'not found'}, status_code=404)
    # Use the sanitised id in the download name as well, so raw request
    # input never reaches the Content-Disposition header.
    return FileResponse(path, media_type='video/mp4', filename=f'vnews-ai-{safe_id}.mp4')
|
| 317 |
|
| 318 |
+
# Front-end patch
|
|
|
|
| 319 |
# Drop any existing GET "/" route so the patched index handler registered
# afterwards is the only one serving it.  ``methods`` can be missing or None
# on some route types; treat both as "no methods".
app.router.routes = [
    r for r in app.router.routes
    if not (
        getattr(r, 'path', None) == '/'
        and 'GET' in (getattr(r, 'methods', None) or ())
    )
]

# Script block injected into the index page.  It redefines
# window.createTopicPost to POST the topic to /api/topic_post, reload the
# page and show a confirmation.
# NOTE(review): the aiMakeShortPatched wrapper just calls the previous
# function unchanged, so it looks like a no-op -- confirm it can be removed.
PATCH_INJECT = r'''
<script>
(function(){
if(window.aiMakeShortPatched){const old=window.aiMakeShortPatched;window.aiMakeShortPatched=function(i){return old(i)}}
window.createTopicPost=function(){let inp=document.getElementById('ai-topic-input');let topic=(inp&&inp.value||'').trim();if(!topic)return alert('Nhập chủ đề trước');fetch('/api/topic_post',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({topic})}).then(r=>r.json().then(j=>({ok:r.ok,j}))).then(({ok,j})=>{if(ok&&j.post){window.location.reload();alert('Đã tổng hợp nội dung các nguồn thành 1 bản tóm tắt trên Tường AI');}else alert(j.error||'Lỗi tạo bài')}).catch(e=>alert(e.message||'Lỗi tạo bài'));};
})();
</script>
'''
|
| 328 |
+
@app.get('/')
async def index_patched():
    """Serve the static index page with the front-end patch scripts injected.

    Reads ``/app/static/index.html`` and inserts ``base.AI_INJECT`` (when the
    base module defines it) followed by ``PATCH_INJECT`` immediately before
    the last ``</body>`` tag.  When the page has no ``</body>`` tag the
    scripts are appended at the end instead of being dropped.
    """
    with open('/app/static/index.html', 'r', encoding='utf-8') as f:
        html = f.read()
    # Keep the base module's own injection, if it provides one.
    extra = getattr(base, 'AI_INJECT', '') or ''
    inject = extra + PATCH_INJECT + '\n'
    # Split on the LAST closing tag only: a literal "</body>" earlier in the
    # page (e.g. inside a script string) must not receive a second copy.
    head, closing, tail = html.rpartition('</body>')
    if not closing:
        return HTMLResponse(html + inject)
    return HTMLResponse(head + inject + closing + tail)
|