bep40 committed on
Commit
90c15ed
·
verified ·
1 Parent(s): 0d29d76

Fix ai_patch syntax/self import and stabilize segmented shorts

Browse files
Files changed (1) hide show
  1. ai_patch.py +175 -208
ai_patch.py CHANGED
@@ -1,9 +1,10 @@
1
- import os, re, time, random, json, html as html_lib, subprocess
2
- import requests
3
- import ai_ext as base
4
- from ai_ext import app
5
  from fastapi import Request
6
  from fastapi.responses import JSONResponse, HTMLResponse, FileResponse
 
 
7
 
8
  try:
9
  from PIL import Image, ImageDraw, ImageFont
@@ -21,68 +22,66 @@ def _norm(s):
21
  return re.sub(r"\s+", " ", s).strip()
22
 
23
 
24
- def _similar(a, b):
25
- ta, tb = set(_norm(a).split()), set(_norm(b).split())
26
- if not ta or not tb: return False
27
- return len(ta & tb) / max(1, min(len(ta), len(tb))) >= 0.72
28
 
29
 
30
- def _dedupe_units(units, max_units=7):
31
- out, seen = [], set()
32
  for u in units:
33
- u = _clean(re.sub(r"^[-•*\d\.\)\s]+", "", u))
34
- if len(u) < 18: continue
35
- nu = _norm(u)
36
- if nu in seen or any(_similar(u, old) for old in out): continue
37
- seen.add(nu); out.append(u)
38
- if len(out) >= max_units: break
39
  return out
40
 
41
 
42
- def _postprocess_ai_text(text, max_units=7):
43
- text = _clean(text)
44
- if not text: return text
45
- raw = []
46
- for line in re.split(r"\n+", text):
47
- line = _clean(line)
48
- if not line: continue
49
- low = line.lower()
50
- if any(low.startswith(p) and len(line) < 80 for p in ("dưới đây", "sau đây", "tôi sẽ", "tiêu đề:", "sapo:", "nội dung:")):
51
- continue
52
- raw.append(line)
53
- units = []
54
- for line in raw:
55
- units.extend(re.split(r"(?<=[\.\!\?])\s+(?=[A-ZÀ-Ỹ0-9])", line) if len(line) > 260 else [line])
56
- units = _dedupe_units(units, max_units=max_units)
57
- if not units: return text[:900]
58
- title = ""
59
- if raw and len(raw[0]) <= 90 and not raw[0].startswith(("-", "•", "*")):
60
- title = raw[0]; units = [u for u in units if not _similar(u, title)]
61
- body = "\n".join("• " + u for u in units[:max_units])
62
- return (title + "\n\n" + body).strip() if title else body
63
-
64
-
65
- def _fallback_summary(prompt, max_units=6):
66
- text = prompt or ""
67
- for marker in ["Nội dung nguồn:", "Nội dung gốc:", "Nội dung:", "Nguồn/bối cảnh internet:"]:
68
  if marker in text:
69
- text = text.split(marker, 1)[1]; break
70
- text = re.sub(r"https?://\S+", "", text)
71
- text = _clean(text)
72
- sents = [_clean(s) for s in re.split(r"(?<=[\.\!\?])\s+(?=[A-ZÀ-Ỹ0-9])", text)]
73
- units = _dedupe_units([s for s in sents if 45 <= len(s) <= 260], max_units=max_units)
74
- return "\n".join("• " + u for u in units) if units else ("• " + text[:700].rsplit(" ", 1)[0] if text else "• Không có đủ nội dung nguồn để tóm tắt.")
75
 
76
 
77
  def _source_line(sources):
78
  names=[]
79
  for s in (sources or [])[:5]:
80
  via=s.get("via") or base._domain(s.get("url","")) or s.get("title","")
81
- if via and via not in names: names.append(via)
82
  return "Nguồn tham khảo: "+", ".join(names[:5]) if names else "Nguồn tham khảo: tổng hợp internet"
83
 
84
 
85
- def _make_summary_prompt(title, raw, source_hint=""):
86
  return f"""Bạn là biên tập viên tóm tắt tin tức tiếng Việt.
87
 
88
  NHIỆM VỤ BẮT BUỘC:
@@ -90,7 +89,6 @@ NHIỆM VỤ BẮT BUỘC:
90
  - Không lặp lại cùng một ý, cùng một câu, cùng một chi tiết.
91
  - Không thêm thông tin ngoài nguồn.
92
  - Tối đa 5 gạch đầu dòng, mỗi gạch đầu dòng 1 câu ngắn.
93
- - Nếu bài có số liệu/nhân vật/thời điểm quan trọng thì giữ lại.
94
 
95
  Tiêu đề nguồn: {title}
96
  Nguồn: {source_hint}
@@ -99,79 +97,70 @@ Nội dung nguồn:
99
  {raw[:14000]}"""
100
 
101
 
102
- def _rich_web_context(topic, limit=5):
103
- """Best effort: search, then scrape body of top source pages. Some sites block scraping, so this cannot be 100%."""
104
- try:
105
- ctx, sources = base.web_context(topic, limit=limit)
106
- except Exception:
107
- ctx, sources = "", []
108
- rich, rich_sources, seen = [], [], set()
109
  for s in (sources or [])[:limit*2]:
110
- url = s.get("url") or ""
111
- if not url.startswith("http") or url in seen: continue
112
  seen.add(url)
113
  try:
114
- data = base.scrape_any_url(url)
115
- raw = (data.get("summary","") + "\n" + data.get("text","")).strip()
116
- if len(raw) < 180: continue
117
- title = data.get("title") or s.get("title") or url
118
- via = data.get("via") or s.get("via") or base._domain(url)
119
  rich.append(f"### {title} ({via})\n{raw[:2400]}")
120
  rich_sources.append({"title":title,"url":url,"excerpt":raw[:700],"via":via})
121
- if len(rich) >= limit: break
122
- except Exception:
123
- continue
124
- if rich: return "\n\n".join(rich), rich_sources
125
- if ctx: return ctx, sources
126
- return f"Chủ đề cần tóm tắt: {topic}. Không truy xuất được nguồn đầy đủ, hãy tạo bản tóm tắt định hướng ngắn.", []
127
 
128
 
129
- async def qwen_generate_resilient(prompt, image_url=None, max_tokens=1200):
130
- token = base._hf_token(); errors=[]
131
  try:
132
- original = getattr(base, "_original_qwen_generate", None)
133
- if original:
134
- txt = await original(prompt, image_url=image_url, max_tokens=max_tokens)
135
- if txt: base.LAST_QWEN_ERROR=""; return txt
136
- if getattr(base, "LAST_QWEN_ERROR", ""): errors.append("sdk: "+str(base.LAST_QWEN_ERROR)[:220])
137
- except Exception as e: errors.append(f"sdk: {type(e).__name__}: {str(e)[:220]}")
138
  if token:
139
- models=[]
140
- for m in [os.getenv("QWEN_VL_MODEL",""),"Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen2.5-VL-3B-Instruct","Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-3B-Instruct","Qwen/Qwen2.5-1.5B-Instruct"]:
141
- if m and m not in models: models.append(m)
142
  headers={"Authorization":"Bearer "+token,"Content-Type":"application/json"}
143
- for model in models:
 
144
  try:
145
- is_vl = "VL" in model and bool(image_url)
146
- user_content = ([{"type":"image_url","image_url":{"url":image_url}},{"type":"text","text":prompt}] if is_vl else prompt)
147
- payload={"model":model,"messages":[{"role":"system","content":"Bạn là biên tập viên AI tiếng Việt. Chỉ tóm tắt súc tích nội dung nguồn, không viết lại toàn bài, không lặp ý, không bịa chi tiết."},{"role":"user","content":user_content}],"max_tokens":min(int(max_tokens or 900),1400),"temperature":0.35,"top_p":0.85}
148
  r=requests.post("https://router.huggingface.co/v1/chat/completions",headers=headers,json=payload,timeout=95)
149
- if r.status_code>=300: errors.append(f"{model}: HTTP {r.status_code} {r.text[:160]}"); continue
150
  txt=(r.json().get("choices",[{}])[0].get("message",{}).get("content") or "").strip()
151
- if txt: base.LAST_QWEN_ERROR=""; return txt
152
- except Exception as e: errors.append(f"{model}: {type(e).__name__}: {str(e)[:180]}")
153
- base.LAST_QWEN_ERROR = " | ".join(errors[-5:]) or "Qwen unavailable; extractive fallback"
154
- return _fallback_summary(prompt, 6)
155
 
156
 
157
- if not hasattr(base, "_original_qwen_generate"):
158
- base._original_qwen_generate = base.qwen_generate
159
- base.qwen_generate = qwen_generate_resilient
160
 
161
 
162
- _PATCHED={('/api/topic_post','POST'),('/api/url_wall','POST'),('/api/rewrite_share','POST'),('/api/ai/short/{post_id}','POST')}
163
  app.router.routes=[r for r in app.router.routes if not any(getattr(r,'path',None)==p and m in getattr(r,'methods',set()) for p,m in _PATCHED)]
164
 
165
-
166
  @app.get('/api/wall')
167
- def compat_wall(): return JSONResponse({'posts': base._load_ai_wall()[:80]})
168
-
169
 
170
  @app.post('/api/topic_post')
171
- async def compat_topic_post(request:Request):
172
- body=await request.json(); topic=base._clean_text(body.get('topic',''))
173
- if not topic: return JSONResponse({'error':'missing topic'},status_code=400)
174
- ctx, sources = _rich_web_context(topic, limit=5)
175
  image=base.pollinations_image_url(topic)
176
  prompt=f"""Tóm tắt tổng hợp chủ đề để đăng Tường AI.
177
 
@@ -188,179 +177,157 @@ Yêu cầu:
188
  Nguồn/bối cảnh internet:
189
  {ctx}"""
190
  text=await base.qwen_generate(prompt,image_url=image,max_tokens=950)
191
- text=_postprocess_ai_text(text,7)
192
- if 'Nguồn tham khảo:' not in text: text += '\n\n' + _source_line(sources)
193
  post=base.make_post('Tổng hợp: '+topic,text,image,'','topic',sources=sources[:5])
194
- posts=base._load_ai_wall(); posts.insert(0,post); base._save_ai_wall(posts)
195
  return JSONResponse({'post':post})
196
 
197
-
198
  @app.post('/api/url_wall')
199
- async def compat_url_wall(request:Request):
200
- body=await request.json(); url=base._clean_text(body.get('url',''))
201
- if not url.startswith('http'): return JSONResponse({'error':'missing url'},status_code=400)
202
- try: data=base.scrape_any_url(url)
203
- except Exception as e: return JSONResponse({'error':'Không scrape được URL: '+str(e)[:180]},status_code=422)
204
  raw=(data.get('summary','')+'\n'+data.get('text','')).strip()
205
- if len(raw)<120: return JSONResponse({'error':'URL không có đủ nội dung để tóm tắt'},status_code=422)
206
- prompt=_make_summary_prompt(data.get('title',''), raw, data.get('via','') or base._domain(url))
207
  text=await base.qwen_generate(prompt,image_url=data.get('image') or None,max_tokens=850)
208
- text=_postprocess_ai_text(text,6)
209
  src=[{'title':data.get('title'),'url':url,'excerpt':raw[:500],'via':data.get('via') or base._domain(url)}]
210
- if 'Nguồn tham khảo:' not in text: text+='\n\n'+_source_line(src)
211
  post=base.make_post(data.get('title') or 'Bài viết',text,data.get('image') or '',url,'url',sources=src)
212
- posts=base._load_ai_wall(); posts.insert(0,post); base._save_ai_wall(posts)
213
  return JSONResponse({'post':post})
214
 
215
-
216
  @app.post('/api/rewrite_share')
217
- async def compat_rewrite_share(request:Request):
218
- body=await request.json(); url=base._clean_text(body.get('url',''))
219
- if not url.startswith('http'): return JSONResponse({'error':'missing url'},status_code=400)
220
- try: data=base.scrape_any_url(url)
221
- except Exception as e: return JSONResponse({'error':'Không đọc được bài viết: '+str(e)[:180]},status_code=422)
222
  raw=(data.get('summary','')+'\n'+data.get('text','')).strip()
223
- if len(raw)<120: return JSONResponse({'error':'Bài viết không đủ nội dung để tóm tắt'},status_code=422)
224
- prompt=_make_summary_prompt(data.get('title',''), raw, data.get('via','') or base._domain(url))
225
  text=await base.qwen_generate(prompt,image_url=data.get('image') or None,max_tokens=850)
226
- text=_postprocess_ai_text(text,6)
227
  src=[{'title':data.get('title'),'url':url,'excerpt':raw[:500],'via':data.get('via') or base._domain(url)}]
228
- if 'Nguồn tham khảo:' not in text: text+='\n\n'+_source_line(src)
229
  post=base.make_post(data.get('title') or 'Bài viết',text,data.get('image') or '',url,'summary',sources=src)
230
- posts=base._load_ai_wall(); posts.insert(0,post); base._save_ai_wall(posts)
231
  return JSONResponse({'post':post})
232
 
233
 
234
  def split_segments(post):
235
- title=_clean(post.get('title',''))
236
- text=re.sub(r'Nguồn tham khảo:.*','',post.get('text',''),flags=re.S).strip()
237
  lines=[]
238
- if title: lines.append(title)
239
- for line in re.split(r'\n+', text):
240
  line=_clean(re.sub(r'^[•\-*]\s*','',line))
241
- if len(line)>8: lines.append(line)
242
- # combine very short lines, keep each segment readable.
243
- segs=[]; cur=''
244
  for line in lines:
245
- if len(cur)+len(line)<190:
246
- cur=(cur+' '+line).strip()
247
  else:
248
- if cur: segs.append(cur)
249
  cur=line
250
- if cur: segs.append(cur)
251
  return segs[:14]
252
 
253
-
254
- def wrap_text(draw, text, font, max_width, max_lines):
255
- words=_clean(text).split(); lines=[]; cur=''
256
  for w in words:
257
  test=(cur+' '+w).strip()
258
- try: width=draw.textbbox((0,0),test,font=font)[2]
259
- except Exception: width=len(test)*20
260
- if width<=max_width: cur=test
261
  else:
262
- if cur: lines.append(cur)
263
  cur=w
264
- if len(lines)>=max_lines: break
265
- if cur and len(lines)<max_lines: lines.append(cur)
266
  return lines
267
 
268
-
269
- def make_segment_frame(post, segment, idx, total, img_path, out_path):
270
- if Image is None: return base._make_short_frame(post,img_path,out_path)
271
- W,H=1080,1920
272
- bg=Image.new('RGB',(W,H),(14,14,14))
273
  try:
274
- im=Image.open(img_path).convert('RGB')
275
- target=(1080,760); ratio=im.width/im.height; tr=target[0]/target[1]
276
- if ratio>tr: nh=target[1]; nw=int(nh*ratio)
277
- else: nw=target[0]; nh=int(nw/ratio)
278
- im=im.resize((nw,nh)); left=(nw-target[0])//2; top=(nh-target[1])//2
279
- bg.paste(im.crop((left,top,left+target[0],top+target[1])),(0,0))
280
- except Exception: pass
281
  draw=ImageDraw.Draw(bg)
282
  try:
283
- ft=ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf',46)
284
- fb=ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',44)
285
- fl=ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf',28)
286
- except Exception: ft=fb=fl=None
287
- draw.rectangle((0,720,W,H),fill=(14,14,14))
288
- margin=48; maxw=W-margin*2
289
  draw.text((margin,770),f'VNEWS · Short AI {idx}/{total}',fill=(92,184,122),font=fl)
290
  y=840
291
- for ln in wrap_text(draw, segment, fb, maxw, 16):
292
- draw.text((margin,y),ln,fill=(242,242,242),font=fb)
293
- y+=58
294
- if y>1650: break
295
  bg.save(out_path,quality=92)
296
 
297
-
298
- def make_tts(text, voice, out_path):
299
  edge_voice={'nam':'vi-VN-NamMinhNeural','male':'vi-VN-NamMinhNeural','nu':'vi-VN-HoaiMyNeural','female':'vi-VN-HoaiMyNeural','mien-nam':'vi-VN-HoaiMyNeural'}.get(voice,'vi-VN-HoaiMyNeural')
300
- try:
301
- subprocess.run(['python','-m','edge_tts','--voice',edge_voice,'--text',text,'--write-media',out_path],check=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,timeout=160)
302
  except Exception:
303
  tld='com.vn' if voice in ('nu','female','mien-nam') else 'com'
304
- try: base.gTTS(text,lang='vi',tld=tld,slow=False).save(out_path)
305
- except TypeError: base.gTTS(text,lang='vi',slow=False).save(out_path)
306
-
307
 
308
  @app.post('/api/ai/short/{post_id}')
309
- async def patched_segment_short(post_id:str, request:Request):
310
- try: body=await request.json()
311
- except Exception: body={}
312
- voice=str(body.get('voice','nu')).lower().strip(); emotion=str(body.get('emotion','neutral')).lower().strip(); speed=max(0.85,min(1.35,float(body.get('speed',1.2) or 1.2)))
313
- posts=base._load_ai_wall(); post=next((p for p in posts if str(p.get('id'))==str(post_id)),None)
314
- if not post: return JSONResponse({'error':'post not found'},status_code=404)
315
- os.makedirs(base.SHORTS_DIR,exist_ok=True)
316
- suffix=f'_{voice}_{emotion}_{str(speed).replace(".","p")}_segments'
317
  out_mp4=os.path.join(base.SHORTS_DIR,base._safe_name(post_id+suffix)+'.mp4')
318
- if os.path.exists(out_mp4):
319
- post['video']='/api/ai/short-file/'+post_id+suffix; base._save_ai_wall(posts); return JSONResponse({'video':post['video']})
320
- work=os.path.join(base.SHORTS_DIR,base._safe_name(post_id+suffix)); os.makedirs(work,exist_ok=True)
321
- img=os.path.join(work,'image.jpg'); base._download_image(post.get('img'),post.get('title','AI news'),img)
322
  segs=split_segments(post)
323
- if not segs: return JSONResponse({'error':'Không có nội dung để tạo short'},status_code=422)
324
  clips=[]
325
  try:
326
  for i,seg in enumerate(segs,1):
327
- frame=os.path.join(work,f'frame_{i:02d}.jpg'); aud=os.path.join(work,f'aud_{i:02d}.mp3'); audf=os.path.join(work,f'audf_{i:02d}.mp3'); clip=os.path.join(work,f'clip_{i:02d}.mp4')
328
  prefix={'urgent':'Tin nhanh.','warm':'Câu chuyện đáng chú ý.','serious':'Bản tin nghiêm túc.','energetic':'Cập nhật nổi bật.'}.get(emotion,'')
329
  spoken=(prefix+' '+seg).strip() if i==1 and prefix else seg
330
- make_segment_frame(post, seg, i, len(segs), img, frame)
331
- make_tts(spoken, voice, aud)
332
  subprocess.run(['ffmpeg','-y','-i',aud,'-filter:a',f'atempo={speed}','-vn',audf],check=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,timeout=120)
333
  subprocess.run(['ffmpeg','-y','-loop','1','-i',frame,'-i',audf,'-shortest','-c:v','libx264','-tune','stillimage','-pix_fmt','yuv420p','-c:a','aac','-b:a','128k','-vf','scale=1080:1920',clip],check=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,timeout=180)
334
  clips.append(clip)
335
  listfile=os.path.join(work,'list.txt')
336
  with open(listfile,'w',encoding='utf-8') as f:
337
- for c in clips: f.write("file '{}'
338
- ".format(c.replace("'","'\\''")))
339
  subprocess.run(['ffmpeg','-y','-f','concat','-safe','0','-i',listfile,'-c','copy',out_mp4],check=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,timeout=240)
340
- post['video']='/api/ai/short-file/'+post_id+suffix; post['short_voice']=voice; post['short_emotion']=emotion; post['short_speed']=speed; post['short_subtitles']=False; post['short_segments']=len(segs)
341
- base._save_ai_wall(posts)
342
  return JSONResponse({'video':post['video'],'segments':len(segs),'speed':speed,'subtitles':False})
343
- except Exception as e:
344
- return JSONResponse({'error':'Không tạo được shorts: '+str(e)[:180]},status_code=500)
345
-
346
 
347
  @app.get('/api/ai/short-file/{file_id}')
348
- def patched_short_file(file_id:str):
349
  path=os.path.join(base.SHORTS_DIR,base._safe_name(file_id)+'.mp4')
350
- if not os.path.exists(path): return JSONResponse({'error':'not found'},status_code=404)
351
  return FileResponse(path,media_type='video/mp4',filename=f'vnews-ai-{file_id}.mp4')
352
 
353
-
354
- # Keep previous UI patch, but override topic alert/progress wording.
355
  app.router.routes=[r for r in app.router.routes if not (getattr(r,'path',None)=='/' and 'GET' in getattr(r,'methods',set()))]
356
- @app.get('/')
357
- async def index_patched():
358
- with open('/app/static/index.html','r',encoding='utf-8') as f: html=f.read()
359
- inject=PATCH_INJECT+r'''
360
  <script>
361
  (function(){
 
362
  window.createTopicPost=function(){let inp=document.getElementById('ai-topic-input');let topic=(inp&&inp.value||'').trim();if(!topic)return alert('Nhập chủ đề trước');fetch('/api/topic_post',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({topic})}).then(r=>r.json().then(j=>({ok:r.ok,j}))).then(({ok,j})=>{if(ok&&j.post){window.location.reload();alert('Đã tổng hợp nội dung các nguồn thành 1 bản tóm tắt trên Tường AI');}else alert(j.error||'Lỗi tạo bài')}).catch(e=>alert(e.message||'Lỗi tạo bài'));};
363
  })();
364
  </script>
365
  '''
366
- return HTMLResponse(html.replace('</body>',inject+'\n</body>'))
 
 
 
 
 
 
1
+ import os, re, html as html_lib, subprocess, requests
2
+ from urllib.parse import quote_plus
3
+ from bs4 import BeautifulSoup
 
4
  from fastapi import Request
5
  from fastapi.responses import JSONResponse, HTMLResponse, FileResponse
6
+ import ai_ext as base
7
+ from ai_ext import app
8
 
9
  try:
10
  from PIL import Image, ImageDraw, ImageFont
 
22
  return re.sub(r"\s+", " ", s).strip()
23
 
24
 
25
def _similar(a, b):
    """Return True when two texts share most of their normalized words.

    Both inputs go through ``_norm`` and are split on whitespace into word
    sets. The overlap is measured against the smaller set, so a short text
    largely contained in a longer one counts as similar. Returns False when
    either word set is empty.
    """
    words_a = set(_norm(a).split())
    words_b = set(_norm(b).split())
    if not (words_a and words_b):
        return False
    smaller = min(len(words_a), len(words_b))
    shared = len(words_a & words_b)
    return shared / max(1, smaller) >= 0.72
29
 
30
 
31
def _dedupe_units(units, max_units=7):
    """Strip list markers from each unit and drop short or repeated ones.

    Args:
        units: iterable of text fragments (lines or sentences).
        max_units: collection stops once this many units have been kept.

    Returns:
        The kept units, in their original order.
    """
    kept = []
    seen_keys = set()
    for raw_unit in units:
        # Remove leading bullets, numbering and whitespace before comparing.
        unit = _clean(re.sub(r"^[-•*\d\.\)\s]+", "", raw_unit))
        if len(unit) < 18:
            continue
        key = _norm(unit)
        if key in seen_keys:
            continue
        if any(_similar(unit, earlier) for earlier in kept):
            continue
        seen_keys.add(key)
        kept.append(unit)
        if len(kept) >= max_units:
            break
    return kept
41
 
42
 
43
def _postprocess(text, max_units=7):
    """Turn raw model output into an optional title plus a bullet list.

    Short lines that start with a known lead-in phrase are dropped, lines
    longer than 260 characters are split into sentences, and the resulting
    units are de-duplicated via ``_dedupe_units``. If nothing survives, the
    first 900 characters of the cleaned text are returned instead.

    Args:
        text: raw generated text.
        max_units: maximum number of bullets to emit.

    Returns:
        ``"<title>\\n\\n• ...\\n• ..."`` when the first kept line qualifies as
        a title (at most 90 characters and not a bullet), otherwise just the
        bullet list.
    """
    lead_ins = ("dưới đây", "sau đây", "tôi sẽ", "tiêu đề:", "sapo:", "nội dung:")

    text = _clean(text)
    if not text:
        return text

    kept_lines = []
    for chunk in re.split(r"\n+", text):
        chunk = _clean(chunk)
        if not chunk:
            continue
        # Only short lines are treated as boilerplate lead-ins.
        if len(chunk) < 80 and chunk.lower().startswith(lead_ins):
            continue
        kept_lines.append(chunk)

    units = []
    for chunk in kept_lines:
        if len(chunk) > 260:
            units.extend(re.split(r"(?<=[\.\!\?])\s+(?=[A-ZÀ-Ỹ0-9])", chunk))
        else:
            units.append(chunk)

    units = _dedupe_units(units, max_units)
    if not units:
        return text[:900]

    has_title = (
        bool(kept_lines)
        and len(kept_lines[0]) <= 90
        and not kept_lines[0].startswith(("-", "•", "*"))
    )
    if not has_title:
        return "\n".join("• " + unit for unit in units[:max_units])

    title = kept_lines[0]
    # The title must not be repeated as one of its own bullets.
    bullets = [unit for unit in units if not _similar(unit, title)]
    body = "\n".join("• " + unit for unit in bullets[:max_units])
    return (title + "\n\n" + body).strip()
63
+
64
+
65
def _fallback_summary(prompt, max_units=6):
    """Build an extractive bullet summary straight from the prompt text.

    Used when no model produced output. The source portion of the prompt is
    located by the first matching marker, URLs are removed, and sentences of
    45 to 260 characters are de-duplicated into bullets.

    Args:
        prompt: the full prompt that was sent to the model (may be empty/None).
        max_units: maximum number of bullets.

    Returns:
        A bullet list; a single bullet holding (at most) the first 700
        characters when no sentence qualifies; or a fixed "not enough
        content" bullet when there is no text at all.
    """
    text = prompt or ""
    for marker in ("Nội dung nguồn:", "Nội dung gốc:", "Nội dung:", "Nguồn/bối cảnh internet:"):
        if marker in text:
            text = text.split(marker, 1)[1]
            break
    text = _clean(re.sub(r"https?://\S+", "", text))
    if not text:
        return "• Không có đủ nội dung nguồn để tóm tắt."

    sentences = [_clean(s) for s in re.split(r"(?<=[\.\!\?])\s+(?=[A-ZÀ-Ỹ0-9])", text)]
    units = _dedupe_units([s for s in sentences if 45 <= len(s) <= 260], max_units)
    if units:
        return "\n".join("• " + unit for unit in units)

    if len(text) <= 700:
        # Nothing was cut off, so keep the text whole; trimming back to the
        # previous space here would silently drop the final word.
        return "• " + text
    # Truncated: back up to the last space so the bullet does not end mid-word.
    return "• " + text[:700].rsplit(" ", 1)[0]
 
74
 
75
 
76
def _source_line(sources):
    """Format the "Nguồn tham khảo" attribution line for a post.

    For each of the first five sources the label is its ``via`` value, else
    the domain of its ``url`` (via ``base._domain``), else its ``title``.
    Empty and repeated labels are skipped.

    Args:
        sources: list of source dicts, or None.

    Returns:
        The attribution line, or a generic one when no label was found.
    """
    labels = []
    for src in (sources or [])[:5]:
        label = src.get("via") or base._domain(src.get("url", "")) or src.get("title", "")
        if not label or label in labels:
            continue
        labels.append(label)
    if not labels:
        return "Nguồn tham khảo: tổng hợp internet"
    return "Nguồn tham khảo: " + ", ".join(labels[:5])
82
 
83
 
84
+ def _make_summary_prompt(title,raw,source_hint=""):
85
  return f"""Bạn là biên tập viên tóm tắt tin tức tiếng Việt.
86
 
87
  NHIỆM VỤ BẮT BUỘC:
 
89
  - Không lặp lại cùng một ý, cùng một câu, cùng một chi tiết.
90
  - Không thêm thông tin ngoài nguồn.
91
  - Tối đa 5 gạch đầu dòng, mỗi gạch đầu dòng 1 câu ngắn.
 
92
 
93
  Tiêu đề nguồn: {title}
94
  Nguồn: {source_hint}
 
97
  {raw[:14000]}"""
98
 
99
 
100
def _rich_web_context(topic, limit=5):
    """Collect article text for a topic: search first, then scrape the hits.

    Best effort throughout: a failed search yields no sources, and a source
    that cannot be scraped (or yields under 180 characters) is skipped.

    Args:
        topic: the topic to look up.
        limit: maximum number of scraped sources to keep; up to ``limit * 2``
            search results are tried.

    Returns:
        ``(context_text, sources)``. Scraped bodies are preferred, then the
        search context from ``base.web_context``, and finally a placeholder
        instruction with an empty source list.
    """
    try:
        ctx, sources = base.web_context(topic, limit=limit)
    except Exception:
        ctx, sources = "", []

    sections = []
    scraped_sources = []
    visited = set()
    for hit in (sources or [])[:limit * 2]:
        url = hit.get("url") or ""
        if url in visited or not url.startswith("http"):
            continue
        visited.add(url)
        try:
            page = base.scrape_any_url(url)
            body = (page.get("summary", "") + "\n" + page.get("text", "")).strip()
            if len(body) < 180:
                continue
            title = page.get("title") or hit.get("title") or url
            via = page.get("via") or hit.get("via") or base._domain(url)
            sections.append(f"### {title} ({via})\n{body[:2400]}")
            scraped_sources.append({"title": title, "url": url, "excerpt": body[:700], "via": via})
            if len(sections) >= limit:
                break
        except Exception:
            # A single unreadable page must not abort the whole lookup.
            continue

    if sections:
        return "\n\n".join(sections), scraped_sources
    if ctx:
        return ctx, sources
    return f"Chủ đề cần tóm tắt: {topic}. Không truy xuất được nguồn đầy đủ, hãy tạo bản tóm tắt định hướng ngắn.", []
 
121
 
122
 
123
async def qwen_resilient(prompt, image_url=None, max_tokens=1200):
    """Generate text with several fallbacks so callers always get a result.

    Order of attempts:
      1. ``base._original_qwen_generate`` (the generator saved before this
         module replaced ``base.qwen_generate``), if present.
      2. The Hugging Face router chat-completions endpoint, trying each
         candidate model in turn, when ``base._hf_token()`` returns a token.
      3. ``_fallback_summary`` built from the prompt itself.

    ``base.LAST_QWEN_ERROR`` is cleared on success and set to the last few
    collected errors (or ``"fallback"``) when the extractive fallback is used.

    Args:
        prompt: user prompt text.
        image_url: optional image URL; only sent to models whose name
            contains ``"VL"``.
        max_tokens: requested completion length; capped at 1400 for the
            router calls.

    Returns:
        The generated (or extracted) text.
    """
    import asyncio  # local import so the block does not depend on file-level imports

    token = base._hf_token()
    errors = []
    try:
        orig = getattr(base, "_original_qwen_generate", None)
        if orig:
            txt = await orig(prompt, image_url=image_url, max_tokens=max_tokens)
            if txt:
                base.LAST_QWEN_ERROR = ""
                return txt
    except Exception as e:
        errors.append(f"sdk:{str(e)[:160]}")

    if token:
        headers = {"Authorization": "Bearer " + token, "Content-Type": "application/json"}
        candidates = [
            os.getenv("QWEN_VL_MODEL", ""),
            "Qwen/Qwen2.5-VL-7B-Instruct",
            "Qwen/Qwen2.5-VL-3B-Instruct",
            "Qwen/Qwen2.5-7B-Instruct",
            "Qwen/Qwen2.5-3B-Instruct",
            "Qwen/Qwen2.5-1.5B-Instruct",
        ]
        # Drop the empty env default and de-duplicate while keeping order, so a
        # QWEN_VL_MODEL equal to one of the built-ins is not tried twice.
        models = list(dict.fromkeys(m for m in candidates if m))
        loop = asyncio.get_running_loop()
        for model in models:
            try:
                is_vl = "VL" in model and bool(image_url)
                content = (
                    [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]
                    if is_vl
                    else prompt
                )
                payload = {
                    "model": model,
                    "messages": [
                        {"role": "system", "content": "Bạn là biên tập viên AI tiếng Việt. Chỉ tóm tắt súc tích nội dung nguồn, không viết lại toàn bài, không lặp ý."},
                        {"role": "user", "content": content},
                    ],
                    "max_tokens": min(int(max_tokens or 900), 1400),
                    "temperature": 0.35,
                    "top_p": 0.85,
                }
                # requests is blocking; run it in the default executor so the
                # event loop keeps serving other requests during the call.
                r = await loop.run_in_executor(
                    None,
                    lambda p=payload: requests.post(
                        "https://router.huggingface.co/v1/chat/completions",
                        headers=headers,
                        json=p,
                        timeout=95,
                    ),
                )
                if r.status_code >= 300:
                    errors.append(f"{model}:{r.status_code}")
                    continue
                txt = (r.json().get("choices", [{}])[0].get("message", {}).get("content") or "").strip()
                if txt:
                    base.LAST_QWEN_ERROR = ""
                    return txt
            except Exception as e:
                errors.append(f"{model}:{str(e)[:120]}")

    base.LAST_QWEN_ERROR = " | ".join(errors[-5:]) or "fallback"
    return _fallback_summary(prompt, 6)
146
 
147
 
148
# Save the stock generator the first time only, then route every caller of
# base.qwen_generate through the resilient wrapper.
# NOTE(review): indentation was lost in this view; the rebind below is assumed
# to sit outside the guard — confirm against the real file.
try:
    base._original_qwen_generate
except AttributeError:
    base._original_qwen_generate = base.qwen_generate
base.qwen_generate = qwen_resilient


# (path, method) pairs whose existing routes are removed here so that the
# handlers defined in this module take their place.
_PATCHED = {
    ('/api/topic_post', 'POST'),
    ('/api/url_wall', 'POST'),
    ('/api/rewrite_share', 'POST'),
    ('/api/ai/short/{post_id}', 'POST'),
    ('/api/ai/short-file/{file_id}', 'GET'),
}
app.router.routes = [
    route
    for route in app.router.routes
    if all(
        getattr(route, 'path', None) != path or method not in getattr(route, 'methods', set())
        for path, method in _PATCHED
    )
]
155
 
 
156
@app.get('/api/wall')
def compat_wall():
    """Return the first 80 posts of the AI wall as ``{'posts': [...]}``."""
    wall = base._load_ai_wall()
    return JSONResponse({'posts': wall[:80]})
 
158
 
159
  @app.post('/api/topic_post')
160
+ async def topic_post(request:Request):
161
+ body=await request.json();topic=base._clean_text(body.get('topic',''))
162
+ if not topic:return JSONResponse({'error':'missing topic'},status_code=400)
163
+ ctx,sources=_rich_web_context(topic,5)
164
  image=base.pollinations_image_url(topic)
165
  prompt=f"""Tóm tắt tổng hợp chủ đề để đăng Tường AI.
166
 
 
177
  Nguồn/bối cảnh internet:
178
  {ctx}"""
179
  text=await base.qwen_generate(prompt,image_url=image,max_tokens=950)
180
+ text=_postprocess(text,7)
181
+ if 'Nguồn tham khảo:' not in text:text+='\n\n'+_source_line(sources)
182
  post=base.make_post('Tổng hợp: '+topic,text,image,'','topic',sources=sources[:5])
183
+ posts=base._load_ai_wall();posts.insert(0,post);base._save_ai_wall(posts)
184
  return JSONResponse({'post':post})
185
 
 
186
@app.post('/api/url_wall')
async def url_wall(request: Request):
    """Scrape a URL, summarize it and prepend the result to the AI wall.

    Expects a JSON object with a ``url`` field.

    Returns:
        ``{'post': ...}`` on success; 400 when the URL is missing or does not
        start with ``http``; 422 when the page cannot be scraped or has fewer
        than 120 characters of content.
    """
    # A malformed or non-object JSON body is treated as an empty request so
    # the client gets the regular 400 "missing url" instead of a server error.
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        body = {}

    url = base._clean_text(body.get('url', ''))
    if not url.startswith('http'):
        return JSONResponse({'error': 'missing url'}, status_code=400)

    try:
        data = base.scrape_any_url(url)
    except Exception as e:
        return JSONResponse({'error': 'Không scrape được URL: ' + str(e)[:180]}, status_code=422)

    raw = (data.get('summary', '') + '\n' + data.get('text', '')).strip()
    if len(raw) < 120:
        return JSONResponse({'error': 'URL không có đủ nội dung để tóm tắt'}, status_code=422)

    prompt = _make_summary_prompt(data.get('title', ''), raw, data.get('via', '') or base._domain(url))
    text = await base.qwen_generate(prompt, image_url=data.get('image') or None, max_tokens=850)
    text = _postprocess(text, 6)

    src = [{'title': data.get('title'), 'url': url, 'excerpt': raw[:500], 'via': data.get('via') or base._domain(url)}]
    if 'Nguồn tham khảo:' not in text:
        text += '\n\n' + _source_line(src)

    post = base.make_post(data.get('title') or 'Bài viết', text, data.get('image') or '', url, 'url', sources=src)
    posts = base._load_ai_wall()
    posts.insert(0, post)
    base._save_ai_wall(posts)
    return JSONResponse({'post': post})
202
 
 
203
@app.post('/api/rewrite_share')
async def rewrite_share(request: Request):
    """Summarize an article URL and prepend it to the AI wall as a summary post.

    Expects a JSON object with a ``url`` field. The post is created with the
    kind ``'summary'``.

    Returns:
        ``{'post': ...}`` on success; 400 when the URL is missing or does not
        start with ``http``; 422 when the article cannot be read or has fewer
        than 120 characters of content.
    """
    # A malformed or non-object JSON body is treated as an empty request so
    # the client gets the regular 400 "missing url" instead of a server error.
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        body = {}

    url = base._clean_text(body.get('url', ''))
    if not url.startswith('http'):
        return JSONResponse({'error': 'missing url'}, status_code=400)

    try:
        data = base.scrape_any_url(url)
    except Exception as e:
        return JSONResponse({'error': 'Không đọc được bài viết: ' + str(e)[:180]}, status_code=422)

    raw = (data.get('summary', '') + '\n' + data.get('text', '')).strip()
    if len(raw) < 120:
        return JSONResponse({'error': 'Bài viết không đủ nội dung để tóm tắt'}, status_code=422)

    prompt = _make_summary_prompt(data.get('title', ''), raw, data.get('via', '') or base._domain(url))
    text = await base.qwen_generate(prompt, image_url=data.get('image') or None, max_tokens=850)
    text = _postprocess(text, 6)

    src = [{'title': data.get('title'), 'url': url, 'excerpt': raw[:500], 'via': data.get('via') or base._domain(url)}]
    if 'Nguồn tham khảo:' not in text:
        text += '\n\n' + _source_line(src)

    post = base.make_post(data.get('title') or 'Bài viết', text, data.get('image') or '', url, 'summary', sources=src)
    posts = base._load_ai_wall()
    posts.insert(0, post)
    base._save_ai_wall(posts)
    return JSONResponse({'post': post})
219
 
220
 
221
def split_segments(post):
    """Split a post into short narration segments for the video frames.

    The title comes first, followed by each body line longer than 8
    characters with its bullet marker removed; everything from
    "Nguồn tham khảo:" onward is discarded. Consecutive pieces are merged
    while the combined length stays under 190 characters.

    Args:
        post: post dict; reads ``title`` and ``text``.

    Returns:
        At most 14 segment strings.
    """
    heading = _clean(post.get('title', ''))
    body = re.sub(r'Nguồn tham khảo:.*', '', post.get('text', ''), flags=re.S).strip()

    pieces = [heading] if heading else []
    for raw_line in re.split(r'\n+', body):
        cleaned = _clean(re.sub(r'^[•\-*]\s*', '', raw_line))
        if len(cleaned) > 8:
            pieces.append(cleaned)

    segments = []
    pending = ''
    for piece in pieces:
        if len(pending) + len(piece) >= 190:
            # Adding this piece would make the segment too long: flush first.
            if pending:
                segments.append(pending)
            pending = piece
            continue
        pending = (pending + ' ' + piece).strip()
    if pending:
        segments.append(pending)
    return segments[:14]
236
 
237
def wrap_text(draw, text, font, max_width, max_lines):
    """Greedily wrap text into lines no wider than ``max_width`` pixels.

    Width is measured with ``draw.textbbox``; if that call fails, a rough
    estimate of 20 pixels per character is used instead.

    Args:
        draw: drawing object providing ``textbbox``.
        text: text to wrap (passed through ``_clean`` first).
        font: font handed to ``textbbox``.
        max_width: maximum line width in pixels.
        max_lines: maximum number of lines; remaining words are dropped.

    Returns:
        The wrapped lines.
    """
    wrapped = []
    current = ''
    for word in _clean(text).split():
        candidate = (current + ' ' + word).strip()
        try:
            measured = draw.textbbox((0, 0), candidate, font=font)[2]
        except Exception:
            measured = len(candidate) * 20
        if measured > max_width:
            if current:
                wrapped.append(current)
            current = word
        else:
            current = candidate
        # NOTE(review): indentation was lost in this view; this check is
        # assumed to run once per word — confirm against the real file.
        if len(wrapped) >= max_lines:
            break
    if current and len(wrapped) < max_lines:
        wrapped.append(current)
    return wrapped
250
 
251
def make_segment_frame(post, segment, idx, total, img_path, out_path):
    """Render one 1080x1920 frame: photo on top, segment text underneath.

    When Pillow is unavailable (``Image is None``) the work is delegated to
    ``base._make_short_frame``. A photo that cannot be opened leaves the dark
    background in place, and missing DejaVu fonts fall back to ``font=None``.

    Args:
        post: the post dict (only used by the Pillow-less fallback).
        segment: text to draw on this frame.
        idx: 1-based index shown in the frame label.
        total: total number of frames shown in the frame label.
        img_path: path of the source photo.
        out_path: where the rendered frame is saved.
    """
    if Image is None:
        return base._make_short_frame(post, img_path, out_path)

    W, H = 1080, 1920
    bg = Image.new('RGB', (W, H), (14, 14, 14))
    try:
        # The context manager closes the underlying file once the converted
        # copy exists, instead of leaving the handle open.
        with Image.open(img_path) as src:
            im = src.convert('RGB')
        target = (1080, 760)
        ratio = im.width / im.height
        tr = target[0] / target[1]
        # Scale so the photo covers the target box, then centre-crop it.
        if ratio > tr:
            nh = target[1]
            nw = int(nh * ratio)
        else:
            nw = target[0]
            nh = int(nw / ratio)
        im = im.resize((nw, nh))
        left = (nw - target[0]) // 2
        top = (nh - target[1]) // 2
        bg.paste(im.crop((left, top, left + target[0], top + target[1])), (0, 0))
    except Exception:
        # Best effort: keep the plain background when the photo is unusable.
        pass

    draw = ImageDraw.Draw(bg)
    try:
        fb = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', 44)
        fl = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', 28)
    except Exception:
        fb = fl = None

    # The text panel starts at y=720 and therefore overlaps the bottom of the
    # 760-pixel-high photo area.
    draw.rectangle((0, 720, W, H), fill=(14, 14, 14))
    margin = 48
    maxw = W - margin * 2
    draw.text((margin, 770), f'VNEWS · Short AI {idx}/{total}', fill=(92, 184, 122), font=fl)

    y = 840
    for ln in wrap_text(draw, segment, fb, maxw, 16):
        draw.text((margin, y), ln, fill=(242, 242, 242), font=fb)
        y += 58
        if y > 1650:
            break
    bg.save(out_path, quality=92)
271
 
272
def make_tts(text, voice, out_path):
    """Synthesize Vietnamese speech for ``text`` into ``out_path``.

    Runs the ``edge_tts`` module as a subprocess first; on any failure it
    falls back to ``base.gTTS``, retrying without ``tld`` if that keyword is
    rejected with a ``TypeError``.

    Args:
        text: text to speak.
        voice: voice key (``nam``/``male``/``nu``/``female``/``mien-nam``);
            unknown keys use the ``vi-VN-HoaiMyNeural`` voice.
        out_path: destination audio file.
    """
    import sys  # local import so the block does not depend on file-level imports

    edge_voice = {
        'nam': 'vi-VN-NamMinhNeural',
        'male': 'vi-VN-NamMinhNeural',
        'nu': 'vi-VN-HoaiMyNeural',
        'female': 'vi-VN-HoaiMyNeural',
        'mien-nam': 'vi-VN-HoaiMyNeural',
    }.get(voice, 'vi-VN-HoaiMyNeural')
    # Use the running interpreter rather than whatever "python" resolves to on
    # PATH, so the subprocess sees the same installed packages as this app.
    python_bin = sys.executable or 'python'
    try:
        subprocess.run(
            [python_bin, '-m', 'edge_tts', '--voice', edge_voice, '--text', text, '--write-media', out_path],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=160,
        )
    except Exception:
        tld = 'com.vn' if voice in ('nu', 'female', 'mien-nam') else 'com'
        try:
            base.gTTS(text, lang='vi', tld=tld, slow=False).save(out_path)
        except TypeError:
            base.gTTS(text, lang='vi', slow=False).save(out_path)
 
279
 
280
@app.post('/api/ai/short/{post_id}')
async def segmented_short(post_id: str, request: Request):
    """Build (or reuse) a segmented short video for a wall post.

    The optional JSON body may carry ``voice``, ``emotion`` and ``speed``.
    Speed is clamped to 0.85-1.35; an unparsable speed falls back to 1.2.
    One clip is produced per text segment (frame image plus speech audio)
    and the clips are concatenated with ffmpeg. An existing output file for
    the same post/voice/emotion/speed combination is reused.

    Returns:
        ``{'video': ...}`` (plus segment metadata for a fresh render); 404
        when the post does not exist; 422 when the post has no usable text;
        500 when rendering fails.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        # e.g. a JSON array or string: treat it like an empty request.
        body = {}

    voice = str(body.get('voice', 'nu')).lower().strip()
    emotion = str(body.get('emotion', 'neutral')).lower().strip()
    try:
        speed = float(body.get('speed', 1.2) or 1.2)
    except (TypeError, ValueError):
        # Non-numeric speed: use the default instead of failing the request.
        speed = 1.2
    speed = max(0.85, min(1.35, speed))

    posts = base._load_ai_wall()
    post = next((p for p in posts if str(p.get('id')) == str(post_id)), None)
    if not post:
        return JSONResponse({'error': 'post not found'}, status_code=404)

    os.makedirs(base.SHORTS_DIR, exist_ok=True)
    suffix = f'_{voice}_{emotion}_{str(speed).replace(".", "p")}_segments'
    out_mp4 = os.path.join(base.SHORTS_DIR, base._safe_name(post_id + suffix) + '.mp4')
    if os.path.exists(out_mp4):
        post['video'] = '/api/ai/short-file/' + post_id + suffix
        base._save_ai_wall(posts)
        return JSONResponse({'video': post['video']})

    work = os.path.join(base.SHORTS_DIR, base._safe_name(post_id + suffix))
    os.makedirs(work, exist_ok=True)
    img = os.path.join(work, 'image.jpg')
    base._download_image(post.get('img'), post.get('title', 'AI news'), img)

    segs = split_segments(post)
    if not segs:
        return JSONResponse({'error': 'Không có nội dung để tạo short'}, status_code=422)

    # Spoken lead-in for the first segment only; it depends on the emotion,
    # not on the segment, so it is looked up once.
    prefix = {
        'urgent': 'Tin nhanh.',
        'warm': 'Câu chuyện đáng chú ý.',
        'serious': 'Bản tin nghiêm túc.',
        'energetic': 'Cập nhật nổi bật.',
    }.get(emotion, '')

    clips = []
    try:
        for i, seg in enumerate(segs, 1):
            frame = os.path.join(work, f'frame_{i:02d}.jpg')
            aud = os.path.join(work, f'aud_{i:02d}.mp3')
            audf = os.path.join(work, f'audf_{i:02d}.mp3')
            clip = os.path.join(work, f'clip_{i:02d}.mp4')
            spoken = (prefix + ' ' + seg).strip() if i == 1 and prefix else seg
            make_segment_frame(post, seg, i, len(segs), img, frame)
            make_tts(spoken, voice, aud)
            # Re-time the narration, then mux it with the still frame.
            subprocess.run(['ffmpeg', '-y', '-i', aud, '-filter:a', f'atempo={speed}', '-vn', audf], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=120)
            subprocess.run(['ffmpeg', '-y', '-loop', '1', '-i', frame, '-i', audf, '-shortest', '-c:v', 'libx264', '-tune', 'stillimage', '-pix_fmt', 'yuv420p', '-c:a', 'aac', '-b:a', '128k', '-vf', 'scale=1080:1920', clip], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=180)
            clips.append(clip)

        listfile = os.path.join(work, 'list.txt')
        with open(listfile, 'w', encoding='utf-8') as f:
            for c in clips:
                # Single quotes in a path are escaped for the concat list file.
                f.write("file '{}'\n".format(c.replace("'", "'\\''")))
        subprocess.run(['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', listfile, '-c', 'copy', out_mp4], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=240)

        post['video'] = '/api/ai/short-file/' + post_id + suffix
        post['short_voice'] = voice
        post['short_emotion'] = emotion
        post['short_speed'] = speed
        post['short_subtitles'] = False
        post['short_segments'] = len(segs)
        base._save_ai_wall(posts)
        return JSONResponse({'video': post['video'], 'segments': len(segs), 'speed': speed, 'subtitles': False})
    except Exception as e:
        return JSONResponse({'error': 'Không tạo được shorts: ' + str(e)[:180]}, status_code=500)
 
 
311
 
312
@app.get('/api/ai/short-file/{file_id}')
def short_file(file_id: str):
    """Serve a rendered short from ``base.SHORTS_DIR`` as an MP4 download.

    The on-disk name is ``base._safe_name(file_id) + '.mp4'``. Responds with
    a 404 JSON error when that file does not exist.
    """
    video_path = os.path.join(base.SHORTS_DIR, base._safe_name(file_id) + '.mp4')
    if os.path.exists(video_path):
        return FileResponse(video_path, media_type='video/mp4', filename=f'vnews-ai-{file_id}.mp4')
    return JSONResponse({'error': 'not found'}, status_code=404)
317
 
318
+ # Front-end patch
 
319
# Remove any existing GET "/" route so the index handler defined after this
# statement takes over.
app.router.routes = [
    route
    for route in app.router.routes
    if getattr(route, 'path', None) != '/' or 'GET' not in getattr(route, 'methods', set())
]

# Script block appended to the served index page. It re-wraps
# window.aiMakeShortPatched when present and replaces window.createTopicPost
# with a version that posts to /api/topic_post.
PATCH_INJECT = r'''
<script>
(function(){
if(window.aiMakeShortPatched){const old=window.aiMakeShortPatched;window.aiMakeShortPatched=function(i){return old(i)}}
window.createTopicPost=function(){let inp=document.getElementById('ai-topic-input');let topic=(inp&&inp.value||'').trim();if(!topic)return alert('Nhập chủ đề trước');fetch('/api/topic_post',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({topic})}).then(r=>r.json().then(j=>({ok:r.ok,j}))).then(({ok,j})=>{if(ok&&j.post){window.location.reload();alert('Đã tổng hợp nội dung các nguồn thành 1 bản tóm tắt trên Tường AI');}else alert(j.error||'Lỗi tạo bài')}).catch(e=>alert(e.message||'Lỗi tạo bài'));};
})();
</script>
'''
328
@app.get('/')
async def index_patched():
    """Serve the static index page with the front-end patch injected.

    Reads ``/app/static/index.html`` and inserts ``base.AI_INJECT`` (when the
    base module defines it) followed by ``PATCH_INJECT`` in front of each
    ``</body>`` occurrence.
    """
    with open('/app/static/index.html', 'r', encoding='utf-8') as index_file:
        page = index_file.read()
    # Keep the base module's own injection, if it has one, ahead of this
    # module's override script.
    base_inject = getattr(base, 'AI_INJECT', '')
    injected = base_inject + PATCH_INJECT + '\n</body>'
    return HTMLResponse(page.replace('</body>', injected))