VNEWS

Running

App Files Files Community

bep40 committed 2 days ago

Commit

44b174d

verified ·

1 Parent(s): 523f7be

Final AI topic aggregate and short timing fixes

Browse files

Files changed (1) hide show

ai_fix2.py +200 -0

ai_fix2.py ADDED Viewed

	@@ -0,0 +1,200 @@

+import os, re, subprocess, html as html_lib
+import requests
+import ai_patch as prev
+from ai_patch import app
+from fastapi import Request
+from fastapi.responses import JSONResponse, HTMLResponse, FileResponse
+base = prev.base
def clean(s):
    """Return *s* as one line of plain text.

    HTML entities are decoded, every run of whitespace (including newlines
    and non-breaking spaces) collapses to a single space, and the result is
    stripped.

    Args:
        s: Text to normalise. Falsy values (``None``, ``""``, ``0``) give
            ``""``; any other non-string value is converted with ``str()``
            instead of raising ``TypeError`` inside ``html.unescape``.

    Returns:
        The normalised string.
    """
    if not s:
        return ""
    if not isinstance(s, str):
        # Post fields come from stored JSON, so a title may be a number.
        s = str(s)
    return re.sub(r"\s+", " ", html_lib.unescape(s)).strip()
def sentence_split(text):
    """Split *text* into subtitle-sized sentences.

    Leading bullet markers (``•``, ``-``, ``*``) are removed from each line.
    A line break counts as a sentence boundary: a line that does not already
    end in ``.``, ``!`` or ``?`` gets a period appended, while a line that is
    already terminated is simply joined with a space. (Previously every line
    break inserted ". ", so scripts with one sentence per line produced
    subtitles ending in "..", "?." or "!.".)

    Args:
        text: Raw script or summary text; ``None`` is treated as ``""``.

    Returns:
        A list of cleaned sentences. Fragments shorter than 8 characters are
        discarded.
    """
    text = re.sub(r"^[•\-\*]\s*", "", text or "", flags=re.M)
    # Drop whitespace at line ends so the look-behind below sees the real
    # last character of each line.
    text = re.sub(r"[ \t\r]+(?=\n)", "", text)
    # The line already ends a sentence: the break only needs to become a space.
    text = re.sub(r"(?<=[.!?])\n+", " ", text)
    # Unterminated line: the break itself marks the end of the sentence.
    text = re.sub(r"\n+", ". ", text)
    parts = []
    for fragment in re.split(r"(?<=[.!?])\s+", text):
        sentence = clean(fragment)
        if len(sentence) >= 8:
            parts.append(sentence)
    return parts
def srt_time(sec):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond, so float noise no longer
    loses a millisecond (``1.001`` used to print as ``,000``). Negative
    offsets are clamped to zero, because a negative millisecond field
    produced a malformed stamp such as ``00:00:00,-50``.

    Args:
        sec: Offset in seconds (int or float).

    Returns:
        The formatted timestamp string.
    """
    total_ms = max(0, int(round(float(sec) * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
def write_weighted_srt(script, path, total_duration):
    """Write an SRT file whose cue lengths follow sentence length.

    Each sentence of *script* becomes one cue. A cue normally lasts its
    character share of the audio, clamped to the range 1.4-6.0 seconds, with
    a 0.35 s lead-in and a 0.08 s gap between cues.

    When the clamped cues would not fit before the end of the audio, the
    clamps are dropped and the available time is shared purely by character
    count. Previously the loop stopped at the end of the audio and the
    remaining sentences were silently left out of the subtitle file.

    Args:
        script: Narration text; it is split with ``sentence_split``.
        path: Destination ``.srt`` path, written as UTF-8.
        total_duration: Audio length in seconds.
    """
    subs = sentence_split(script) or [clean(script)[:140] or "VNEWS"]
    total_duration = float(total_duration)
    lead_in, gap, tail = 0.35, 0.08, 0.1

    total_chars = max(1, sum(len(s) for s in subs))
    usable = max(2.0, total_duration - 0.8)
    durations = [max(1.4, min(6.0, usable * len(s) / total_chars)) for s in subs]

    # Time left for the cues themselves once lead-in, tail and gaps are paid.
    window = total_duration - tail - lead_in - gap * (len(subs) - 1)
    if sum(durations) > window:
        if window <= 0:
            # Audio too short even for the gaps: give them up as well.
            gap = 0.0
            window = max(0.5, total_duration - tail - lead_in)
        durations = [window * len(s) / total_chars for s in subs]

    cues = []
    cur = lead_in
    for index, (sentence, dur) in enumerate(zip(subs, durations), 1):
        end = cur + dur
        cues.append(f"{index}\n{srt_time(cur)} --> {srt_time(end)}\n{sentence}\n\n")
        cur = end + gap
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(cues)
def tts_script_full(post, emotion):
    """Build the narration script for a post, one sentence per line.

    The script is ``<emotion prefix> <title>. <text>``. Everything from
    "Nguồn tham khảo:" onwards is removed from the text, and scripts longer
    than 2600 characters are cut back to the last complete sentence.

    Fixes over the previous version:
      * Sentences are only broken where punctuation is followed by
        whitespace, so numbers such as "1.500" or "3.5" stay intact.
      * Empty parts are skipped and the title only gets a period when it has
        no terminator, so the script no longer starts with ". " or contains
        "?." / "!.".
      * The length cut looks for punctuation followed by whitespace, so it
        cannot land inside a number.

    Args:
        post: Mapping that may provide ``"title"`` and ``"text"``.
        emotion: One of ``"urgent"``, ``"warm"``, ``"serious"``,
            ``"energetic"``; any other value adds no prefix.

    Returns:
        The script with sentences separated by ``"\\n"``.
    """
    title = clean(post.get("title", ""))
    text = clean(post.get("text", ""))
    text = re.sub(r"Nguồn tham khảo:.*", "", text, flags=re.S).strip()
    prefix = {
        "urgent": "Tin nhanh.",
        "warm": "Câu chuyện đáng chú ý.",
        "serious": "Bản tin nghiêm túc.",
        "energetic": "Cập nhật nổi bật.",
    }.get(emotion, "")
    if title and title[-1] not in ".!?":
        title += "."
    script = " ".join(part for part in (prefix, title, text) if part)
    # Keep the full summary, but stop very long scripts from stalling TTS.
    if len(script) > 2600:
        # One extra character is scanned so a terminator at index 2599 can
        # still see the whitespace that follows it.
        cut = max(
            (m.end() for m in re.finditer(r"[.!?](?=\s)", script[:2601])),
            default=0,
        )
        # Accept the sentence boundary only if it keeps a reasonable amount
        # of text; otherwise fall back to a hard cut.
        script = script[:cut] if cut > 1001 else script[:2600]
    script = re.sub(r"(?<=[.!?])\s+", "\n", script)
    return script.strip()
# Remove previously registered versions of the routes this module redefines,
# so the corrected handlers below are the only ones left for these paths.
_PATCH = {('/api/topic_post','POST'),('/api/ai/short/{post_id}','POST'),('/api/ai/short-file/{file_id}','GET'),('/','GET')}


def _is_patched_route(route):
    """Return True when *route* matches a (path, method) pair in ``_PATCH``."""
    path = getattr(route, 'path', None)
    # ``methods`` can be missing or None on some route types; treat both as
    # "no methods" instead of raising TypeError on the membership test.
    methods = getattr(route, 'methods', None) or ()
    return any(path == p and m in methods for p, m in _PATCH)


app.router.routes = [r for r in app.router.routes if not _is_patched_route(r)]
@app.post('/api/topic_post')
async def topic_post_aggregate(request: Request):
    """Create one AI-wall post summarising several source articles on a topic.

    Expects a JSON body ``{"topic": "..."}``. Up to five articles are fetched
    through ``prev._topic_source_articles``; those with at least 80 characters
    of raw text go into a single prompt for ``base.qwen_generate``. The
    resulting post is inserted at the top of the AI wall.

    Responses:
        200: ``{"post": ..., "count_sources": n}``
        400: missing topic (also returned for an invalid or non-object body)
        422: no usable source articles
        500: the summary could not be generated (JSON error body, so the
            front end can show ``error``)
    """
    try:
        body = await request.json()
    except Exception:
        # Same tolerance as the shorts endpoint: a bad body is an empty one.
        body = {}
    if not isinstance(body, dict):
        body = {}
    topic = base._clean_text(body.get('topic') or '')
    if not topic:
        return JSONResponse({'error':'missing topic'}, status_code=400)
    # NOTE(review): this is a synchronous call inside an async handler; if it
    # performs network fetches it blocks the event loop — confirm.
    articles = prev._topic_source_articles(topic, limit=5)
    if not articles:
        return JSONResponse({'error':'Không lấy được bài viết nguồn cho chủ đề này.'}, status_code=422)
    source_blocks = []
    sources = []
    image = ""
    for art in articles:
        raw = art.get('raw') or ''
        if len(raw) < 80:
            continue
        # Number only the sources that are kept, so labels have no gaps.
        number = len(source_blocks) + 1
        source_blocks.append(f"[Nguồn {number}] {art.get('title','')} ({art.get('via','')})\n{raw[:2600]}")
        sources.append(art.get('source') or {'title': art.get('title'), 'url': art.get('url'), 'via': art.get('via'), 'excerpt': raw[:600]})
        if not image and art.get('image'):
            image = art.get('image')
    if not source_blocks:
        return JSONResponse({'error':'Các nguồn tìm được không có đủ nội dung để tóm tắt.'}, status_code=422)
    ctx = "\n\n".join(source_blocks)
    prompt = f"""Bạn là biên tập viên tổng hợp tin tức tiếng Việt.
Chủ đề: {topic}
NHIỆM VỤ:
- Đọc nội dung của TẤT CẢ các bài nguồn bên dưới.
- Tổng hợp thành 1 bản tóm tắt chung duy nhất, giống cách tóm tắt qua URL.
- Không tạo mỗi tiêu đề thành một bài riêng.
- Không chỉ liệt kê tiêu đề; phải dựa vào nội dung trong từng bài.
- Không lặp ý giữa các nguồn.
- Tối đa 6 gạch đầu dòng, mỗi dòng 1 câu rõ ràng.
- Nếu các nguồn có góc nhìn khác nhau, gộp lại thành ý tổng hợp.
- Cuối cùng thêm dòng: Nguồn tham khảo: tên website.
Nội dung nguồn:
{ctx[:15000]}"""
    try:
        text = await base.qwen_generate(prompt, image_url=image or None, max_tokens=1000)
        text = prev._postprocess_ai_text(text, max_units=7)
    except Exception as e:
        return JSONResponse({'error':'Không tạo được bản tóm tắt: '+str(e)[:180]}, status_code=500)
    if 'Nguồn tham khảo:' not in text:
        text += '\n\n' + prev._source_line(sources)
    post = base.make_post('Tổng hợp: ' + topic, text, image or base.pollinations_image_url(topic), '', 'topic_aggregate', sources=sources[:5])
    posts = base._load_ai_wall()
    posts.insert(0, post)
    base._save_ai_wall(posts)
    return JSONResponse({'post': post, 'count_sources': len(sources)})
import threading  # used only by the short renderer below

# Only one render runs at a time. The renderer now runs in a worker thread,
# so without this lock two requests for the same short could write into the
# same working directory at once.
_SHORT_RENDER_LOCK = threading.Lock()


def _short_token(value, default):
    """Normalise a voice/emotion option to a short ``[a-z0-9_-]`` token.

    The token ends up in a file name and in the stored video URL, so
    characters such as ``/`` or ``?`` must not pass through. ``None`` or an
    empty result falls back to *default*.
    """
    raw = default if value is None else value
    token = re.sub(r'[^a-z0-9_-]+', '', str(raw).lower().strip())
    return token[:32] or default


def _render_short_video(post, voice, emotion, speed, work, out_mp4):
    """Render the short for *post* into *out_mp4* (blocking).

    Steps: download the image, draw the frame, synthesise speech (edge-tts,
    falling back to gTTS), apply the tempo, write subtitles, encode with
    ffmpeg. The video is encoded to a temporary file inside *work* and moved
    into place only on success, so a failed render never leaves a partial
    file that would later be served as a cached result.

    Returns:
        The audio duration in seconds, or ``None`` when another request
        finished the same render while this one waited for the lock.

    Raises:
        Any exception from the helpers or a failed ffmpeg run.
    """
    import sys

    with _SHORT_RENDER_LOCK:
        if os.path.exists(out_mp4):
            return None
        os.makedirs(work, exist_ok=True)
        img = os.path.join(work, 'image.jpg')
        frame = os.path.join(work, 'frame.jpg')
        audio = os.path.join(work, 'voice.mp3')
        audio_fast = os.path.join(work, 'voice_fast.mp3')
        srt = os.path.join(work, 'subtitles.srt')
        tmp_mp4 = os.path.join(work, 'render.mp4')

        base._download_image(post.get('img'), post.get('title','AI news'), img)
        prev._make_short_frame_full(post, img, frame)
        script = tts_script_full(post, emotion)
        edge_voice = {'nam':'vi-VN-NamMinhNeural','male':'vi-VN-NamMinhNeural','nu':'vi-VN-HoaiMyNeural','female':'vi-VN-HoaiMyNeural','mien-nam':'vi-VN-HoaiMyNeural'}.get(voice,'vi-VN-HoaiMyNeural')
        try:
            # Use the running interpreter so edge_tts is looked up in the
            # same environment as this app, not whatever "python" is on PATH.
            subprocess.run([sys.executable or 'python','-m','edge_tts','--voice',edge_voice,'--text',script,'--write-media',audio], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=240)
        except Exception:
            # Deliberate best-effort fallback: any edge-tts failure goes to gTTS.
            tld = 'com.vn' if voice in ('nu','female','mien-nam') else 'com'
            try:
                base.gTTS(script, lang='vi', tld=tld, slow=False).save(audio)
            except TypeError:
                # gTTS build that does not accept ``tld``.
                base.gTTS(script, lang='vi', slow=False).save(audio)
        subprocess.run(['ffmpeg','-y','-i',audio,'-filter:a',f'atempo={speed}','-vn',audio_fast], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=180)

        duration = 45.0
        try:
            pr = subprocess.run(['ffprobe','-v','error','-show_entries','format=duration','-of','default=noprint_wrappers=1:nokey=1',audio_fast], stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=20)
            duration = float((pr.stdout or b'45').decode().strip() or 45)
        except Exception:
            # Probing is best-effort; keep the 45 s default.
            pass
        if not duration > 0:
            # Covers zero, negative and NaN results from the probe.
            duration = 45.0

        write_weighted_srt(script, srt, duration)
        vf = "scale=1080:1920,subtitles='{}':force_style='FontName=DejaVu Sans,FontSize=22,PrimaryColour=&H00FFFFFF,OutlineColour=&HAA000000,BorderStyle=1,Outline=2,Shadow=0,Alignment=2,MarginV=55'".format(srt.replace("'", "\\'"))
        cmd = ['ffmpeg','-y','-loop','1','-i',frame,'-i',audio_fast,'-shortest','-c:v','libx264','-tune','stillimage','-pix_fmt','yuv420p','-c:a','aac','-b:a','128k','-vf',vf,tmp_mp4]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=360)
        # Publish only a complete file. *work* is expected to sit next to
        # *out_mp4*, which keeps this a same-filesystem rename.
        os.replace(tmp_mp4, out_mp4)
        return duration


@app.post('/api/ai/short/{post_id}')
async def ai_short_full(post_id: str, request: Request):
    """Create (or reuse) a narrated vertical short for an AI-wall post.

    Optional JSON body: ``voice`` (default ``"nu"``), ``emotion`` (default
    ``"neutral"``) and ``speed`` (default 1.2, clamped to 0.85-1.35; a
    non-numeric value falls back to 1.2 instead of failing the request).

    The render itself runs in a worker thread so that ffmpeg and TTS, which
    can take minutes, do not block the event loop. Because other requests can
    now change the wall during a render, the wall is reloaded before the
    post's video fields are saved.

    Responses:
        200: ``{"video": url, "speed": ..., "subtitles": true, ...}``
        404: unknown ``post_id``
        500: rendering failed (``error`` holds a short reason)
    """
    from fastapi.concurrency import run_in_threadpool

    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        body = {}
    voice = _short_token(body.get('voice'), 'nu')
    emotion = _short_token(body.get('emotion'), 'neutral')
    try:
        speed = float(body.get('speed', 1.2) or 1.2)
    except (TypeError, ValueError):
        speed = 1.2
    speed = max(0.85, min(1.35, speed))

    posts = base._load_ai_wall()
    post = next((p for p in posts if str(p.get('id')) == str(post_id)), None)
    if not post:
        return JSONResponse({'error':'post not found'}, status_code=404)
    os.makedirs(base.SHORTS_DIR, exist_ok=True)
    suffix = f"_{voice}_{emotion}_{str(speed).replace('.', 'p')}_full"
    safe = base._safe_name(post_id + suffix)
    out_mp4 = os.path.join(base.SHORTS_DIR, safe + '.mp4')
    video_url = '/api/ai/short-file/' + post_id + suffix
    if os.path.exists(out_mp4):
        post['video'] = video_url
        base._save_ai_wall(posts)
        return JSONResponse({'video': video_url, 'speed': speed, 'subtitles': True})
    work = os.path.join(base.SHORTS_DIR, safe)
    try:
        duration = await run_in_threadpool(_render_short_video, post, voice, emotion, speed, work, out_mp4)
        # Reload: saving the pre-render snapshot would drop any wall changes
        # made by other requests while the video was being rendered.
        posts = base._load_ai_wall()
        fresh = next((p for p in posts if str(p.get('id')) == str(post_id)), None)
        if fresh is not None:
            fresh['video'] = video_url
            fresh['short_voice'] = voice
            fresh['short_emotion'] = emotion
            fresh['short_speed'] = speed
            fresh['short_subtitles'] = True
            base._save_ai_wall(posts)
        if duration is None:
            # A concurrent request produced the file; answer like a cache hit.
            return JSONResponse({'video': video_url, 'speed': speed, 'subtitles': True})
        return JSONResponse({'video': video_url, 'voice': voice, 'emotion': emotion, 'speed': speed, 'subtitles': True, 'duration': duration})
    except Exception as e:
        return JSONResponse({'error':'Không tạo được shorts: '+str(e)[:180]}, status_code=500)
@app.get('/api/ai/short-file/{file_id}')
def ai_short_file_full(file_id: str):
    """Serve a rendered short as ``video/mp4``.

    *file_id* is passed through ``base._safe_name`` and looked up as
    ``<SHORTS_DIR>/<safe name>.mp4``. The same sanitised name is used for the
    download file name, so the response header never carries the raw path
    parameter. Returns a 404 JSON error when no such regular file exists.
    """
    safe = base._safe_name(file_id)
    path = os.path.join(base.SHORTS_DIR, safe + '.mp4')
    # isfile rather than exists: a directory with that name is not a video.
    if not os.path.isfile(path):
        return JSONResponse({'error':'not found'}, status_code=404)
    return FileResponse(path, media_type='video/mp4', filename=f'vnews-ai-{safe}.mp4')
# Use previous front-end injection but override topic alert wording and progress text.
# Drop any existing GET "/" route first so the handler below is the one served.
# ``methods`` may be missing or None on some route types, hence the ``or ()``.
app.router.routes = [
    r for r in app.router.routes
    if not (getattr(r, 'path', None) == '/' and 'GET' in (getattr(r, 'methods', None) or ()))
]
@app.get('/')
async def index_fix2():
    """Serve the static index page with the patch scripts injected.

    The injected markup is ``prev.PATCH_INJECT`` followed by a script that
    replaces ``window.createTopicPost``. It is inserted once, immediately
    before the last ``</body>`` tag; when the page has no such tag it is
    appended to the end. Previously every ``</body>`` occurrence received a
    copy, and a page without the tag received nothing.
    """
    with open('/app/static/index.html','r',encoding='utf-8') as f:
        html = f.read()
    inject = prev.PATCH_INJECT + r'''
<script>
(function(){
const oldCreateTopic=window.createTopicPost;
window.createTopicPost=function(){let inp=document.getElementById('ai-topic-input');let topic=(inp&&inp.value||'').trim();if(!topic)return alert('Nhập chủ đề trước');fetch('/api/topic_post',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({topic})}).then(r=>r.json().then(j=>({ok:r.ok,j}))).then(({ok,j})=>{if(ok&&j.post){window.location.reload();alert('Đã tổng hợp nội dung nhiều bài nguồn thành 1 bản tóm tắt trên Tường AI');}else alert(j.error||'Lỗi tạo bài')}).catch(e=>alert(e.message||'Lỗi tạo bài'));};
})();
</script>
'''
    head, closing_tag, tail = html.rpartition('</body>')
    if not closing_tag:
        return HTMLResponse(html + inject)
    return HTMLResponse(head + inject + '\n</body>' + tail)