bep40 committed on
Commit
44b174d
·
verified ·
1 Parent(s): 523f7be

Final AI topic aggregate and short timing fixes

Browse files
Files changed (1) hide show
  1. ai_fix2.py +200 -0
ai_fix2.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, re, subprocess, html as html_lib
2
+ import requests
3
+ import ai_patch as prev
4
+ from ai_patch import app
5
+ from fastapi import Request
6
+ from fastapi.responses import JSONResponse, HTMLResponse, FileResponse
7
+
8
+ base = prev.base
9
+
10
+
11
def clean(s):
    """Return *s* with HTML entities decoded and whitespace normalised.

    Falsy input (``None``, ``""``) yields an empty string. Every run of
    whitespace is collapsed to a single space and the result is trimmed.
    """
    raw = s if s else ""
    decoded = html_lib.unescape(raw)
    collapsed = re.sub(r"\s+", " ", decoded)
    return collapsed.strip()
13
+
14
+
15
def sentence_split(text):
    """Split *text* into cleaned sentences suitable for subtitle cues.

    Leading bullet markers (``•``, ``-``, ``*``) are removed from every line,
    line breaks are treated as sentence boundaries, and the result is split
    after ``.``, ``!`` or ``?``. Fragments shorter than 8 characters (after
    ``clean``) are discarded.

    Returns:
        list[str]: the sentences in their original order (possibly empty).
    """
    text = re.sub(r"^[•\-\*]\s*", "", text or "", flags=re.M)

    # A line break ends a sentence. Only add a period when the line does not
    # already end with sentence punctuation; otherwise "Câu một.\n" would
    # become "Câu một.." in the subtitles.
    lines = text.split("\n")
    last_index = len(lines) - 1
    pieces = []
    for index, line in enumerate(lines):
        line = line.strip()
        if not line:
            continue
        if index < last_index and line[-1] not in ".!?":
            line += "."
        pieces.append(line)
    text = " ".join(pieces)

    parts = []
    for s in re.split(r"(?<=[\.\!\?])\s+", text):
        s = clean(s)
        if len(s) >= 8:
            parts.append(s)
    return parts
24
+
25
+
26
def srt_time(sec):
    """Format a number of seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond before being split into
    fields, so binary float error cannot shave a millisecond off (``2.3`` is
    ``02,300``, not ``02,299``) and a fraction that rounds up to a full second
    carries into the seconds field. Negative values are clamped to zero
    because SRT has no representation for them.
    """
    total_ms = max(0, int(round(float(sec) * 1000)))
    total_sec, ms = divmod(total_ms, 1000)
    hours, remainder = divmod(total_sec, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
30
+
31
+
32
def write_weighted_srt(script, path, total_duration):
    """Write an SRT file whose cue lengths are weighted by sentence length.

    Args:
        script: narration text; split into cues with ``sentence_split``.
        path: destination file, written as UTF-8 (overwritten if present).
        total_duration: audio length in seconds (any value ``float()`` accepts).

    Each cue gets a share of the usable duration proportional to its character
    count, clamped to 1.4-6.0 seconds, with a 0.08 s gap between cues. Writing
    stops once the running time reaches the end of the audio, so trailing
    sentences may be omitted.
    """
    # Coerce once so every expression below sees the same numeric value.
    total_duration = float(total_duration)
    subs = sentence_split(script)
    if not subs:
        subs = [clean(script)[:140] or "VNEWS"]
    total_chars = max(1, sum(len(x) for x in subs))
    usable = max(2.0, total_duration - 0.8)
    cur = 0.35
    with open(path, "w", encoding="utf-8") as f:
        for i, s in enumerate(subs, 1):
            dur = max(1.4, min(6.0, usable * len(s) / total_chars))
            start = cur
            # For very short audio the clamp to the end of the track could put
            # `end` before `start`; keep every cue at least 0.1 s long.
            end = max(start + 0.1, min(total_duration - 0.1, cur + dur))
            cur = end + 0.08
            f.write(f"{i}\n{srt_time(start)} --> {srt_time(end)}\n{s}\n\n")
            if cur >= total_duration - 0.2:
                break
48
+
49
+
50
def tts_script_full(post, emotion):
    """Build the narration script for a post, one sentence per line.

    Args:
        post: mapping with optional ``"title"`` and ``"text"`` entries.
        emotion: key selecting a spoken prefix; unknown keys give no prefix.

    The trailing "Nguồn tham khảo: ..." section is removed from the text, the
    script is capped at roughly 2600 characters (cut at a sentence end when one
    exists past character 1000), and a line break is placed after each
    sentence.

    Returns:
        str: the script, possibly empty when the post has no title or text
        and no prefix applies.
    """
    title = clean(post.get("title", ""))
    text = clean(post.get("text", ""))
    text = re.sub(r"Nguồn tham khảo:.*", "", text, flags=re.S).strip()
    prefix = {
        "urgent": "Tin nhanh.",
        "warm": "Câu chuyện đáng chú ý.",
        "serious": "Bản tin nghiêm túc.",
        "energetic": "Cập nhật nổi bật.",
    }.get(emotion, "")
    # Terminate the title only when it has no punctuation of its own, and skip
    # empty parts, so the script never contains a stray ". " or "?.".
    if title and title[-1] not in ".!?":
        title += "."
    script = " ".join(part for part in (prefix, title, text) if part)
    # Keep full summary, but prevent extremely long generation from hanging forever.
    if len(script) > 2600:
        head = script[:2600]
        cut = max(head.rfind("."), head.rfind("!"), head.rfind("?"))
        script = head[:cut + 1] if cut > 1000 else head
    # Break only where punctuation is followed by whitespace: numbers such as
    # "1.500" or "3.5" and ellipses must not be split across lines.
    script = re.sub(r"([\.\!\?])\s+", r"\1\n", script)
    script = re.sub(r"\n{2,}", "\n", script).strip()
    return script
68
+
69
+
70
# Remove previous duplicated routes and add corrected versions last.
# Each entry is a (path, HTTP method) pair that this module re-registers below.
_PATCH = {
    ('/api/topic_post', 'POST'),
    ('/api/ai/short/{post_id}', 'POST'),
    ('/api/ai/short-file/{file_id}', 'GET'),
    ('/', 'GET'),
}
# `methods` may be missing or None on some route objects, so fall back to an
# empty tuple instead of running `in` against None.
app.router.routes = [
    r for r in app.router.routes
    if not any(
        getattr(r, 'path', None) == p and m in (getattr(r, 'methods', None) or ())
        for p, m in _PATCH
    )
]
73
+
74
+
75
@app.post('/api/topic_post')
async def topic_post_aggregate(request: Request):
    """Create one aggregated AI summary post for a topic.

    Expects a JSON object with a ``topic`` field. Up to five source articles
    are requested from ``prev._topic_source_articles``; those with at least 80
    characters of raw text are combined into a single prompt for
    ``base.qwen_generate``. The resulting post is inserted at the front of the
    AI wall.

    Returns:
        JSONResponse: ``{'post': ..., 'count_sources': N}`` on success;
        status 400 when the topic is missing (including an unreadable or
        non-object request body); status 422 when no usable source content
        is available.
    """
    # Treat a malformed or non-object body like an empty one, so the client
    # gets the 400 below instead of an unhandled server error.
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        body = {}
    topic = base._clean_text(body.get('topic',''))
    if not topic:
        return JSONResponse({'error':'missing topic'}, status_code=400)
    articles = prev._topic_source_articles(topic, limit=5)
    if not articles:
        return JSONResponse({'error':'Không lấy được bài viết nguồn cho chủ đề này.'}, status_code=422)
    source_blocks = []
    sources = []
    image = ""
    for i, art in enumerate(articles, 1):
        # `or ''` also covers an explicit None stored under 'raw'.
        raw = art.get('raw') or ''
        if len(raw) < 80:
            continue
        source_blocks.append(f"[Nguồn {i}] {art.get('title','')} ({art.get('via','')})\n{raw[:2600]}")
        sources.append(art.get('source') or {'title': art.get('title'), 'url': art.get('url'), 'via': art.get('via'), 'excerpt': raw[:600]})
        # The first article that carries an image supplies the post image.
        if not image and art.get('image'):
            image = art.get('image')
    if not source_blocks:
        return JSONResponse({'error':'Các nguồn tìm được không có đủ nội dung để tóm tắt.'}, status_code=422)
    ctx = "\n\n".join(source_blocks)
    prompt = f"""Bạn là biên tập viên tổng hợp tin tức tiếng Việt.

Chủ đề: {topic}

NHIỆM VỤ:
- Đọc nội dung của TẤT CẢ các bài nguồn bên dưới.
- Tổng hợp thành 1 bản tóm tắt chung duy nhất, giống cách tóm tắt qua URL.
- Không tạo mỗi tiêu đề thành một bài riêng.
- Không chỉ liệt kê tiêu đề; phải dựa vào nội dung trong từng bài.
- Không lặp ý giữa các nguồn.
- Tối đa 6 gạch đầu dòng, mỗi dòng 1 câu rõ ràng.
- Nếu các nguồn có góc nhìn khác nhau, gộp lại thành ý tổng hợp.
- Cuối cùng thêm dòng: Nguồn tham khảo: tên website.

Nội dung nguồn:
{ctx[:15000]}"""
    text = await base.qwen_generate(prompt, image_url=image or None, max_tokens=1000)
    text = prev._postprocess_ai_text(text, max_units=7)
    # Make sure the post always ends with a source attribution line.
    if 'Nguồn tham khảo:' not in text:
        text += '\n\n' + prev._source_line(sources)
    post = base.make_post('Tổng hợp: ' + topic, text, image or base.pollinations_image_url(topic), '', 'topic_aggregate', sources=sources[:5])
    posts = base._load_ai_wall()
    posts.insert(0, post)
    base._save_ai_wall(posts)
    return JSONResponse({'post': post, 'count_sources': len(sources)})
121
+
122
+
123
def _short_option(value, default):
    """Normalise a user-supplied option to a lowercase ``[a-z0-9_-]`` token.

    The token becomes part of a file name and of a public URL, so any other
    character is dropped; an empty result falls back to *default*.
    """
    token = re.sub(r'[^a-z0-9_-]', '', str(value).lower().strip())
    return token or default


@app.post('/api/ai/short/{post_id}')
async def ai_short_full(post_id: str, request: Request):
    """Render (or reuse) a narrated vertical short video for an AI wall post.

    Optional JSON body fields: ``voice`` (default ``'nu'``), ``emotion``
    (default ``'neutral'``) and ``speed`` (default 1.2, clamped to 0.85-1.35).
    An unreadable body or an unparsable ``speed`` falls back to the defaults.

    The pipeline downloads the post image, builds a frame, synthesises speech
    (edge-tts first, gTTS as fallback), changes the tempo with ffmpeg, writes
    weighted subtitles and muxes everything into an mp4 under
    ``base.SHORTS_DIR``. An existing mp4 for the same post/options is reused.

    Returns:
        JSONResponse: video URL and options on success; status 404 when the
        post does not exist; status 500 with a short message when any
        pipeline step fails.

    NOTE(review): the subprocess calls below are synchronous inside an async
    handler, so the event loop is blocked while they run — confirm whether
    that is acceptable for this deployment.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        body = {}
    voice = _short_option(body.get('voice','nu'), 'nu')
    emotion = _short_option(body.get('emotion','neutral'), 'neutral')
    # A non-numeric speed uses the default instead of raising outside the
    # error-handling block further down.
    try:
        speed = float(body.get('speed', 1.2) or 1.2)
    except (TypeError, ValueError):
        speed = 1.2
    speed = max(0.85, min(1.35, speed))
    posts = base._load_ai_wall()
    post = next((p for p in posts if str(p.get('id')) == str(post_id)), None)
    if not post:
        return JSONResponse({'error':'post not found'}, status_code=404)
    os.makedirs(base.SHORTS_DIR, exist_ok=True)
    suffix = f"_{voice}_{emotion}_{str(speed).replace('.', 'p')}_full"
    out_mp4 = os.path.join(base.SHORTS_DIR, base._safe_name(post_id + suffix) + '.mp4')
    # Reuse a previously rendered file for the same post and options.
    if os.path.exists(out_mp4):
        post['video'] = '/api/ai/short-file/' + post_id + suffix
        base._save_ai_wall(posts)
        return JSONResponse({'video': post['video'], 'speed': speed, 'subtitles': True})
    work = os.path.join(base.SHORTS_DIR, base._safe_name(post_id + suffix))
    os.makedirs(work, exist_ok=True)
    img = os.path.join(work,'image.jpg')
    frame = os.path.join(work,'frame.jpg')
    audio = os.path.join(work,'voice.mp3')
    audio_fast = os.path.join(work,'voice_fast.mp3')
    srt = os.path.join(work,'subtitles.srt')
    try:
        base._download_image(post.get('img'), post.get('title','AI news'), img)
        prev._make_short_frame_full(post, img, frame)
        script = tts_script_full(post, emotion)
        edge_voice = {'nam':'vi-VN-NamMinhNeural','male':'vi-VN-NamMinhNeural','nu':'vi-VN-HoaiMyNeural','female':'vi-VN-HoaiMyNeural','mien-nam':'vi-VN-HoaiMyNeural'}.get(voice,'vi-VN-HoaiMyNeural')
        try:
            subprocess.run(['python','-m','edge_tts','--voice',edge_voice,'--text',script,'--write-media',audio], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=240)
        except Exception:
            # Best-effort fallback: any edge-tts failure switches to gTTS.
            tld = 'com.vn' if voice in ('nu','female','mien-nam') else 'com'
            try:
                base.gTTS(script, lang='vi', tld=tld, slow=False).save(audio)
            except TypeError:
                # Retry without `tld` when the gTTS call rejects that argument.
                base.gTTS(script, lang='vi', slow=False).save(audio)
        subprocess.run(['ffmpeg','-y','-i',audio,'-filter:a',f'atempo={speed}','-vn',audio_fast], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=180)
        # Fall back to 45 s when ffprobe fails or prints nothing parseable.
        duration = 45.0
        try:
            pr = subprocess.run(['ffprobe','-v','error','-show_entries','format=duration','-of','default=noprint_wrappers=1:nokey=1',audio_fast], stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=20)
            duration = float((pr.stdout or b'45').decode().strip() or 45)
        except Exception:
            pass
        write_weighted_srt(script, srt, duration)
        vf = "scale=1080:1920,subtitles='{}':force_style='FontName=DejaVu Sans,FontSize=22,PrimaryColour=&H00FFFFFF,OutlineColour=&HAA000000,BorderStyle=1,Outline=2,Shadow=0,Alignment=2,MarginV=55'".format(srt.replace("'", "\\'"))
        cmd = ['ffmpeg','-y','-loop','1','-i',frame,'-i',audio_fast,'-shortest','-c:v','libx264','-tune','stillimage','-pix_fmt','yuv420p','-c:a','aac','-b:a','128k','-vf',vf,out_mp4]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=360)
        post['video'] = '/api/ai/short-file/' + post_id + suffix
        post['short_voice'] = voice
        post['short_emotion'] = emotion
        post['short_speed'] = speed
        post['short_subtitles'] = True
        base._save_ai_wall(posts)
        return JSONResponse({'video': post['video'], 'voice': voice, 'emotion': emotion, 'speed': speed, 'subtitles': True, 'duration': duration})
    except Exception as e:
        return JSONResponse({'error':'Không tạo được shorts: '+str(e)[:180]}, status_code=500)
175
+
176
+
177
@app.get('/api/ai/short-file/{file_id}')
def ai_short_file_full(file_id: str):
    """Serve a rendered short video by its file id.

    The id is passed through ``base._safe_name`` and resolved to an ``.mp4``
    inside ``base.SHORTS_DIR``. Responds with the file as ``video/mp4``, or
    with a JSON error and status 404 when no such file exists.
    """
    mp4_name = base._safe_name(file_id) + '.mp4'
    mp4_path = os.path.join(base.SHORTS_DIR, mp4_name)
    if os.path.exists(mp4_path):
        return FileResponse(mp4_path, media_type='video/mp4', filename=f'vnews-ai-{file_id}.mp4')
    return JSONResponse({'error':'not found'}, status_code=404)
183
+
184
+
185
# Use previous front-end injection but override topic alert wording and progress text.
# Drop any existing GET '/' route so the handler registered below is the one
# that serves the page. `methods` may be missing or None on some route
# objects, hence the `or ()` fallback.
app.router.routes = [
    r for r in app.router.routes
    if not (
        getattr(r, 'path', None) == '/'
        and 'GET' in (getattr(r, 'methods', None) or ())
    )
]
187
+
188
@app.get('/')
async def index_fix2():
    """Serve the static index page with the patch scripts injected.

    Reads ``/app/static/index.html`` on every request and inserts
    ``prev.PATCH_INJECT`` plus a script that redefines
    ``window.createTopicPost`` to call ``/api/topic_post``. The snippet is
    placed once, immediately before the last ``</body>`` tag; when the page
    has no such tag it is appended to the end so the patch is never lost.
    """
    with open('/app/static/index.html','r',encoding='utf-8') as f:
        page = f.read()
    inject = prev.PATCH_INJECT + r'''
<script>
(function(){
const oldCreateTopic=window.createTopicPost;
window.createTopicPost=function(){let inp=document.getElementById('ai-topic-input');let topic=(inp&&inp.value||'').trim();if(!topic)return alert('Nhập chủ đề trước');fetch('/api/topic_post',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({topic})}).then(r=>r.json().then(j=>({ok:r.ok,j}))).then(({ok,j})=>{if(ok&&j.post){window.location.reload();alert('Đã tổng hợp nội dung nhiều bài nguồn thành 1 bản tóm tắt trên Tường AI');}else alert(j.error||'Lỗi tạo bài')}).catch(e=>alert(e.message||'Lỗi tạo bài'));};
})();
</script>
'''
    # rpartition targets only the final closing tag; str.replace would also
    # rewrite any earlier literal "</body>" (e.g. inside a script string).
    head, closing_tag, tail = page.rpartition('</body>')
    if not closing_tag:
        return HTMLResponse(page + inject)
    return HTMLResponse(head + inject + '\n</body>' + tail)