194130157a committed on
Commit
35b53af
·
verified ·
1 Parent(s): c8aa71e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +390 -0
app.py ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import subprocess
4
+ import json
5
+ import time
6
+ import re
7
+ import zipfile
8
+ import datetime
9
+ from concurrent.futures import ThreadPoolExecutor
10
+
11
+ # 1. 自动安装依赖
12
def ensure_dependencies(required=None):
    """Install the third-party packages this app needs if they are missing.

    Args:
        required: Optional mapping of top-level importable module name to the
            pip distribution that provides it. Defaults to the three packages
            this app uses (gradio, requests, opencc-python-reimplemented).

    Returns:
        The list of pip distribution names that were installed; empty when
        every module was already available.

    Raises:
        subprocess.CalledProcessError: if ``pip install`` exits non-zero.
    """
    import importlib
    import importlib.util

    if required is None:
        required = {
            "gradio": "gradio",
            "requests": "requests",
            "opencc": "opencc-python-reimplemented",
        }

    def _is_available(module_name):
        # find_spec locates the module without executing it, so probing is
        # cheap and has no import side effects.
        try:
            return importlib.util.find_spec(module_name) is not None
        except (ImportError, ValueError):
            return False

    missing = [
        package
        for module_name, package in required.items()
        if not _is_available(module_name)
    ]
    if not missing:
        return []

    print("正在安装依赖,请稍等片刻...")
    # Only install what is actually missing instead of the whole list.
    subprocess.check_call([sys.executable, "-m", "pip", "install", *missing])
    # Make packages installed after interpreter start-up visible to the
    # import system for the imports that follow this call.
    importlib.invalidate_caches()
    return missing
21
+
22
+ ensure_dependencies()
23
+
24
+ import gradio as gr
25
+ import requests
26
+ import opencc
27
+
28
# ================= Deployment-specific configuration =================
# SECURITY: never commit a real API key to the repository. The key is read
# from the MERCHANT_API_KEY environment variable; when it is unset the UI
# field starts empty and the user must paste a key. Any key that was ever
# committed here must be treated as leaked and revoked with the provider.
DEFAULT_API_KEY = os.environ.get("MERCHANT_API_KEY", "")
MERCHANT_BASE_URL = "https://xingjiabiapi.com"

# Model configuration
TEXT_MODEL = "gemini-3-pro-preview-thinking"
MAX_WORKERS = 20  # number of concurrent generation requests
# =====================================================================
36
+
37
+ # 初始化繁体转换器 (s2twp: 简体到台湾正体,包含惯用词转换)
38
+ cc = opencc.OpenCC('s2twp')
39
+
40
+ # --- 角色1:主编 (脑暴) ---
41
+ EDITOR_SYSTEM_PROMPT = """
42
+ 你是一位**“今日头条/百家号”的情感版块金牌主编**,深谙**“人性弱点”**与**“流量密码”**。
43
+ 你的任务是根据关键词,炮制出**10个**让人点击欲爆棚的**情感故事标题**。
44
+
45
+ **选题核心策略 (必须包含强烈的伦理冲突或窥探欲)**:
46
+ 1. **家庭伦理**:婆媳大战、姑嫂矛盾、凤凰男、扶弟魔、上门女婿。
47
+ 2. **婚外情愫**:闺蜜背叛、同学会旧情、邻居的秘密、出差时的意外。
48
+ 3. **私密空间**:试衣间、深夜办公室、合租房的隔音、私家车后的秘密。
49
+ 4. **标题风格**:要像真实的倾诉,带有悬念和反转。
50
+
51
+ **严格输出格式**:只输出10行文字,每行一个标题,不要带序号,不要带引号。
52
+ """
53
+
54
+ # --- 角色2:作家 (写作) ---
55
+ WRITER_SYSTEM_PROMPT = """
56
+ 你是一位**“百万阅读量”的情感专栏作家**,擅长用最真实的笔触,描写成年人世界里**“隐秘的渴望”**与**“失控的边缘”**。
57
+ 你的文章风格要像**“深夜电台倾诉”**,让读者读完感到**脸红心跳、头皮发麻**,但又挑不出任何违规的毛病。
58
+
59
+ **写作铁律 (严格遵守)**:
60
+ 1. **极致的拉扯感**:不要直接写发生了什么,要写**“将发未发”**时的紧张。
61
+ 2. **合法的刺激**:
62
+ * **严禁**描写性器官和具体的性行为过程。
63
+ * **必须**用隐晦的暗喻:例如“大脑一片空白”、“身体像过电一样”。
64
+ * 重点描写**心理的背德感**和**偷尝禁果的刺激感**。
65
+ 3. **真实感沉浸**:用第一人称“我”来叙述。多用短句。
66
+ 4. **格式清洗**:**全文除了逗号和句号,绝对不要出现任何其他符号(如引号、感叹号、括号等),也不要出现空格**。
67
+
68
+ 任务:基于用户给定的“主题”,创作一篇让人欲罢不能、肾上腺素飙升的情感实录。
69
+ """
70
+
71
def clean_content_strict(text, converter=None):
    """Strip disallowed characters from *text*, then run it through a converter.

    Steps: remove spaces and tabs, drop every character outside the whitelist
    (CJK ideographs U+4E00-U+9FA5, ASCII letters and digits, ASCII and
    full-width comma, ASCII full stop, ideographic full stop, newline),
    collapse runs of newlines into one, trim, and convert.

    Args:
        text: Raw model output. Falsy input yields an empty string.
        converter: Object exposing ``convert(str) -> str``. Defaults to the
            module-level OpenCC converter ``cc``.

    Returns:
        The cleaned, converted text.
    """
    if not text:
        return ""
    if converter is None:
        converter = cc

    # 1. Remove spaces and tabs.
    text = text.replace(" ", "").replace("\t", "")

    # 2. Keep whitelisted characters only. The full-width comma (U+FF0C) is
    #    listed explicitly: it is the comma Chinese prose actually uses, and a
    #    class holding only the ASCII comma would delete every one of them.
    disallowed = r'[^\u4e00-\u9fa5a-zA-Z0-9,\uff0c.\u3002\n]'
    cleaned_text = re.sub(disallowed, '', text)

    # 3. Collapse consecutive blank lines.
    cleaned_text = re.sub(r'\n+', '\n', cleaned_text)

    # 4. Script conversion via the supplied converter.
    return converter.convert(cleaned_text.strip())
91
+
92
def format_to_srt(text, chars_per_second=4.0, min_duration=1.5):
    """Convert plain text into SRT subtitle text with estimated timings.

    The text is cut after every comma (ASCII or full-width), full stop
    (ASCII or ideographic) or newline; each piece keeps its trailing
    delimiter and becomes one subtitle cue. Cues are laid back to back
    starting at 00:00:00,000.

    Args:
        text: Plain text to convert. Falsy input yields an empty string.
        chars_per_second: Assumed reading speed used to estimate cue length.
        min_duration: Minimum cue length in seconds.

    Returns:
        The SRT document as a single string; every cue ends with a blank line.
    """
    if not text:
        return ""

    def format_timestamp(td):
        # SRT timestamps are HH:MM:SS,mmm.
        total_seconds = int(td.total_seconds())
        hours, remainder = divmod(total_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        milliseconds = td.microseconds // 1000
        return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"

    # U+FF0C (full-width comma) is listed explicitly so Chinese clauses are
    # split; a class with only the ASCII comma would miss them.
    delimiters = ",\uff0c.\u3002\n"
    parts = re.split(r'([,\uff0c.\u3002\n])', text)

    sentences = []
    current_sentence = ""
    for part in parts:
        if len(part) == 1 and part in delimiters:
            # A delimiter with no preceding text is dropped.
            if current_sentence:
                sentences.append(current_sentence + part)
                current_sentence = ""
        else:
            current_sentence += part
    if current_sentence:
        sentences.append(current_sentence)

    sentences = [s.strip() for s in sentences if s.strip()]

    cues = []
    start_time = datetime.timedelta(seconds=0)
    for number, sentence in enumerate(sentences, start=1):
        duration_seconds = max(min_duration, len(sentence) / chars_per_second)
        end_time = start_time + datetime.timedelta(seconds=duration_seconds)
        cues.append(
            f"{number}\n"
            f"{format_timestamp(start_time)} --> {format_timestamp(end_time)}\n"
            f"{sentence}\n\n"
        )
        start_time = end_time

    return "".join(cues)
136
+
137
def stream_chat_request(api_key, url, headers, data):
    """POST a chat-completion request in streaming mode and collect the text.

    Args:
        api_key: Unused here (the caller already put the key in *headers*);
            kept so existing call sites keep working.
        url: Chat-completions endpoint.
        headers: HTTP headers, including authorization.
        data: JSON payload. It is copied, not modified; ``stream`` is forced
            to ``True`` on the copy.

    Returns:
        ``(content, "success")`` when the stream was read to the end
        (``content`` may be an empty string), or ``(None, message)`` on an
        HTTP error or any exception.
    """
    try:
        # Copy so the caller's dict, which is reused across retries, is not mutated.
        payload = {**data, "stream": True}
        pieces = []

        # The context manager releases the streamed connection on every exit path.
        with requests.post(url, headers=headers, json=payload, stream=True, timeout=120) as response:
            if response.status_code != 200:
                return None, f"HTTP错误 {response.status_code}: {response.text[:100]}"

            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                line = raw_line.decode('utf-8').strip()
                # Accept both "data: {...}" and "data:{...}" event lines.
                if line.startswith("data:"):
                    line = line[5:].strip()
                if line == "[DONE]":
                    break
                try:
                    chunk = json.loads(line)
                    delta = chunk['choices'][0].get('delta') or {}
                    piece = delta.get('content') or ''
                except (ValueError, KeyError, IndexError, AttributeError, TypeError):
                    # Keep-alive comments and malformed or unexpected chunks are skipped.
                    continue
                if isinstance(piece, str) and piece:
                    pieces.append(piece)

        return "".join(pieces), "success"
    except Exception as e:
        # Boundary handler: the caller retries based on the returned message.
        return None, f"流式请求异常: {str(e)}"
164
+
165
def generate_themes_from_keywords(api_key, keywords):
    """Step 1: ask the model for ten titles based on the given keywords.

    Args:
        api_key: Bearer token for the chat-completions endpoint.
        keywords: Free-text keywords entered by the user.

    Returns:
        A list of exactly ten values for the ten topic textboxes: either
        title strings (converted with the module-level ``cc`` converter and
        padded with empty strings), or no-op / placeholder ``gr.update``
        objects when input is missing.

    Raises:
        gr.Error: when the API call fails. The failure is raised instead of
            being written into the textboxes, where it would otherwise be
            picked up as a story topic by the next pipeline run.
    """
    if not keywords or not keywords.strip():
        return [gr.update()] * 10
    if not api_key or not api_key.strip():
        return [gr.update(placeholder="请先输入API Key")] * 10

    print(f"🧠 主编正在构思选题: {keywords}...")

    url = f"{MERCHANT_BASE_URL}/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key.strip()}"}
    data = {
        "model": TEXT_MODEL,
        "messages": [
            {"role": "system", "content": EDITOR_SYSTEM_PROMPT},
            {"role": "user", "content": f"关键词:{keywords}\n请生成10个爆款标题:"},
        ],
        "temperature": 0.95,
        "stream": False,
    }

    error_message = None
    themes = []
    try:
        resp = requests.post(url, headers=headers, json=data, timeout=120)
        if resp.status_code != 200:
            error_message = f"API Error: {resp.status_code}"
        else:
            content = resp.json()['choices'][0]['message']['content'] or ""
            themes = [line.strip() for line in content.split('\n') if line.strip()]
            # Drop leading list numbering such as "1." or "2、".
            themes = [re.sub(r'^\d+[\.,、]\s*', '', t) for t in themes]
            # Convert the titles with the shared converter.
            themes = [cc.convert(t) for t in themes]
    except Exception as e:
        error_message = f"错误: {e}"

    if error_message is not None:
        print(error_message)
        raise gr.Error(error_message)

    if len(themes) < 10:
        themes += [""] * (10 - len(themes))
    return themes[:10]
196
+
197
def generate_story_task(api_key, topic, index, sub_index, system_prompt, word_count):
    """Step 3: generate one piece of text for *topic*, with up to three attempts.

    Args:
        api_key: Bearer token for the chat-completions endpoint.
        topic: Title to write about. Blank or ``None`` makes the task a no-op.
        index: 1-based topic slot number, used in log prefixes.
        sub_index: 1-based variant number within the topic; selects the
            style hint and is used in log prefixes.
        system_prompt: System prompt sent to the model.
        word_count: Requested minimum length, interpolated into the prompt.

    Returns:
        ``None`` for a blank topic; otherwise a dict with ``status`` and
        ``msg`` and, on success, also ``title``, ``clean_content`` (TXT body)
        and ``srt_content`` (SRT body).
    """
    if not topic or not topic.strip():
        return None
    log_prefix = f"[主题{index}-{sub_index}]"

    url = f"{MERCHANT_BASE_URL}/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key.strip()}"}

    variations = ["第一人称沉浸式", "侧重心理极度拉扯", "侧重伦理禁忌感", "侧重反转与悔恨", "侧重深夜私密氛围"]
    style_guide = variations[(sub_index - 1) % len(variations)]

    data = {
        "model": TEXT_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"标题:{topic}\n要求字数:{word_count}字以上\n重点技法:{style_guide}\n请开始讲述这个故事,记住只能用逗号和句号:"},
        ],
        "temperature": 1.0,
        "max_tokens": 65536,
    }

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        print(f"{log_prefix} 第{attempt}次尝试撰写...")
        content, msg = stream_chat_request(api_key, url, headers, data)

        if msg == "success" and content:
            # Clean and convert, then derive the subtitle file from the result.
            cleaned_content = clean_content_strict(content)
            if cleaned_content:
                srt_content = format_to_srt(cleaned_content)
                return {
                    "status": "success",
                    "title": cc.convert(topic),
                    "clean_content": cleaned_content,  # TXT body
                    "srt_content": srt_content,  # SRT body
                    "msg": f"{log_prefix} ✅ 完成 ({len(cleaned_content)}字)",
                }
            # Everything was stripped by cleaning: treat as a failed attempt
            # rather than saving empty files.
            msg = "empty content after cleaning"
        elif msg == "success":
            msg = "empty response"

        if attempt < max_attempts:
            print(f"{log_prefix} ⚠️ 失败: {msg},休息3秒重试...")
            time.sleep(3)
        else:
            # No retry follows the last attempt, so do not sleep or announce one.
            print(f"{log_prefix} ⚠️ 失败: {msg}")

    return {"status": "error", "msg": f"{log_prefix} ❌ 彻底失败"}
240
+
241
def _unique_base_filename(output_dir, title, fallback):
    """Return a filesystem-safe base name unused by any .srt/.txt in *output_dir*."""
    # Remove characters that are invalid in file names, plus control characters.
    safe_title = re.sub(r'[\\/*?:"<>|\x00-\x1f]', "", title).strip()[:50]
    if not safe_title:
        safe_title = fallback

    base_filename = safe_title
    counter = 1
    # Rename as long as either companion file already exists, so the .srt and
    # .txt of one story always share the same base name.
    while any(
        os.path.exists(os.path.join(output_dir, f"{base_filename}{ext}"))
        for ext in (".srt", ".txt")
    ):
        base_filename = f"{safe_title}_{counter}"
        counter += 1
    return base_filename


def run_writer_factory(api_key, system_prompt, word_count, *args):
    """Main pipeline: generate all requested stories and save SRT and TXT files.

    This is a generator; each yield is ``(log_text, preview_text, zip_path)``.
    ``zip_path`` is ``None`` until the final yield of a run that produced at
    least one file.

    Args:
        api_key: Bearer token for the chat-completions endpoint.
        system_prompt: System prompt forwarded to every writing task.
        word_count: Requested minimum length forwarded to every writing task.
        *args: Ten topic strings followed by the ten per-topic counts.
    """
    # Imported locally: the file's top-level import only brings in ThreadPoolExecutor.
    from concurrent.futures import as_completed

    if not api_key:
        yield "❌ 请先输入 API Key", None, None
        return

    topics = args[:10]
    counts = args[10:]
    tasks = [
        {'topic': topic, 'count': int(count), 'id': slot}
        for slot, (topic, count) in enumerate(zip(topics, counts), start=1)
        if topic and topic.strip()
    ]

    total = sum(task['count'] for task in tasks)
    if total == 0:
        yield "❌ 请先生成或输入主题", None, None
        return

    logs = [f"🔥 繁体双格式工厂启动:共 {total} 篇 (SRT + TXT)"]
    yield "\n".join(logs), None, None

    generated_files = []
    preview_parts = []
    finished = 0

    timestamp = int(time.time())
    output_dir = f"stories_dual_{timestamp}"
    os.makedirs(output_dir, exist_ok=True)

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [
            executor.submit(
                generate_story_task, api_key, task['topic'], task['id'],
                variant, system_prompt, word_count,
            )
            for task in tasks
            for variant in range(1, task['count'] + 1)
        ]

        # Consume results as they finish so the log and preview update
        # promptly instead of waiting behind the slowest early submission.
        for future in as_completed(futures):
            try:
                res = future.result()
            except Exception as exc:
                # One failing task must not abort the rest of the batch.
                logs.append(f"❌ 任务异常: {exc}")
                yield "\n".join(logs), "".join(preview_parts), None
                continue

            if not res:
                continue

            logs.append(res['msg'])
            if res['status'] == 'success':
                finished += 1
                base_filename = _unique_base_filename(
                    output_dir, res['title'], f"Story_{finished}"
                )

                # Save the SRT file.
                srt_path = os.path.join(output_dir, f"{base_filename}.srt")
                with open(srt_path, "w", encoding="utf-8") as f:
                    f.write(res['srt_content'])
                generated_files.append(srt_path)

                # Save the TXT file.
                txt_path = os.path.join(output_dir, f"{base_filename}.txt")
                with open(txt_path, "w", encoding="utf-8") as f:
                    f.write(res['clean_content'])
                generated_files.append(txt_path)

                preview_parts.append(
                    f"【{base_filename}】\n(已保存 .srt 和 .txt)\n{res['clean_content'][:80]}...\n\n"
                )

            yield "\n".join(logs), "".join(preview_parts), None

    preview_text = "".join(preview_parts)
    if generated_files:
        zip_name = f"Stories_DualFormat_{len(generated_files)//2}sets_{timestamp}.zip"
        # Text compresses well; the default ZIP_STORED mode would not compress at all.
        with zipfile.ZipFile(zip_name, 'w', compression=zipfile.ZIP_DEFLATED) as zf:
            for file_path in generated_files:
                zf.write(file_path, arcname=os.path.basename(file_path))
        logs.append(f"\n🎉 全部完成!已打包 {len(generated_files)} 个文件 (SRT+TXT)。")
        yield "\n".join(logs), preview_text, zip_name
    else:
        logs.append("\n❌ 失败: 未生成任何内容")
        yield "\n".join(logs), preview_text, None
318
+
319
+ # === 界面布局 ===
320
with gr.Blocks(title="情感故事工厂 Pro Max (双格式版)") as app:
    gr.Markdown("# 🔥 情感故事工厂 Pro Max (繁体双格式版)")
    gr.Markdown("特性:**同时输出 SRT字幕 和 TXT纯文本** | **台湾繁体** | **合法刺激**")

    with gr.Row(variant="panel"):
        api_key_input = gr.Textbox(
            label="🔑 API Key (必填)",
            value=DEFAULT_API_KEY,
            type="password",
        )

    # --- Step 1: brainstorming area ---
    with gr.Row():
        with gr.Column(scale=4):
            keywords_input = gr.Textbox(
                label="Step 1: 输入流量关键词",
                placeholder="例如:婆婆、同学会、前男友、出差、隔壁邻居、上错车",
                lines=2,
            )
        with gr.Column(scale=1):
            brainstorm_btn = gr.Button("🧠 生成爆款标题 (繁体)", variant="secondary")

    gr.HTML("<hr>")

    # --- Step 2: ten independent topic slots ---
    topic_inputs = []
    count_sliders = []
    with gr.Row():
        # Two columns of five slots: titles 1-5 on the left, 6-10 on the right.
        for slot_numbers in (range(1, 6), range(6, 11)):
            with gr.Column():
                for i in slot_numbers:
                    with gr.Row():
                        t = gr.Textbox(show_label=False, placeholder=f"标题 {i}", scale=3)
                        # Keyword arguments: `step` appears to be keyword-only in
                        # current gr.Slider signatures — NOTE(review): confirm
                        # against the installed Gradio version.
                        c = gr.Slider(minimum=1, maximum=5, value=1, step=1, label="篇数", scale=1)
                    topic_inputs.append(t)
                    count_sliders.append(c)

    gr.HTML("<hr>")

    # --- Step 3: global controls ---
    with gr.Row():
        with gr.Column(scale=1):
            word_slider = gr.Slider(minimum=500, maximum=15000, value=2500, step=500, label="单篇字数")
            with gr.Accordion("🎭 作家设定 (已调教为头条情感大V)", open=True):
                system_prompt_input = gr.Textbox(label="System Prompt", value=WRITER_SYSTEM_PROMPT, lines=8)
            run_btn = gr.Button("🚀 Step 3: 启动流水线 (双格式打包)", variant="primary", size="lg")

        with gr.Column(scale=1):
            log_out = gr.Textbox(label="生产日志", lines=10)
            file_out = gr.File(label="下载结果 (.zip)")

    result_preview = gr.Textbox(label="内容预览 (繁体TXT)", lines=10)

    # Fills the ten topic textboxes, in slot order.
    brainstorm_btn.click(
        generate_themes_from_keywords,
        inputs=[api_key_input, keywords_input],
        outputs=topic_inputs,
    )

    # Input order must match run_writer_factory(api_key, system_prompt,
    # word_count, *topics, *counts).
    run_btn.click(
        run_writer_factory,
        inputs=[api_key_input, system_prompt_input, word_slider] + topic_inputs + count_sliders,
        outputs=[log_out, result_preview, file_out],
    )

if __name__ == "__main__":
    # run_writer_factory is a generator handler; enabling the queue explicitly
    # is harmless where it is already the default.
    # NOTE(review): share=True requests a public tunnel to this app — confirm
    # that is intended for the deployment target.
    app.queue().launch(share=True)