Upload app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,9 @@ import logging
|
|
| 5 |
from podcastfy.client import generate_podcast
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
|
|
|
|
|
|
|
|
|
|
| 8 |
# Configure logging
|
| 9 |
logging.basicConfig(level=logging.DEBUG)
|
| 10 |
logger = logging.getLogger(__name__)
|
|
@@ -48,7 +51,18 @@ VOICE_OPTIONS = [
|
|
| 48 |
{"id": "shimmer", "name": "shimmer"},
|
| 49 |
]
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
def get_api_key(key_name, ui_value):
    """Resolve an API key, preferring the value entered in the UI.

    Args:
        key_name: Name of the environment variable used as the fallback.
        ui_value: Value typed into the UI field; may be empty or None.

    Returns:
        The UI value with surrounding whitespace removed when it is
        non-empty, otherwise ``os.getenv(key_name)`` (None when the
        variable is not set).
    """
    if isinstance(ui_value, str):
        # A pasted key often carries stray spaces or a trailing newline, and a
        # whitespace-only field must not shadow the environment variable.
        ui_value = ui_value.strip()
    if ui_value:
        return ui_value
    return os.getenv(key_name)
|
| 53 |
|
| 54 |
def process_inputs(
|
|
@@ -60,7 +74,8 @@ def process_inputs(
|
|
| 60 |
openai_key,
|
| 61 |
openai_base_url, # 新增参数
|
| 62 |
elevenlabs_key,
|
| 63 |
-
|
|
|
|
| 64 |
conversation_style,
|
| 65 |
roles_person1,
|
| 66 |
roles_person2,
|
|
@@ -75,6 +90,11 @@ def process_inputs(
|
|
| 75 |
tts_openai_question,
|
| 76 |
tts_openai_answer,
|
| 77 |
ending_message,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
):
|
| 79 |
try:
|
| 80 |
logger.info("Starting podcast generation process")
|
|
@@ -82,14 +102,20 @@ def process_inputs(
|
|
| 82 |
# API key handling
|
| 83 |
logger.debug("Setting API keys")
|
| 84 |
os.environ["GEMINI_API_KEY"] = get_api_key("GEMINI_API_KEY", gemini_key)
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
if tts_model == "openai":
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
if openai_base_url:
|
| 92 |
-
os.environ["OPENAI_API_BASE"] = openai_base_url
|
| 93 |
|
| 94 |
if tts_model == "elevenlabs":
|
| 95 |
logger.debug("Setting ElevenLabs API key")
|
|
@@ -151,7 +177,8 @@ def process_inputs(
|
|
| 151 |
# Prepare conversation config
|
| 152 |
logger.debug("Preparing conversation config")
|
| 153 |
conversation_config = {
|
| 154 |
-
"
|
|
|
|
| 155 |
"conversation_style": conversation_style.split(','),
|
| 156 |
"roles_person1": roles_person1,
|
| 157 |
"roles_person2": roles_person2,
|
|
@@ -186,6 +213,10 @@ def process_inputs(
|
|
| 186 |
image_paths=image_paths if image_paths else None,
|
| 187 |
tts_model=tts_model,
|
| 188 |
conversation_config=conversation_config,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
)
|
| 190 |
|
| 191 |
logger.info("Podcast generation completed")
|
|
@@ -254,7 +285,7 @@ with gr.Blocks(
|
|
| 254 |
label="Gemini API Key",
|
| 255 |
type="password",
|
| 256 |
value="",
|
| 257 |
-
info="
|
| 258 |
)
|
| 259 |
openai_key = gr.Textbox(
|
| 260 |
label="OpenAI API Key",
|
|
@@ -332,13 +363,45 @@ with gr.Blocks(
|
|
| 332 |
</h3>
|
| 333 |
""",
|
| 334 |
)
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
)
|
| 343 |
|
| 344 |
conversation_style = gr.Textbox(
|
|
@@ -414,12 +477,6 @@ with gr.Blocks(
|
|
| 414 |
info="播客使用的语言"
|
| 415 |
)
|
| 416 |
|
| 417 |
-
# longform = gr.Checkbox(
|
| 418 |
-
# label="长篇模式",
|
| 419 |
-
# value=False,
|
| 420 |
-
# info="启用长篇内容生成模式"
|
| 421 |
-
# )
|
| 422 |
-
|
| 423 |
# Voice Settings
|
| 424 |
gr.Markdown(
|
| 425 |
"""
|
|
@@ -434,22 +491,35 @@ with gr.Blocks(
|
|
| 434 |
info="结束语"
|
| 435 |
)
|
| 436 |
tts_model = gr.Radio(
|
| 437 |
-
choices=["openai", "elevenlabs", "edge"],
|
| 438 |
value="openai",
|
| 439 |
label="文本转语音模型",
|
| 440 |
info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
|
| 441 |
)
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
)
|
| 454 |
|
| 455 |
# Advanced Settings
|
|
@@ -469,16 +539,60 @@ with gr.Blocks(
|
|
| 469 |
info="一些额外的指令,用来帮助AI更好地理解你想要聊天的内容和方向"
|
| 470 |
)
|
| 471 |
|
| 472 |
-
# api_key_label = gr.
|
| 473 |
-
#
|
| 474 |
# value="GEMINI_API_KEY",
|
| 475 |
-
#
|
|
|
|
| 476 |
# )
|
| 477 |
|
| 478 |
-
#
|
| 479 |
-
#
|
| 480 |
-
#
|
| 481 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
# )
|
| 483 |
|
| 484 |
# Output Section
|
|
@@ -504,12 +618,13 @@ with gr.Blocks(
|
|
| 504 |
text_input, urls_input, pdf_files, image_files,
|
| 505 |
gemini_key, openai_key, openai_base_url,
|
| 506 |
elevenlabs_key,
|
| 507 |
-
|
| 508 |
roles_person1, roles_person2,
|
| 509 |
dialogue_structure, podcast_name,
|
| 510 |
podcast_tagline, output_language, tts_model,
|
| 511 |
creativity_level, user_instructions,
|
| 512 |
engagement_techniques, tts_openai_question, tts_openai_answer, ending_message,
|
|
|
|
| 513 |
],
|
| 514 |
outputs=audio_output
|
| 515 |
)
|
|
|
|
| 5 |
from podcastfy.client import generate_podcast
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
|
| 8 |
+
import requests
|
| 9 |
+
import json
|
| 10 |
+
|
| 11 |
# Configure logging
|
| 12 |
logging.basicConfig(level=logging.DEBUG)
|
| 13 |
logger = logging.getLogger(__name__)
|
|
|
|
| 51 |
{"id": "shimmer", "name": "shimmer"},
|
| 52 |
]
|
| 53 |
|
| 54 |
+
# 添加 API Keys 轮询功能
|
| 55 |
+
def get_next_gemini_key(api_keys):
    """Return the next API key from a comma-separated list, round-robin.

    The rotation position is stored on the function object itself
    (``get_next_gemini_key.current_index``), so successive calls hand out
    successive keys and wrap around after the last one.

    Args:
        api_keys: Comma-separated API keys. Whitespace around each key is
            stripped and empty entries are ignored.

    Returns:
        The key selected for this call.

    Raises:
        ValueError: If ``api_keys`` contains no non-empty key.
    """
    keys = [key for key in (part.strip() for part in api_keys.split(',')) if key]
    if not keys:
        # Without this guard the lookup below fails with a bare IndexError.
        raise ValueError("No usable API key found in the comma-separated key list")
    # The saved position may come from an earlier call with a longer list, so
    # reduce it modulo the current length before indexing.
    index = getattr(get_next_gemini_key, 'current_index', 0) % len(keys)
    get_next_gemini_key.current_index = (index + 1) % len(keys)
    return keys[index]
|
| 62 |
+
|
| 63 |
def get_api_key(key_name, ui_value):
    """Resolve an API key, preferring the value entered in the UI.

    Args:
        key_name: Name of the environment variable used as the fallback.
        ui_value: Value typed into the UI field; may be empty or None.

    Returns:
        - For ``"GEMINI_API_KEY"`` with a comma-separated UI value: the key
          picked by ``get_next_gemini_key``.
        - For any other non-empty UI value: that value with surrounding
          whitespace removed.
        - Otherwise: ``os.getenv(key_name)`` (None when the variable is not
          set).
    """
    if isinstance(ui_value, str):
        # A pasted key often carries stray spaces or a trailing newline, and a
        # whitespace-only field must not shadow the environment variable.
        ui_value = ui_value.strip()
    if not ui_value:
        return os.getenv(key_name)
    if key_name == "GEMINI_API_KEY" and ',' in ui_value:
        # Several Gemini keys were supplied: rotate through them.
        return get_next_gemini_key(ui_value)
    return ui_value
|
| 67 |
|
| 68 |
def process_inputs(
|
|
|
|
| 74 |
openai_key,
|
| 75 |
openai_base_url, # 新增参数
|
| 76 |
elevenlabs_key,
|
| 77 |
+
max_num_chunks,
|
| 78 |
+
min_chunk_size,
|
| 79 |
conversation_style,
|
| 80 |
roles_person1,
|
| 81 |
roles_person2,
|
|
|
|
| 90 |
tts_openai_question,
|
| 91 |
tts_openai_answer,
|
| 92 |
ending_message,
|
| 93 |
+
longform,
|
| 94 |
+
llm_model_name,
|
| 95 |
+
#api_key_label,
|
| 96 |
+
#gemini_model,
|
| 97 |
+
#openai_model,
|
| 98 |
):
|
| 99 |
try:
|
| 100 |
logger.info("Starting podcast generation process")
|
|
|
|
| 102 |
# API key handling
|
| 103 |
logger.debug("Setting API keys")
|
| 104 |
os.environ["GEMINI_API_KEY"] = get_api_key("GEMINI_API_KEY", gemini_key)
|
| 105 |
+
|
| 106 |
+
logger.debug("Setting OpenAI API key")
|
| 107 |
+
if not openai_key and not os.getenv("OPENAI_API_KEY"):
|
| 108 |
+
raise ValueError("OpenAI API key is required when using OpenAI TTS model")
|
| 109 |
+
os.environ["OPENAI_API_KEY"] = get_api_key("OPENAI_API_KEY", openai_key)
|
| 110 |
+
|
| 111 |
+
# if api_key_label == "OPENAI_API_KEY":
|
| 112 |
+
os.environ["OPENAI_API_BASE"] = get_api_key("OPENAI_BASE_URL", openai_base_url)
|
| 113 |
+
|
| 114 |
if tts_model == "openai":
|
| 115 |
+
os.environ["OPENAI_BASE_URL"] = get_api_key("OPENAI_BASE_URL", openai_base_url)
|
| 116 |
+
# 根据选择的名称找到对应的 voice ID
|
| 117 |
+
tts_openai_question = next(voice["id"] for voice in VOICE_OPTIONS if voice["name"] == tts_openai_question)
|
| 118 |
+
tts_openai_answer = next(voice["id"] for voice in VOICE_OPTIONS if voice["name"] == tts_openai_answer)
|
|
|
|
|
|
|
| 119 |
|
| 120 |
if tts_model == "elevenlabs":
|
| 121 |
logger.debug("Setting ElevenLabs API key")
|
|
|
|
| 177 |
# Prepare conversation config
|
| 178 |
logger.debug("Preparing conversation config")
|
| 179 |
conversation_config = {
|
| 180 |
+
"max_num_chunks": max_num_chunks,
|
| 181 |
+
"min_chunk_size": min_chunk_size,
|
| 182 |
"conversation_style": conversation_style.split(','),
|
| 183 |
"roles_person1": roles_person1,
|
| 184 |
"roles_person2": roles_person2,
|
|
|
|
| 213 |
image_paths=image_paths if image_paths else None,
|
| 214 |
tts_model=tts_model,
|
| 215 |
conversation_config=conversation_config,
|
| 216 |
+
longform=longform,
|
| 217 |
+
llm_model_name=llm_model_name,
|
| 218 |
+
api_key_label="OPENAI_API_KEY",
|
| 219 |
+
#llm_model_name=get_active_model(api_key_label, gemini_model, openai_model),
|
| 220 |
)
|
| 221 |
|
| 222 |
logger.info("Podcast generation completed")
|
|
|
|
| 285 |
label="Gemini API Key",
|
| 286 |
type="password",
|
| 287 |
value="",
|
| 288 |
+
info="必须的,多个key请用逗号分隔"
|
| 289 |
)
|
| 290 |
openai_key = gr.Textbox(
|
| 291 |
label="OpenAI API Key",
|
|
|
|
| 363 |
</h3>
|
| 364 |
""",
|
| 365 |
)
|
| 366 |
+
llm_model_name = gr.Radio(
|
| 367 |
+
choices=["gemini-1.5-pro-latest", "gemini-exp-1121", "learnlm-1.5-pro-experimental", "o1-mini", "o1-preview", "gpt-4o-mini", "gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-4-turbo-2024-04-09", "claude-3-5-sonnet-20240620", "claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022"],
|
| 368 |
+
value="gemini-1.5-pro-latest",
|
| 369 |
+
label="文本生成模型",
|
| 370 |
+
info="默认使用 gemini-1.5-pro-latest "
|
| 371 |
+
)
|
| 372 |
+
|
| 373 |
+
longform = gr.Checkbox(
|
| 374 |
+
label="长篇模式",
|
| 375 |
+
value=False,
|
| 376 |
+
info="启用长篇内容生成模式,启用长篇需要Google Cloud支持,设置好GOOGLE_API_KEY"
|
| 377 |
+
)
|
| 378 |
+
with gr.Group(visible=False) as longform_settings_group:
|
| 379 |
+
max_num_chunks = gr.Slider(
|
| 380 |
+
minimum=1,
|
| 381 |
+
maximum=20,
|
| 382 |
+
value=8,
|
| 383 |
+
step=1,
|
| 384 |
+
label="最大轮数",
|
| 385 |
+
info="长篇模式下,生成的最大轮数"
|
| 386 |
+
)
|
| 387 |
+
min_chunk_size = gr.Slider(
|
| 388 |
+
minimum=300,
|
| 389 |
+
maximum=2000,
|
| 390 |
+
value=600,
|
| 391 |
+
step=100,
|
| 392 |
+
label="一轮最小字符数",
|
| 393 |
+
info="长篇模式下,生成一轮所需的最小字符数"
|
| 394 |
+
)
|
| 395 |
+
|
| 396 |
+
# 添加更新可见性的函数
|
| 397 |
+
def update_longform_settings(is_longform):
    """Build the Gradio update that shows or hides a component.

    Args:
        is_longform: Current value of the long-form checkbox.

    Returns:
        A ``gr.update`` payload whose ``visible`` flag equals ``is_longform``.
    """
    visibility_update = gr.update(visible=is_longform)
    return visibility_update
|
| 399 |
+
|
| 400 |
+
# 添加事件监听
|
| 401 |
+
longform.change(
|
| 402 |
+
fn=update_longform_settings,
|
| 403 |
+
inputs=[longform],
|
| 404 |
+
outputs=[longform_settings_group]
|
| 405 |
)
|
| 406 |
|
| 407 |
conversation_style = gr.Textbox(
|
|
|
|
| 477 |
info="播客使用的语言"
|
| 478 |
)
|
| 479 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
# Voice Settings
|
| 481 |
gr.Markdown(
|
| 482 |
"""
|
|
|
|
| 491 |
info="结束语"
|
| 492 |
)
|
| 493 |
tts_model = gr.Radio(
|
| 494 |
+
choices=["openai", "geminimulti", "elevenlabs", "gemini", "edge"],
|
| 495 |
value="openai",
|
| 496 |
label="文本转语音模型",
|
| 497 |
info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
|
| 498 |
)
|
| 499 |
+
with gr.Group(visible=True) as openai_voice_group:
|
| 500 |
+
tts_openai_question = gr.Dropdown(
|
| 501 |
+
choices=[voice["name"] for voice in VOICE_OPTIONS],
|
| 502 |
+
value=VOICE_OPTIONS[27]["name"],
|
| 503 |
+
label="OpenAI TTS 主持人",
|
| 504 |
+
info="选择OpenAI TTS 主持人角色语音"
|
| 505 |
+
)
|
| 506 |
+
|
| 507 |
+
tts_openai_answer = gr.Dropdown(
|
| 508 |
+
choices=[voice["name"] for voice in VOICE_OPTIONS],
|
| 509 |
+
value=VOICE_OPTIONS[31]["name"],
|
| 510 |
+
label="OpenAI TTS 嘉宾",
|
| 511 |
+
info="选择OpenAI TTS 嘉宾角色语音"
|
| 512 |
+
)
|
| 513 |
+
|
| 514 |
+
# 添加更新可见性的函数
|
| 515 |
+
def update_voice_options(tts_model):
    """Build the Gradio update that shows a component only for OpenAI TTS.

    Args:
        tts_model: Name of the selected text-to-speech model.

    Returns:
        A ``gr.update`` payload whose ``visible`` flag is True only when
        ``tts_model`` equals ``"openai"``.
    """
    uses_openai_tts = tts_model == "openai"
    return gr.update(visible=uses_openai_tts)
|
| 517 |
+
|
| 518 |
+
# 添加事件监听
|
| 519 |
+
tts_model.change(
|
| 520 |
+
fn=update_voice_options,
|
| 521 |
+
inputs=[tts_model],
|
| 522 |
+
outputs=[openai_voice_group]
|
| 523 |
)
|
| 524 |
|
| 525 |
# Advanced Settings
|
|
|
|
| 539 |
info="一些额外的指令,用来帮助AI更好地理解你想要聊天的内容和方向"
|
| 540 |
)
|
| 541 |
|
| 542 |
+
# api_key_label = gr.Radio(
|
| 543 |
+
# choices=["GEMINI_API_KEY", "OPENAI_API_KEY"],
|
| 544 |
# value="GEMINI_API_KEY",
|
| 545 |
+
# label="文本生成模型供应商",
|
| 546 |
+
# info="默认使用 Gemini "
|
| 547 |
# )
|
| 548 |
|
| 549 |
+
# with gr.Group(visible=True) as gemini_llm_group:
|
| 550 |
+
# gemini_model = gr.Radio(
|
| 551 |
+
# choices=["gemini-1.5-pro-latest", "gemini-exp-1121", "learnlm-1.5-pro-experimental"],
|
| 552 |
+
# value="gemini-1.5-pro-latest",
|
| 553 |
+
# label="Gemini 文本生成模型",
|
| 554 |
+
# info="默认使用 gemini-1.5-pro-latest "
|
| 555 |
+
# )
|
| 556 |
+
|
| 557 |
+
# def fetch_openai_models():
|
| 558 |
+
# try:
|
| 559 |
+
# response = requests.get("https://api.168369.xyz/v1/models")
|
| 560 |
+
# data = response.json()
|
| 561 |
+
# 提取所有模型的 id
|
| 562 |
+
# model_ids = [model["id"] for model in data["data"]]
|
| 563 |
+
# return model_ids
|
| 564 |
+
# except Exception as e:
|
| 565 |
+
# print(f"获取模型列表失败: {str(e)}")
|
| 566 |
+
# return ["获取模型列表失败"]
|
| 567 |
+
|
| 568 |
+
# with gr.Group(visible=False) as openai_llm_group:
|
| 569 |
+
# openai_model = gr.Radio(
|
| 570 |
+
#choices=fetch_openai_models(), # 从 API 获取模型列表
|
| 571 |
+
# choices=["o1-mini", "o1-preview", "gpt-4o-mini", "gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-4-turbo-2024-04-09"],
|
| 572 |
+
# value="gpt-4o-mini",
|
| 573 |
+
# label="Openai 文本生成模型",
|
| 574 |
+
# info="默认为 gpt-4o-mini"
|
| 575 |
+
# )
|
| 576 |
+
|
| 577 |
+
# 添加获取当前有效模型的函数
|
| 578 |
+
# def get_active_model(api_key_label, gemini_model, openai_model):
|
| 579 |
+
# if api_key_label == "GEMINI_API_KEY":
|
| 580 |
+
# return gemini_model
|
| 581 |
+
# else: # OPENAI_API_KEY
|
| 582 |
+
# return openai_model
|
| 583 |
+
|
| 584 |
+
# 添加更新可见性的函数
|
| 585 |
+
# def update_llm_options(api_key_label):
|
| 586 |
+
# if api_key_label == "GEMINI_API_KEY":
|
| 587 |
+
# return gr.update(visible=True), gr.update(visible=False)
|
| 588 |
+
# else: # OPENAI_API_KEY
|
| 589 |
+
# return gr.update(visible=False), gr.update(visible=True)
|
| 590 |
+
|
| 591 |
+
# 添加事件监听
|
| 592 |
+
# api_key_label.change(
|
| 593 |
+
# fn=update_llm_options,
|
| 594 |
+
# inputs=[api_key_label],
|
| 595 |
+
# outputs=[gemini_llm_group, openai_llm_group]
|
| 596 |
# )
|
| 597 |
|
| 598 |
# Output Section
|
|
|
|
| 618 |
text_input, urls_input, pdf_files, image_files,
|
| 619 |
gemini_key, openai_key, openai_base_url,
|
| 620 |
elevenlabs_key,
|
| 621 |
+
max_num_chunks, min_chunk_size, conversation_style,
|
| 622 |
roles_person1, roles_person2,
|
| 623 |
dialogue_structure, podcast_name,
|
| 624 |
podcast_tagline, output_language, tts_model,
|
| 625 |
creativity_level, user_instructions,
|
| 626 |
engagement_techniques, tts_openai_question, tts_openai_answer, ending_message,
|
| 627 |
+
longform, llm_model_name, #api_key_label, gemini_model, openai_model,
|
| 628 |
],
|
| 629 |
outputs=audio_output
|
| 630 |
)
|