Spaces:

deeme
/

pod

Paused

App Files Files Community

deeme committed on Dec 3, 2024

Commit

54dd079

verified ·

1 Parent(s): 6543342

Upload 2 files

Browse files

Files changed (2) hide show

app.py +462 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,462 @@

+import gradio as gr
+import os
+import tempfile
+import logging
+from podcastfy.client import generate_podcast
+from dotenv import load_dotenv
+# Configure logging
+# basicConfig configures the root logger; at DEBUG level, loggers that do not
+# set their own level (including third-party ones) will also emit debug output.
+logging.basicConfig(level=logging.DEBUG)
+# Module-level logger used by process_inputs for progress and error reporting.
+logger = logging.getLogger(__name__)
+# Load environment variables
+# Runs before any request is handled so get_api_key() can fall back on
+# os.getenv() for keys that were not typed into the UI.
+load_dotenv()
+def get_api_key(key_name, ui_value):
+    return ui_value if ui_value else os.getenv(key_name)
+def process_inputs(
+    text_input,
+    urls_input,
+    pdf_files,
+    image_files,
+    gemini_key,
+    openai_key,
+    openai_base_url,  # 新增参数
+    elevenlabs_key,
+    word_count,
+    conversation_style,
+    roles_person1,
+    roles_person2,
+    dialogue_structure,
+    podcast_name,
+    podcast_tagline,
+    output_language,
+    tts_model,
+    creativity_level,
+    user_instructions,
+    api_key_label,
+    llm_model_name,
+    longform,
+):
+    try:
+        logger.info("Starting podcast generation process")
+        # API key handling
+        logger.debug("Setting API keys")
+        os.environ["GEMINI_API_KEY"] = get_api_key("GEMINI_API_KEY", gemini_key)
+        if tts_model == "openai":
+            logger.debug("Setting OpenAI API key")
+            if not openai_key and not os.getenv("OPENAI_API_KEY"):
+                raise ValueError("OpenAI API key is required when using OpenAI TTS model")
+            os.environ["OPENAI_API_KEY"] = get_api_key("OPENAI_API_KEY", openai_key)
+            if openai_base_url:
+                os.environ["OPENAI_API_BASE"] = openai_base_url
+        if tts_model == "elevenlabs":
+            logger.debug("Setting ElevenLabs API key")
+            if not elevenlabs_key and not os.getenv("ELEVENLABS_API_KEY"):
+                raise ValueError("ElevenLabs API key is required when using ElevenLabs TTS model")
+            os.environ["ELEVENLABS_API_KEY"] = get_api_key("ELEVENLABS_API_KEY", elevenlabs_key)
+        # Process URLs
+        urls = [url.strip() for url in urls_input.split('\n') if url.strip()]
+        logger.debug(f"Processed URLs: {urls}")
+        temp_files = []
+        temp_dirs = []
+        # Handle PDF files
+        if pdf_files is not None and len(pdf_files) > 0:
+            logger.info(f"Processing {len(pdf_files)} PDF files")
+            pdf_temp_dir = tempfile.mkdtemp()
+            temp_dirs.append(pdf_temp_dir)
+            for i, pdf_file in enumerate(pdf_files):
+                pdf_path = os.path.join(pdf_temp_dir, f"input_pdf_{i}.pdf")
+                temp_files.append(pdf_path)
+                with open(pdf_path, 'wb') as f:
+                    f.write(pdf_file)
+                urls.append(pdf_path)
+                logger.debug(f"Saved PDF {i} to {pdf_path}")
+        # Handle image files
+        image_paths = []
+        if image_files is not None and len(image_files) > 0:
+            logger.info(f"Processing {len(image_files)} image files")
+            img_temp_dir = tempfile.mkdtemp()
+            temp_dirs.append(img_temp_dir)
+            for i, img_file in enumerate(image_files):
+                # Get file extension from the original name in the file tuple
+                original_name = img_file.orig_name if hasattr(img_file, 'orig_name') else f"image_{i}.jpg"
+                extension = original_name.split('.')[-1]
+                logger.debug(f"Processing image file {i}: {original_name}")
+                img_path = os.path.join(img_temp_dir, f"input_image_{i}.{extension}")
+                temp_files.append(img_path)
+                try:
+                    # Write the bytes directly to the file
+                    with open(img_path, 'wb') as f:
+                        if isinstance(img_file, (tuple, list)):
+                            f.write(img_file[1])  # Write the bytes content
+                        else:
+                            f.write(img_file)     # Write the bytes directly
+                    image_paths.append(img_path)
+                    logger.debug(f"Saved image {i} to {img_path}")
+                except Exception as e:
+                    logger.error(f"Error saving image {i}: {str(e)}")
+                    raise
+        # Prepare conversation config
+        logger.debug("Preparing conversation config")
+        conversation_config = {
+            "word_count": word_count,
+            "conversation_style": conversation_style.split(','),
+            "roles_person1": roles_person1,
+            "roles_person2": roles_person2,
+            "dialogue_structure": dialogue_structure.split(','),
+            "podcast_name": podcast_name,
+            "podcast_tagline": podcast_tagline,
+            "output_language": output_language,
+            "creativity": creativity_level,
+            "user_instructions": user_instructions,
+            "api_key_label": api_key_label,
+            "llm_model_name": llm_model_name,
+            "longform": longform,
+        }
+        # Generate podcast
+        logger.info("Calling generate_podcast function")
+        logger.debug(f"URLs: {urls}")
+        logger.debug(f"Image paths: {image_paths}")
+        logger.debug(f"Text input present: {'Yes' if text_input else 'No'}")
+        audio_file = generate_podcast(
+            urls=urls if urls else None,
+            text=text_input if text_input else None,
+            image_paths=image_paths if image_paths else None,
+            tts_model=tts_model,
+            conversation_config=conversation_config
+        )
+        logger.info("Podcast generation completed")
+        # Cleanup
+        logger.debug("Cleaning up temporary files")
+        for file_path in temp_files:
+            if os.path.exists(file_path):
+                os.unlink(file_path)
+                logger.debug(f"Removed temp file: {file_path}")
+        for dir_path in temp_dirs:
+            if os.path.exists(dir_path):
+                os.rmdir(dir_path)
+                logger.debug(f"Removed temp directory: {dir_path}")
+        return audio_file
+    except Exception as e:
+        logger.error(f"Error in process_inputs: {str(e)}", exc_info=True)
+        # Cleanup on error
+        for file_path in temp_files:
+            if os.path.exists(file_path):
+                os.unlink(file_path)
+        for dir_path in temp_dirs:
+            if os.path.exists(dir_path):
+                os.rmdir(dir_path)
+        return str(e)
+# Create Gradio interface with updated theme
+# The whole UI is built declaratively inside this context manager: components
+# appear on the page in the order they are constructed, nested according to the
+# enclosing Tab / Row / Column / Accordion / Group blocks.
+with gr.Blocks(
+    title="AI播客plus",
+    theme=gr.themes.Base(
+        primary_hue="blue",
+        secondary_hue="slate",
+        neutral_hue="slate"
+    ),
+    # Custom CSS: restyles accordion headers so the toggle icon sits on the left.
+    css="""
+        /* Move toggle arrow to left side */
+        .gr-accordion {
+            --accordion-arrow-size: 1.5em;
+        }
+        .gr-accordion > .label-wrap {
+            flex-direction: row !important;
+            justify-content: flex-start !important;
+            gap: 1em;
+        }
+        .gr-accordion > .label-wrap > .icon {
+            order: -1;
+        }
+    """
+) as demo:
+    # Single tab holding every input; the output section below lives outside it.
+    with gr.Tab("默认环境变量已设置 Gemini、OpenAI API Key "):
+        # API Keys Section
+        with gr.Row():
+            # NOTE: the "section-header" class is attached here and on the other
+            # section headings, but the css string above defines no rule for it.
+            gr.Markdown(
+                """
+                <h2 style='color: #2196F3; margin-bottom: 10px; padding: 10px 0;'>
+                    🔑 API Keys
+                </h2>
+                """,
+                elem_classes=["section-header"]
+            )
+            # Dark/light toggle; its click handler is registered near the end of the block.
+            theme_btn = gr.Button("🌓", scale=0, min_width=0)
+        # Key fields default to empty; process_inputs falls back to environment
+        # variables (via get_api_key) for any field left blank.
+        with gr.Accordion("配置 API Keys", open=False):
+            gemini_key = gr.Textbox(
+                label="Gemini API Key",
+                type="password",
+                value="",
+                info="必须的"
+            )
+            openai_key = gr.Textbox(
+                label="OpenAI API Key",
+                type="password",
+                value="",
+                info="只有在使用OpenAI文本转语音模型的情况下才需要此项"
+            )
+            # Exported as OPENAI_API_BASE by process_inputs when non-empty and
+            # the OpenAI TTS model is selected.
+            openai_base_url = gr.Textbox(
+                label="OpenAI Base URL",
+                value="",
+                info="可选，留空使用默认URL：https://api.openai.com/v1"
+            )
+            elevenlabs_key = gr.Textbox(
+                label="ElevenLabs API Key",
+                type="password",
+                value="",
+                info="建议使用ElevenLabs TTS模型，仅在使用该模型时才需要此项"
+            )
+        # Content Input Section
+        gr.Markdown(
+            """
+            <h2 style='color: #2196F3; margin-bottom: 10px; padding: 10px 0;'>
+                📝 输入内容
+            </h2>
+            """,
+            elem_classes=["section-header"]
+        )
+        with gr.Accordion("设置输入内容", open=False):
+            with gr.Group():
+                text_input = gr.Textbox(
+                    label="文本输入",
+                    placeholder="在此输入或粘贴文字...",
+                    lines=3
+                )
+                # One URL per line; process_inputs splits on newlines and drops blanks.
+                urls_input = gr.Textbox(
+                    label="URLs",
+                    placeholder="请逐行输入网址，支持网站和YouTube视频链接.",
+                    lines=3
+                )
+                # Place PDF and Image uploads side by side
+                with gr.Row():
+                    with gr.Column():
+                        # gr.Files (rather than gr.File) so several PDFs can be uploaded;
+                        # type="binary" matches process_inputs, which writes each item
+                        # to disk with a binary-mode write.
+                        pdf_files = gr.Files(
+                            label="上传 PDFs",
+                            file_types=[".pdf"],
+                            type="binary"
+                        )
+                        gr.Markdown("*上传一个或多个PDF文件来创建播客*", elem_classes=["file-info"])
+                    with gr.Column():
+                        # Same multi-file, binary setup as the PDF uploader above.
+                        image_files = gr.Files(
+                            label="上传图片",
+                            file_types=["image"],
+                            type="binary"
+                        )
+                        gr.Markdown("*上传一个或多个图片文件来创建播客*", elem_classes=["file-info"])
+        # Customization Section
+        gr.Markdown(
+            """
+            <h2 style='color: #2196F3; margin-bottom: 10px; padding: 10px 0;'>
+                ⚙️ 自定义选项
+            </h2>
+            """,
+            elem_classes=["section-header"]
+        )
+        # Every control below is forwarded by process_inputs into the
+        # conversation_config dict (or, for tts_model, directly to generate_podcast).
+        with gr.Accordion("自定义选项", open=False):
+            # Basic Settings
+            gr.Markdown(
+                """
+                <h3 style='color: #1976D2; margin: 15px 0 10px 0;'>
+                    📊 基本设置
+                </h3>
+                """,
+            )
+            word_count = gr.Slider(
+                minimum=500,
+                maximum=5000,
+                value=2000,
+                step=100,
+                label="字数统计",
+                info="目标字数（用于生成内容）"
+            )
+            # Comma-separated; process_inputs splits this on ',' into a list.
+            conversation_style = gr.Textbox(
+                label="对话风格",
+                value="生动活泼,节奏明快,热情洋溢",
+                info="用于对话的风格列表（以逗号分隔）"
+            )
+            # Roles and Structure
+            gr.Markdown(
+                """
+                <h3 style='color: #1976D2; margin: 15px 0 10px 0;'>
+                    👥 角色设定与结构安排
+                </h3>
+                """,
+            )
+            roles_person1 = gr.Textbox(
+                label="第一位发言者的角色",
+                value="主要负责总结的人",
+                info="在对话中，第一个说话人扮演的角色"
+            )
+            roles_person2 = gr.Textbox(
+                label="第二位发言者的角色",
+                value="提问者/释疑者",
+                info="在对话中，第二个说话人所扮演的角色或承担的任务"
+            )
+            # Comma-separated; process_inputs splits this on ',' into a list.
+            dialogue_structure = gr.Textbox(
+                label="对话结构",
+                value="引言,主要内容的概括,总结",
+                info="对话结构的各个部分（用逗号隔开）"
+            )
+            # Podcast Identity
+            gr.Markdown(
+                """
+                <h3 style='color: #1976D2; margin: 15px 0 10px 0;'>
+                    🎙️ 播客特色
+                </h3>
+                """,
+            )
+            podcast_name = gr.Textbox(
+                label="播客名",
+                value="猛然间",
+                info="播客的名字"
+            )
+            podcast_tagline = gr.Textbox(
+                label="播客宣传语",
+                value="猛然回首，太匆匆",
+                info="播客的宣传语或副标题"
+            )
+            output_language = gr.Textbox(
+                label="输出语言",
+                value="Chinese",
+                info="播客使用的语言"
+            )
+            # Forwarded as "api_key_label" in conversation_config; per the info text
+            # it names the environment variable that holds the LLM provider's key.
+            api_key_label = gr.Textbox(
+                label="自定义基于云的 LLM",
+                value="GEMINI_API_KEY",
+                info="可选，默认使用 Gemini，如使用 OPENAI，上面填入 'OPENAI_API_KEY' 并保证设置好环境变量且设置好下面的模型"
+            )
+            # Forwarded as "llm_model_name"; should match the provider chosen above.
+            llm_model_name = gr.Textbox(
+                label="设置好对应自定义基于云的 LLM 模型",
+                value="gemini-1.5-pro-latest",
+                info="可选，配合上面的参数，默认是 Gemini 的 gemini-1.5-pro-latest，默认 OPENAI 可支持模型 api.168369.xyz/v1/models 获取"
+            )
+            longform = gr.Checkbox(
+                label="长篇模式",
+                value=False,
+                info="启用长篇内容生成模式"
+            )
+            # Voice Settings
+            gr.Markdown(
+                """
+                <h3 style='color: #1976D2; margin: 15px 0 10px 0;'>
+                    🗣️ 语音设置
+                </h3>
+                """,
+            )
+            # process_inputs requires an OpenAI / ElevenLabs key only when the
+            # matching model is selected here; "edge" triggers no key check.
+            tts_model = gr.Radio(
+                choices=["openai", "elevenlabs", "edge"],
+                value="openai",
+                label="文本转语音模型",
+                info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
+            )
+            # Advanced Settings
+            gr.Markdown(
+                """
+                <h3 style='color: #1976D2; margin: 15px 0 10px 0;'>
+                    🔧 高级选项
+                </h3>
+                """,
+            )
+            # Forwarded under the "creativity" key of conversation_config.
+            creativity_level = gr.Slider(
+                minimum=0,
+                maximum=1,
+                value=0.7,
+                step=0.1,
+                label="创意等级",
+                info="调节生成对话的创意程度（0 为注重事实，1 为更具创意）"
+            )
+            user_instructions = gr.Textbox(
+                label="个性化指令",
+                value="",
+                lines=2,
+                placeholder="在此处添加你希望AI遵循的具体指令，以控制对话的走向和内容...",
+                info="一些额外的指令，用来帮助AI更好地理解你想要聊天的内容和方向"
+            )
+    # Output Section
+    gr.Markdown(
+        """
+        <h2 style='color: #2196F3; margin-bottom: 10px; padding: 10px 0;'>
+            🎵 生成结果
+        </h2>
+        """,
+        elem_classes=["section-header"]
+    )
+    with gr.Group():
+        generate_btn = gr.Button("🎙️ 生成播客", variant="primary")
+        # Receives the return value of process_inputs.
+        # NOTE(review): on failure process_inputs returns an error message string,
+        # which this filepath-typed component would treat as a path - confirm.
+        audio_output = gr.Audio(
+            type="filepath",
+            label="生成的播客"
+        )
+    # Handle generation
+    # The inputs list is positional: its order must stay in sync with the
+    # parameter order of process_inputs.
+    generate_btn.click(
+        process_inputs,
+        inputs=[
+            text_input, urls_input, pdf_files, image_files,
+            gemini_key, openai_key, openai_base_url,
+            elevenlabs_key,
+            word_count, conversation_style,
+            roles_person1, roles_person2,
+            dialogue_structure, podcast_name,
+            podcast_tagline, output_language, tts_model,
+            creativity_level, user_instructions,
+            api_key_label, llm_model_name, longform
+        ],
+        outputs=audio_output
+    )
+    # Add theme toggle functionality
+    # No Python callback, inputs or outputs: only the JavaScript snippet is
+    # attached, and it toggles the "dark" class on <body>.
+    theme_btn.click(
+        None,
+        None,
+        None,
+        js="""
+        function() {
+            document.querySelector('body').classList.toggle('dark');
+            return [];
+        }
+        """
+    )
+if __name__ == "__main__":
+    demo.queue().launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio-client
+gradio
+podcastfy
+python-dotenv