| | """PersonaFlow - Interactive Audio Character Demo for Hugging Face Spaces."""
|
| | import logging
|
| | import os
|
| | from pathlib import Path
|
| |
|
| | import gradio as gr
|
| | import numpy as np
|
| |
|
| |
|
# Configure root logging once at import time: INFO and above, with a
# timestamp, the logger name and the level in front of every message.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger used by the handlers in this file.
logger = logging.getLogger(__name__)
|
| |
|
| |
|
# True when the SPACE_ID environment variable is set (any value, even empty).
IS_SPACES = "SPACE_ID" in os.environ
|
| |
|
| |
|
| | if IS_SPACES:
|
| | import spaces
|
| |
|
| |
|
| | from config.characters import get_character, get_all_characters, DEFAULT_CHARACTER_ID
|
| |
|
| |
|
# Process-wide pipeline instance; stays None until get_pipeline() builds it.
_pipeline = None


def get_pipeline():
    """Return the shared ``AudioPipeline``, constructing it on first use.

    The instance is stored in the module-level ``_pipeline`` so later calls
    reuse it. The device is ``"cuda"`` when ``IS_SPACES`` is true and
    ``"cpu"`` otherwise.
    """
    global _pipeline
    if _pipeline is not None:
        return _pipeline

    # Imported here rather than at module top, so the import only runs the
    # first time a pipeline is actually requested.
    from src.pipeline import AudioPipeline

    target_device = "cpu"
    if IS_SPACES:
        target_device = "cuda"

    _pipeline = AudioPipeline(device=target_device)
    return _pipeline
|
| |
|
| |
|
| | def _process_audio_impl(audio_tuple, character_id, conversation_history):
|
| | """Implementation of audio processing pipeline."""
|
| | if audio_tuple is None:
|
| | return None, "", "", "No audio recorded"
|
| |
|
| | sample_rate, audio_data = audio_tuple
|
| |
|
| |
|
| | if len(audio_data) == 0:
|
| | return None, "", "", "No audio detected"
|
| |
|
| |
|
| | character = get_character(character_id)
|
| | if character is None:
|
| | character = get_character(DEFAULT_CHARACTER_ID)
|
| |
|
| | logger.info(f"Processing audio for character: {character.name}")
|
| |
|
| | try:
|
| |
|
| | pipeline = get_pipeline()
|
| | audio_out, user_text, response_text, timings = pipeline.process(
|
| | audio_tuple=audio_tuple,
|
| | system_prompt=character.system_prompt,
|
| | voice=character.voice,
|
| | conversation_history=conversation_history,
|
| | )
|
| |
|
| |
|
| | timing_str = f"STT: {timings['stt']*1000:.0f}ms | LLM: {timings['llm']*1000:.0f}ms | TTS: {timings['tts']*1000:.0f}ms | Total: {timings['total']*1000:.0f}ms"
|
| |
|
| | return audio_out, user_text, response_text, timing_str
|
| |
|
| | except Exception as e:
|
| | logger.error(f"Error processing audio: {e}", exc_info=True)
|
| | return None, "", f"Error: {str(e)}", ""
|
| |
|
| |
|
| |
|
def process_audio_gpu(audio_tuple, character_id, conversation_history):
    """Process one recording by delegating to ``_process_audio_impl``.

    When ``IS_SPACES`` is true this function is wrapped below with
    ``spaces.GPU(duration=30)``; otherwise it is used undecorated.
    """
    return _process_audio_impl(audio_tuple, character_id, conversation_history)


if IS_SPACES:
    # Same effect as applying @spaces.GPU(duration=30) at definition time.
    process_audio_gpu = spaces.GPU(duration=30)(process_audio_gpu)
|
| |
|
| |
|
def create_portrait_html(character):
    """Build the HTML snippet for a character's portrait and status pill.

    Reads ``character.id`` to pick the placeholder glyph and
    ``character.portrait_color`` for the circle's background. The returned
    markup always shows the idle state ("Ready to listen").
    """
    # NOTE(review): these glyph literals look like mis-decoded emoji
    # (mojibake); confirm the intended characters against the upstream file.
    if character.id == 'visionary':
        emoji = 'π'
    elif character.id == 'skeptic':
        emoji = 'π€'
    else:
        emoji = 'π'

    background = character.portrait_color

    return f"""
    <div class="portrait-container portrait-idle" style="
        width: 200px;
        height: 200px;
        border-radius: 50%;
        background: {background};
        margin: 0 auto;
        display: flex;
        align-items: center;
        justify-content: center;
        box-shadow: 0 4px 20px rgba(0, 0, 0, 0.2);
        position: relative;
    ">
        <div class="portrait-placeholder" style="font-size: 80px;">
            {emoji}
        </div>
        <div class="mouth-overlay mouth-closed" style="
            position: absolute;
            bottom: 25%;
            left: 50%;
            transform: translateX(-50%);
            width: 40px;
            height: 8px;
            background: rgba(0, 0, 0, 0.2);
            border-radius: 4px;
        "></div>
    </div>
    <div class="status-indicator status-idle" style="
        display: flex;
        align-items: center;
        justify-content: center;
        gap: 8px;
        padding: 8px 16px;
        border-radius: 20px;
        margin: 15px auto;
        width: fit-content;
        background: #f3f4f6;
    ">
        <div class="status-dot" style="width: 8px; height: 8px; border-radius: 50%; background: #9ca3af;"></div>
        <span class="status-text">Ready to listen</span>
    </div>
    """
|
| |
|
| |
|
def on_audio_record(audio, character_id, history):
    """Handle a finished recording and produce the UI updates.

    Args:
        audio: The recorded audio value, or ``None`` if nothing was captured.
        character_id: Id of the currently selected character.
        history: List of ``(user_text, response_text)`` pairs from earlier
            turns; ``None`` is treated as an empty list.

    Returns:
        ``(audio_out, status, chat_history, state_history)``. The last two
        are the same list, returned twice because the handler feeds two
        outputs. ``status`` is the timing line on success; if processing
        failed it carries the error message instead of being left empty.
    """
    if history is None:
        history = []

    if audio is None:
        return None, "", history, history

    # Flatten the (user, assistant) pairs into role-tagged messages for the
    # pipeline.
    conversation_history = []
    for user_msg, assistant_msg in history:
        conversation_history.extend(
            (
                {"role": "user", "content": user_msg},
                {"role": "assistant", "content": assistant_msg},
            )
        )

    audio_out, user_text, response_text, timing = process_audio_gpu(
        audio, character_id, conversation_history
    )

    # Copy so the incoming history list is never mutated.
    new_history = list(history)
    if user_text and response_text:
        new_history.append((user_text, response_text))
    elif response_text and not timing:
        # Failure path: the processor reports errors in the response slot
        # with an empty status. Nothing is added to the chat, so show the
        # message in the status field instead of silently dropping it.
        timing = response_text

    return audio_out, timing, new_history, new_history
|
| |
|
| |
|
def update_character_info(character_id):
    """Return refreshed character details and an emptied conversation.

    Returns a 4-tuple: the info markdown (bold tagline, blank line,
    description), the portrait HTML, and two empty lists. When the id does
    not resolve to a character, both text values are empty strings.
    """
    selected = get_character(character_id)
    if not selected:
        return "", "", [], []

    info_markdown = f"**{selected.tagline}**\n\n{selected.description}"
    portrait = create_portrait_html(selected)
    return info_markdown, portrait, [], []
|
| |
|
| |
|
def clear_conversation():
    """Return two fresh, independent empty lists to reset the conversation."""
    empty_chat, empty_state = [], []
    return empty_chat, empty_state
|
| |
|
| |
|
| |
|
# Optional stylesheet expected at static/styles.css next to this module.
css_path = Path(__file__).parent / "static" / "styles.css"

# Fall back to no custom CSS when the stylesheet is absent (or is not a
# regular file). Decode explicitly as UTF-8 so the result does not depend on
# the platform's default text encoding.
custom_css = css_path.read_text(encoding="utf-8") if css_path.is_file() else ""
|
| |
|
| |
|
| |
|
# Build the Gradio UI. Components are created in layout order inside the
# Blocks context, and the event handlers are wired up at the end of it.
# NOTE(review): several labels below ("π", "π€", "ποΈ") look like mis-decoded
# emoji (mojibake); confirm the intended characters against the upstream file.
with gr.Blocks(
    title="PersonaFlow",
    theme=gr.themes.Soft(),
    css=custom_css,
) as demo:

    gr.LoginButton(value="Sign in to use your Pro Quota")

    # Conversation history as a list of (user_text, response_text) pairs;
    # read and written by the handlers wired below.
    conversation_state = gr.State([])

    gr.Markdown("""
    # π PersonaFlow
    ### Speak with AI characters that have distinct personalities and voices

    Select a character, then click the microphone to start talking!
    """)

    with gr.Row():

        # Left column: character picker and description.
        with gr.Column(scale=1):
            gr.Markdown("### Choose Your Character")

            # Choices are (display name, id) pairs, so the handlers receive
            # the character id.
            character_dropdown = gr.Dropdown(
                choices=[(c.name, c.id) for c in get_all_characters()],
                value=DEFAULT_CHARACTER_ID,
                label="Character",
                interactive=True,
            )

            # Initial text mirrors what update_character_info() produces.
            # NOTE(review): assumes DEFAULT_CHARACTER_ID always resolves;
            # a None result would fail here at import time.
            default_char = get_character(DEFAULT_CHARACTER_ID)
            character_info = gr.Markdown(
                f"**{default_char.tagline}**\n\n{default_char.description}"
            )

        # Middle column: portrait, microphone input, spoken reply, timings.
        with gr.Column(scale=2):

            portrait_html = gr.HTML(
                value=create_portrait_html(get_character(DEFAULT_CHARACTER_ID)),
            )

            # Microphone-only input, delivered to the handler as a
            # (sample_rate, numpy array) value.
            audio_input = gr.Audio(
                sources=["microphone"],
                type="numpy",
                label="π€ Click to speak",
                max_length=10,
            )

            # The character's reply; starts playing as soon as it is set.
            audio_output = gr.Audio(
                label="Character Response",
                type="numpy",
                autoplay=True,
            )

            # Read-only status line filled from the handler's second output.
            timing_display = gr.Textbox(
                label="Processing Time",
                interactive=False,
            )

        # Right column: chat transcript and reset button.
        with gr.Column(scale=1):
            gr.Markdown("### Conversation")

            chatbot = gr.Chatbot(
                label="Chat History",
                height=400,
            )

            clear_btn = gr.Button("ποΈ Clear Conversation", variant="secondary")

    # Switching character refreshes the info and portrait and, via the two
    # empty lists update_character_info returns, clears chat and state.
    character_dropdown.change(
        fn=update_character_info,
        inputs=[character_dropdown],
        outputs=[character_info, portrait_html, chatbot, conversation_state],
    )

    # Run the pipeline when a recording stops; the new history goes to both
    # the visible chat and the stored state.
    audio_input.stop_recording(
        fn=on_audio_record,
        inputs=[audio_input, character_dropdown, conversation_state],
        outputs=[audio_output, timing_display, chatbot, conversation_state],
    )

    # Reset both the visible chat and the stored state.
    clear_btn.click(
        fn=clear_conversation,
        outputs=[chatbot, conversation_state],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(show_api=False)
|
| |
|