Spaces:
No application file
No application file
import os
import uuid

import soundfile as sf
import streamlit as st
import torch
from dia.model import Dia
# Page-level settings: browser-tab title, tab icon, and a centered layout.
st.set_page_config(
    page_title="Dia Text-to-Speech Converter",
    # Studio-microphone emoji, written as escapes so the literal cannot be
    # corrupted again by an encoding round-trip of this file.
    page_icon="\U0001F399\uFE0F",
    layout="centered",
)

# Generated WAV files are written under this directory; create it up front
# (exist_ok=True makes this safe on every rerun).
os.makedirs("static/audio", exist_ok=True)
@st.cache_resource(show_spinner=False)
def _load_dia_model(device_name):
    """Load the Dia-1.6B checkpoint and return the ready-to-use model.

    Wrapped in ``st.cache_resource`` so the weights are loaded once per
    server process and reused by every session and rerun, instead of each
    browser session loading its own copy.

    Args:
        device_name: ``"cuda"`` or ``"cpu"``.

    Returns:
        The object returned by ``Dia.from_pretrained``.
    """
    # Half precision is only used on the GPU; the CPU path uses float32.
    compute_dtype = "float16" if device_name == "cuda" else "float32"
    model = Dia.from_pretrained(
        "nari-labs/Dia-1.6B",
        compute_dtype=compute_dtype,
        device=torch.device(device_name),
    )
    if device_name == "cpu":
        # NOTE(review): Dia looks like a plain wrapper rather than an
        # nn.Module -- confirm. The guard keeps a missing .eval() from
        # aborting startup on CPU.
        if hasattr(model, "eval"):
            model = model.eval()
        # Keep the original cap of 4 threads, but never ask for more
        # threads than the host reports cores.
        torch.set_num_threads(min(4, os.cpu_count() or 1))
    return model


# Initialize session state for model
if 'model' not in st.session_state:
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        st.sidebar.info(f"Using device: {device}")
        with st.spinner("Loading Dia model..."):
            # Assigned only once the model is fully configured, so a failure
            # part-way through never leaves a half-initialised entry behind.
            st.session_state.model = _load_dia_model(device)
        st.sidebar.success("Model loaded successfully with optimizations")
    except Exception as e:
        # Top-level UI boundary: report the failure and halt this script run.
        st.error(f"Error loading Dia model: {str(e)}")
        st.stop()
# Function to generate audio
def generate_audio(text, sample_rate=44100, output_dir="static/audio"):
    """Synthesize ``text`` with the session's model and save it as a WAV file.

    Args:
        text: Script to synthesize. Empty or whitespace-only input is
            rejected with an on-page error.
        sample_rate: Sample rate passed to ``soundfile.write``. Defaults to
            44100, the value this script has always used.
        output_dir: Directory the WAV file is written to; created if missing.

    Returns:
        Path of the written WAV file, or ``None`` if validation or
        generation failed (the error is shown via ``st.error``).
    """
    # Reject blank scripts before invoking the model.
    if not text or not text.strip():
        st.error("Text is required")
        return None

    try:
        output = st.session_state.model.generate(text)
        if output is None:
            st.error("Error generating audio: model returned no audio")
            return None

        os.makedirs(output_dir, exist_ok=True)
        # A random UUID keeps concurrent requests from overwriting each other.
        filepath = os.path.join(output_dir, f"audio_{uuid.uuid4()}.wav")
        sf.write(filepath, output, sample_rate)
        return filepath
    except Exception as e:
        # UI boundary: surface the failure on the page instead of crashing.
        st.error(f"Error generating audio: {str(e)}")
        return None
# UI
# The leading emoji is written as escapes so it survives encoding round-trips.
st.title("\U0001F399\uFE0F Dia - Text to Dialogue Demo")
st.markdown("Enter a multi-speaker script below using `[S1]`, `[S2]`, etc.")

# Text input, pre-filled with a sample two-speaker script.
text_input = st.text_area(
    "Script",
    value="[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face.",
    height=150,
)

# Action selection; only the first option is implemented.
action = st.selectbox(
    "Choose Action",
    ["Convert to Audio", "Summarize (Coming Soon)"],
    index=0,
)
# Generate button
if st.button("Generate Audio", type="primary"):
    if action == "Convert to Audio":
        # Forget the previous result so a failed run never shows stale audio.
        if "generated_audio" in st.session_state:
            del st.session_state["generated_audio"]
        with st.spinner("Generating audio..."):
            audio_file = generate_audio(text_input)
        if audio_file:
            st.session_state["generated_audio"] = audio_file
            st.success("Audio generated successfully!")
    else:
        st.error("Summarization not implemented yet")

# Render the player and download button from session state rather than inside
# the button branch: clicking "Download Audio" reruns the script, and on that
# rerun st.button() is False, which would otherwise make both widgets vanish.
if "generated_audio" in st.session_state:
    generated_audio = st.session_state["generated_audio"]
    if os.path.exists(generated_audio):
        st.audio(generated_audio)
        # Read the bytes up front so no open file handle is handed to the widget.
        with open(generated_audio, "rb") as audio_fh:
            audio_bytes = audio_fh.read()
        st.download_button(
            label="Download Audio",
            data=audio_bytes,
            file_name="generated_dialogue.wav",
            mime="audio/wav",
        )
# Sidebar footer: a horizontal rule followed by the attribution line.
for footer_markdown in (
    "---",
    "Powered by Dia-1.6B AI Text-to-Dialogue Model",
):
    st.sidebar.markdown(footer_markdown)