File size: 3,118 Bytes
8139351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import streamlit as st
import soundfile as sf
from dia.model import Dia
import os
import uuid
import torch

# Page configuration. Kept as the first Streamlit call in the script, ahead of
# any other st.* command.
st.set_page_config(
    page_title="Dia Text-to-Speech Converter",
    page_icon="🎙️",
    layout="centered"
)

# Generated WAV files are written under this directory; create it up front.
os.makedirs("static/audio", exist_ok=True)

# Load the Dia model once per session and keep it in session state so that
# script reruns reuse the already-loaded instance instead of reloading it.
if 'model' not in st.session_state:
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        st.sidebar.info(f"Using device: {device}")

        # Half precision only pays off on GPU. On CPU, float16 is typically
        # slower and some kernels may be unavailable, so use float32 there.
        compute_dtype = "float16" if device == "cuda" else "float32"

        with st.spinner("Loading Dia model..."):
            model = Dia.from_pretrained(
                "nari-labs/Dia-1.6B",
                compute_dtype=compute_dtype,
                device=device,
            )

            if device == "cpu":
                # NOTE(review): Dia may be a plain wrapper rather than an
                # nn.Module, in which case it has no eval() — confirm against
                # the installed dia version. Guard the call so a missing
                # method does not abort an otherwise successful load.
                switch_to_eval = getattr(model, "eval", None)
                if callable(switch_to_eval):
                    model = switch_to_eval()
                torch.set_num_threads(4)

            # Store the model only after every setup step succeeded; a
            # failure above must not leave a half-configured model behind
            # for the next rerun to pick up silently.
            st.session_state.model = model
            st.sidebar.success("Model loaded successfully with optimizations")
    except Exception as e:
        # Top-level boundary: report the failure in the UI and halt this run.
        st.error(f"Error loading Dia model: {str(e)}")
        st.stop()

# Function to generate audio
def generate_audio(text, sample_rate=44100):
    """Generate speech for *text* and save it as a WAV file.

    The script is passed to the model held in ``st.session_state.model`` and
    the result is written to ``static/audio`` under a unique file name.
    Problems are reported to the user through ``st.error`` rather than raised.

    Args:
        text: The dialogue script to synthesise.
        sample_rate: Sample rate, in Hz, written to the WAV file.

    Returns:
        The path of the written WAV file, or ``None`` if the text was empty
        (or whitespace only) or generation/writing failed.
    """
    # Validate before touching the model: whitespace-only input carries no
    # script and would otherwise be sent to the model as-is.
    if not text or (isinstance(text, str) and not text.strip()):
        st.error("Text is required")
        return None

    try:
        output = st.session_state.model.generate(text)

        # A random UUID keeps repeated or concurrent generations from
        # overwriting each other's files.
        output_dir = "static/audio"
        os.makedirs(output_dir, exist_ok=True)
        filepath = f"{output_dir}/audio_{uuid.uuid4()}.wav"

        sf.write(filepath, output, sample_rate)
        return filepath
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the Streamlit script run.
        st.error(f"Error generating audio: {str(e)}")
        return None

# UI
st.title("🎙️ Dia - Text to Dialogue Demo")
st.markdown("Enter a multi-speaker script below using `[S1]`, `[S2]`, etc.")

# Text input
text_input = st.text_area(
    "Script",
    value="[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face.",
    height=150
)

# Action selection
action = st.selectbox(
    "Choose Action",
    ["Convert to Audio", "Summarize (Coming Soon)"],
    index=0
)

# Generate button
if st.button("Generate Audio", type="primary"):
    # A new request invalidates whatever result was shown for the last one.
    st.session_state.pop("last_audio_file", None)

    if action == "Convert to Audio":
        with st.spinner("Generating audio..."):
            audio_file = generate_audio(text_input)

        if audio_file:
            st.session_state["last_audio_file"] = audio_file
            st.success("Audio generated successfully!")
    else:
        st.error("Summarization not implemented yet")

# Render the result from session state rather than inside the button branch:
# st.button is only True on the run triggered by its click, so any later rerun
# (for example, clicking the download button) would otherwise make the player
# and the download button disappear.
last_audio_file = st.session_state.get("last_audio_file")
if last_audio_file and os.path.exists(last_audio_file):
    st.audio(last_audio_file)

    # Download button
    with open(last_audio_file, "rb") as file:
        st.download_button(
            label="Download Audio",
            data=file,
            file_name="generated_dialogue.wav",
            mime="audio/wav"
        )

# Display info in sidebar
st.sidebar.markdown("---")
st.sidebar.markdown("Powered by Dia-1.6B AI Text-to-Dialogue Model")