testing / app.py
UDface11jkj's picture
Rename data.py to app.py
2c51944 verified
import streamlit as st
import soundfile as sf
from dia.model import Dia
import os
import uuid
import torch
# Set page config (must run before any other Streamlit element is emitted).
st.set_page_config(
    page_title="Dia Text-to-Speech Converter",
    # The icon literal had been mis-decoded into mojibake; restore the emoji.
    page_icon="🎙️",
    layout="centered",
)

# Create directory for audio files
os.makedirs("static/audio", exist_ok=True)

# Initialize session state for model.
# The model is loaded only when this session has no "model" entry yet, so
# script reruns within the same session reuse the already-loaded instance.
# NOTE(review): session state is per browser session, so each new session
# loads its own copy of the model; consider st.cache_resource if one shared
# copy is acceptable -- confirm the model is safe to share first.
if "model" not in st.session_state:
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        st.sidebar.info(f"Using device: {device}")
        with st.spinner("Loading Dia model..."):
            # NOTE(review): float16 is requested on CPU as well -- confirm
            # that half precision is supported/fast enough there.
            st.session_state.model = Dia.from_pretrained(
                "nari-labs/Dia-1.6B",
                compute_dtype="float16",
                device=device,
            )
        if device == "cpu":
            # NOTE(review): assumes the Dia object exposes .eval() and that
            # it returns the model -- verify against the dia package.
            st.session_state.model = st.session_state.model.eval()
            # Cap PyTorch intra-op threads for CPU inference.
            torch.set_num_threads(4)
        st.sidebar.success("Model loaded successfully with optimizations")
    except Exception as e:
        # Top-level UI boundary: report the failure and halt this script run,
        # since nothing below can work without a model.
        st.error(f"Error loading Dia model: {e}")
        st.stop()
# Function to generate audio
def generate_audio(text):
    """Synthesize *text* with the session's Dia model and save it as a WAV file.

    Args:
        text: The script to synthesize.

    Returns:
        The path of the written file (``static/audio/audio_<uuid>.wav``), or
        ``None`` if the script is blank or generation/writing failed. In both
        failure cases the problem is reported to the user via ``st.error``.
    """
    try:
        # Reject whitespace-only scripts as well as empty ones so the model
        # is never invoked with nothing to synthesize.
        if not text or not text.strip():
            st.error("Text is required")
            return None

        output = st.session_state.model.generate(text)

        # A random UUID keeps files from different runs from colliding.
        filename = f"audio_{uuid.uuid4()}.wav"
        filepath = f"static/audio/{filename}"
        # Re-create the directory in case it was removed after startup.
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        # NOTE(review): 44100 is presumably the model's output sample rate --
        # confirm against the dia package.
        sf.write(filepath, output, 44100)
        return filepath
    except Exception as e:
        # UI boundary: surface any model or I/O failure instead of crashing.
        st.error(f"Error generating audio: {e}")
        return None
# UI
# The title emoji had been mis-decoded into mojibake; restore it.
st.title("🎙️ Dia - Text to Dialogue Demo")
st.markdown("Enter a multi-speaker script below using `[S1]`, `[S2]`, etc.")

# Text input, pre-filled with a sample two-speaker script.
text_input = st.text_area(
    "Script",
    value="[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face.",
    height=150,
)

# Action selection
action = st.selectbox(
    "Choose Action",
    ["Convert to Audio", "Summarize (Coming Soon)"],
    index=0,
)

# Generate button
if st.button("Generate Audio", type="primary"):
    if action == "Convert to Audio":
        with st.spinner("Generating audio..."):
            audio_file = generate_audio(text_input)

        # generate_audio returns None (after reporting the error itself)
        # when synthesis fails, so only the success path is handled here.
        if audio_file:
            st.success("Audio generated successfully!")
            st.audio(audio_file)

            # Download button
            with open(audio_file, "rb") as file:
                st.download_button(
                    label="Download Audio",
                    data=file,
                    file_name="generated_dialogue.wav",
                    mime="audio/wav",
                )
    else:
        # Any other selection is the placeholder summarize action.
        st.error("Summarization not implemented yet")

# Display info in sidebar
st.sidebar.markdown("---")
st.sidebar.markdown("Powered by Dia-1.6B AI Text-to-Dialogue Model")