|
|
import asyncio
import json
import os
import random
import tempfile

import streamlit as st

import classes
import parameters
import utils
from S3_bucket import AWS
|
|
|
|
|
import warnings |
|
|
# Silence noisy RuntimeWarnings (e.g. from async/audio libraries).
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Shared AWS helper used for S3 filesystem access throughout the app.
aws = AWS()

# A "random text" pick is applied one run *after* the button click so the
# text_area widget (keyed "input_text") can be updated before it is created.
if st.session_state.get("set_random_next_run"):
    st.session_state.input_text = st.session_state.pending_random_text
    st.session_state.set_random_next_run = False

# One-time session-state defaults (re-checked on every rerun, set only once).
_SESSION_DEFAULTS = (
    ("has_audio", False),        # True once a clip has been generated
    ("last_msg", None),          # status message for the last generation
    ("last_audio", None),        # raw audio of the last generation
    ("last_sr", None),           # sample rate of the last generation
    ("show_feedback", False),    # enables the feedback widgets
    ("last_session_id", None),   # id used when submitting feedback
    ("voice_cache", {}),         # audio-hash -> cloned voice_id cache
    ("page", "Home"),            # current nav page
)
for _key, _default in _SESSION_DEFAULTS:
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Load the shared pronunciation dictionary from S3 once per session.
if "pronunc_dict" not in st.session_state:
    with aws.fs.open(parameters.GLOBAL_PRONUNCIATION_DICT_PATH, "r") as f:
        st.session_state.pronunc_dict = json.load(f)
|
|
|
|
|
|
|
|
# Top navigation bar: the active page's button is rendered as "primary" with a
# decorated label; clicking any button switches pages on the next rerun.
col_h, col_u, col_a, _ = st.columns([0.2, 0.2, 0.2, 0.4])

_NAV_ITEMS = (
    # (column, page name, label when active, label when inactive, widget key)
    (col_h, "Home", "🏠 Home", "Home", "nav_home"),
    (col_u, "Use", "Use", "How to use app", "nav_use"),
    (col_a, "About", "ℹ️ About", "About", "nav_about"),
)

for _col, _page, _active_label, _idle_label, _key in _NAV_ITEMS:
    with _col:
        _is_active = st.session_state.page == _page
        _label = _active_label if _is_active else _idle_label
        _kwargs = {"type": "primary"} if _is_active else {}
        if st.button(_label, key=_key, use_container_width=True, **_kwargs):
            st.session_state.page = _page
|
|
|
|
|
if st.session_state.page == "Home":

    # BUGFIX / NOTE(review): st.set_page_config() must be the *first* Streamlit
    # command of a script run; by this point st.columns()/st.button() have
    # already executed, so the bare call raised StreamlitAPIException and broke
    # the Home page. Guarded so the page still renders; the call should
    # ultimately move to the top of this file, right after the imports.
    try:
        st.set_page_config(page_title="Ori TTS & Voice Cloning", layout="wide")
    except Exception:
        pass

    st.title("🎙️ Ori TTS & Voice Cloning System")
    st.markdown("Choose a default speaker or upload reference audio (min 5 sec), select a language, and enter text to generate speech")
|
|
|
|
|
with st.sidebar:
    st.title("Home")
    st.markdown("---")

    st.header("Models......")
    model = st.radio("Select Model", ["V2", "V1"])

    # The V1/V2 flows are identical apart from their lookup tables and the
    # widget-key prefix, so select those once and share the rest.
    if model == "V1":
        _lang_table = utils.V1_LANGUAGES
        _speaker_table = utils.V1_SPEAKERS
        _key_prefix = "v1"
    else:
        _lang_table = utils.V2_LANGUAGES
        _speaker_table = utils.V2_SPEAKERS
        _key_prefix = "v2"

    st.header("Languages.....")
    language = st.selectbox("Select Language", list(_lang_table.keys()))

    st.header("Voice Settings.....")
    voice_mode = st.radio("Voice Selection Mode", ["Default Speaker", "Upload Audio"])

    if voice_mode == "Default Speaker":
        default_speaker = st.selectbox(
            "Select Default Speaker",
            list(_speaker_table[_lang_table[language]]),
        )
        reference_audio = None
    else:
        st.info("Give a reference audio (min 5 seconds)")
        audio_source = st.radio(
            "Reference audio source",
            ["Upload file", "Record audio"],
            horizontal=True,
            key=f"{_key_prefix}_audio_source",
        )

        default_speaker = None

        if audio_source == "Upload file":
            reference_audio = st.file_uploader(
                "Upload Reference Audio",
                type=["wav", "mp3", "flac"],
                key=f"{_key_prefix}_file_uploader",
            )
        else:
            reference_audio = st.audio_input(
                "Record Reference Audio",
                key=f"{_key_prefix}_audio_input",
            )

    # Optional fine-tuning knobs forwarded verbatim to the TTS backend.
    with st.expander("Advanced Settings"):
        speech_rate = st.slider("Speech Rate", 0.25, 2.0, 1.0, 0.25)
        speed = st.slider("Speed", 0.5, 2.0, 1.0, 0.1)
        expressive = st.slider("Expressive", 0.0, 1.0, 0.1, 0.05)
        stability = st.slider("Stability", 0, 10, 1, 1)
        clarity = st.slider("Clarity", 0.0, 1.0, 0.1, 0.1)
        volume_level = st.slider("Volume Level", 0.5, 3.0, 1.0, 0.1)
        stitch_request = st.checkbox("Stitch Request ()", value=False)
|
|
|
|
|
|
|
|
|
|
|
col1, col2 = st.columns([2, 1])

with col1:
    # Backing keys for the text area and the "random text" handshake.
    for _name, _blank in (
        ("input_text", ""),
        ("set_random_next_run", False),
        ("pending_random_text", ""),
    ):
        if _name not in st.session_state:
            st.session_state[_name] = _blank

    input_text = st.text_area(
        "Input Text",
        key="input_text",
        placeholder="Enter the text you want to synthesize...",
        height=130,
    )

    btn_col1, btn_col2 = st.columns(2)
    with btn_col1:
        random_btn = st.button("🎲 Random Text", use_container_width=True)
    with btn_col2:
        generate_btn = st.button("🎵 Generate Speech", type="primary", use_container_width=True)
|
|
|
with col2:
    st.markdown("### Add Pronunciation Pair")

    key_col1, value_col2 = st.columns(2)
    with key_col1:
        pr_key = st.text_input(
            "Pronunciation key 👇",
            label_visibility="visible",
            disabled=False,
            placeholder="Enter word",
            key="pr_key",
        )
    with value_col2:
        pr_value = st.text_input(
            "Pronunciation value 👇",
            label_visibility="visible",
            disabled=False,
            placeholder="Enter correct pronunciation",
            key="pr_value",
        )
    add_pair = st.button("Add Pronunciation Pair", type='primary', use_container_width=True)

    if add_pair:
        # Strip once; the stored pair and the confirmation use the same values.
        key_text = pr_key.strip()
        value_text = pr_value.strip()
        if key_text and value_text:
            # NOTE(review): session-local only — the shared dict loaded from S3
            # is not written back here; confirm whether persistence is intended.
            st.session_state.pronunc_dict[key_text] = value_text
            st.success(f"Added pronunciation pair: {key_text} → {value_text}")
        else:
            st.warning("Both key and value are required to add a pronunciation pair.")

    # User-facing help text. BUGFIX: "mispronounces some word incorrectly" was
    # redundant/ungrammatical.
    st.markdown("""
If the model mispronounces a word, you can correct it by adding the term as the Pronunciation Key and its phonetic spelling as the Pronunciation Value. For example, if AI/Cholestrol isn't pronounced correctly, respell it as ए आई/colestrol: enter AI/Cholestrol in the Pronunciation Key field and ए आई/colestrol in the Pronunciation Value field, then click **Add Pronunciation Pair**.
""")
|
|
|
|
|
if random_btn:
    # Idiom fix: membership test directly on the dict instead of .keys().
    if language in utils.language_sentences:
        # Stash the pick and apply it on the next run, before the text_area
        # widget is instantiated (see set_random_next_run at the top of file).
        st.session_state.pending_random_text = random.choice(utils.language_sentences[language])
        st.session_state.set_random_next_run = True
        st.rerun()
    else:
        st.warning(f"No sample sentences available for {language}")
|
|
|
|
|
|
|
|
if generate_btn:

    session_id = utils.generate_session_id()

    # Per-session pronunciation overrides forwarded to the TTS backend.
    # (Despite the name, this is a dict, not a serialized string.)
    pronunciation_dict_str = st.session_state.pronunc_dict

    input_text = st.session_state.input_text
    print(f"Clicked Generation btn.....\n input:- {input_text}")
    if not input_text.strip():
        st.warning("Please enter text to synthesize")
    elif len(input_text) > 1000:
        st.warning(f"Text length must be less than 1000 characters. Current length: {len(input_text)}")
    else:
        try:
            token = parameters.TTS_SECRET_KEY

            if model == "V1":
                language_code = utils.V1_LANGUAGES[language]
            else:
                language_code = utils.V2_LANGUAGES[language]

            user_id = parameters.user_id
            voice_path = None  # NOTE(review): assigned but never consumed downstream — confirm before removing

            if voice_mode == "Default Speaker":
                # V1 and V2 differ only in their speaker table.
                speaker_table = utils.V1_SPEAKERS if model == "V1" else utils.V2_SPEAKERS
                if language_code in speaker_table:
                    voice_id = default_speaker
                    status_msg = f"Using default speaker: {default_speaker} for {language}"
                else:
                    st.warning(f"Language {language} not available for {default_speaker}")
                    st.stop()
            else:
                if not reference_audio:
                    st.warning("Please upload a reference audio file")
                    st.stop()
                audio_hash = utils.get_audio_hash(reference_audio)
                cache_key = f"{audio_hash}_{language_code}_{model}"

                if cache_key in st.session_state.voice_cache:
                    voice_id = st.session_state.voice_cache[cache_key]
                    voice_path = cache_key
                    status_msg = f"✓ Using cached voice ID for language: {language}"
                else:
                    with st.spinner("Cloning voice..."):
                        # BUGFIX: the delete=False temp file used to leak on
                        # every clone; it is now removed in the finally block.
                        tmp_path = None
                        try:
                            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
                                tmp_file.write(reference_audio.read())
                                tmp_file.flush()
                                tmp_path = tmp_file.name
                            if model == "V1":
                                result = utils.v1_clone_voice(tmp_path, user_id, token, language_code)
                            else:
                                result = utils.v2_clone_voice(tmp_path, user_id, token)
                        finally:
                            if tmp_path and os.path.exists(tmp_path):
                                os.unlink(tmp_path)

                        voice_id = result['voice_id']
                        reference_audio.seek(0)
                        classes.upload_voice_clone_audio(reference_audio, voice_id)
                        voice_path = cache_key

                        # Crude cache bound: wipe everything past 5 entries.
                        if len(st.session_state.voice_cache) > 5:
                            st.session_state.voice_cache.clear()
                        st.session_state.voice_cache[cache_key] = voice_id
                        print(f"This is the voice id get from {model}:--{voice_id}")
                        status_msg = f"✓ Cloned voice successfully for language: {language}"

            with st.spinner("Generating speech..."):
                if model == "V1":
                    generate = utils.v1_generate_speech_async
                else:
                    generate = utils.v2_generate_speech_async
                # BUGFIX: asyncio.run() always closes its loop, unlike the
                # previous new_event_loop()/close() pair, which leaked the loop
                # whenever the coroutine raised.
                sr, audio = asyncio.run(
                    generate(
                        session_id, voice_mode, voice_id, model, input_text, language_code, user_id,
                        pronunciation_dict_str, speed, expressive, stability, clarity,
                        volume_level, speech_rate, stitch_request
                    )
                )

            st.session_state.last_msg = status_msg
            st.session_state.last_audio = audio
            st.session_state.last_sr = sr
            st.session_state.last_session_id = session_id
            st.session_state.has_audio = True
            st.session_state.show_feedback = True
            print("Generation completed......")
        except Exception as e:
            st.session_state.show_feedback = False
            # Streamlit's StopException subclasses Exception, so the st.stop()
            # calls above would otherwise be swallowed here — re-raise them.
            if type(e).__name__ == "StopException":
                raise
            # BUGFIX: failures were silently discarded (bare pass); surface
            # them so the user knows generation did not happen.
            st.error(f"Speech generation failed: {e}")
|
|
st.markdown("---")
st.markdown("### 🎧 Output & Feedback")

a_col, f_col = st.columns([1, 1])

with a_col:
    # Replay the most recently generated clip, or show a dimmed placeholder.
    has_clip = st.session_state.has_audio and st.session_state.last_audio is not None
    if has_clip:
        st.success(st.session_state.last_msg)
        st.audio(st.session_state.last_audio, sample_rate=st.session_state.last_sr)
    else:
        placeholder_html = (
            "<div style='opacity:0.4; border:1px dashed #888; padding:0.75rem; text-align:center;'>"
            "Audio preview will appear here after you generate speech."
            "</div>"
        )
        st.markdown(placeholder_html, unsafe_allow_html=True)
|
|
|
|
|
with f_col:

    # Feedback widgets are enabled only right after a successful generation.
    disabled = not (st.session_state.show_feedback and st.session_state.has_audio)

    # NOTE(review): this <div> does not actually wrap the widgets below — each
    # st.markdown is its own element and the div closes immediately — so the
    # opacity styling is cosmetic at best; the real dimming comes from
    # disabled=... on each widget. Consider removing the div pair.
    st.markdown(
        "<div style='opacity:{};'>".format("1.0" if not disabled else "0.4"),
        unsafe_allow_html=True,
    )

    rating_index = st.radio(
        "Rate this audio:",
        options=[0, 1, 2, 3, 4],
        format_func=lambda i: "⭐" * (i + 1),
        horizontal=True,
        index=None,
        key="rating_index",
        disabled=disabled,
    )

    feedback_msg = st.text_area(
        "✍️ Feedback (optional)",
        placeholder="Enter your feedback here...",
        height=80,
        key="feedback_msg",
        disabled=disabled,
    )

    submit_clicked = st.button(
        "📤 Submit Feedback",
        type="primary",
        disabled=disabled,
        key="submit_feedback_btn",
        use_container_width=True
    )

    st.markdown("</div>", unsafe_allow_html=True)

    if submit_clicked:
        if rating_index is None:
            st.warning("Please select a rating before submitting.")
        else:
            utils.update_rating(
                session_id=st.session_state.last_session_id,
                rating_index=rating_index,
                feedback_msg=feedback_msg or "",
            )

            # BUGFIX: submission used to complete with no visible confirmation.
            st.success("Feedback submitted — thank you!")
            st.session_state.show_feedback = False

st.markdown("---")
st.caption("Ori TTS & Voice Cloning System | Powered by Oriserve")
|
|
|
|
|
elif st.session_state.page == "Use": |
|
|
with st.sidebar: |
|
|
st.title("Use this app......") |
|
|
st.markdown("---") |
|
|
|
|
|
st.markdown("### How to Use This App") |
|
|
|
|
|
st.markdown(""" |
|
|
**Step 1: Select Model** |
|
|
- Select **V1** or **V2** model from the sidebar |
|
|
|
|
|
**Step 2: 🌐 Select Language** |
|
|
- Select your desired language from the dropdown |
|
|
|
|
|
**Step 3: 🎤 Select Voice Mode** |
|
|
- **Default Speaker**: Choose from pre-trained voices |
|
|
- **Upload Audio**: Upload or Record your own reference audio (min 5 seconds) for voice cloning |
|
|
|
|
|
**Step 4: ✍️ Enter Text** |
|
|
- Type or paste the text (in selected language) you want to convert to speech |
|
|
- Or you can select any random text by clicking on 🎲 Random Text button |
|
|
|
|
|
**Step 5: ⚙️ Customize Voice Parameters (Optional)** |
|
|
- Expand "Advanced Settings" in sidebar to fine-tune: |
|
|
- Speech rate |
|
|
- Speed |
|
|
- Expressive |
|
|
- Other voice parameters |
|
|
|
|
|
**Step 6: 🎵 Generate Audio** |
|
|
|
|
|
- Click the **"🎵 Generate Speech"** button |
|
|
- Wait for the audio to be generated |
|
|
- Play the audio directly in the browser |
|
|
|
|
|
**Step7: Add Pronunciation Pair** |
|
|
|
|
|
- <div>If the model mispronounces some word incorrectly,<br> |
|
|
you can correct it by adding the term as the Pronunciation Key and <br> |
|
|
its phonetical spelling as the Pronunciation Value. <br> |
|
|
For example, if <i><b style="color:red">AI/Cholestrol</b></i> isn't pronounced correctly, respell it as <i><b style = "color:green">ए आई/colestrol</b></i>: <br> |
|
|
enter <i><b style="color:red">AI/Cholestrol</b></i> in the Pronunciation Key field and <i><b style = "color:green">ए आई/colestrol</b></i> in the Pronunciation Value field, then click Add Pronunciation Pair.</div> |
|
|
|
|
|
**⭐ Provide Feedback** |
|
|
- Rate the generated audio quality |
|
|
- Give us your feedback |
|
|
- Your feedback helps improve our system |
|
|
""", |
|
|
unsafe_allow_html=True) |
|
|
st.markdown("---") |
|
|
st.caption("Ori TTS & Voice Cloning System | Powered by Oriserve") |
|
|
|
|
|
else:
    # "About" page (reached for any page value other than "Home"/"Use").
    # Static content only: one large HTML/CSS payload rendered via st.markdown
    # with unsafe_allow_html=True; no interactive widgets beyond the sidebar.
    with st.sidebar:
        st.title("About Us......")
        st.markdown("---")

    st.markdown(
        """
<style>
.features-container {
    display: grid;
    grid-template-columns: repeat(2, 1fr);
    gap: 20px;
}
.feature-block {
    padding: 15px;
    border-radius: 8px;
    transition: background-color 0.3s ease;
    min-height: 200px;
    display: flex;
    flex-direction: column;
    justify-content: flex-start;
    border: 1px solid #e0e0e0;
    background-color: #111827;
    color: #e5e7eb;
}
.feature-block:hover {
    background-color: #EA580C;
    cursor: pointer;
}
.feature-title {
    font-size: 1.4em;
    font-weight: bold;
    margin-bottom: 10px;
}
.feature-list {
    font-size: 1.05em;
    margin-left: 20px;
    list-style-type: none;
    padding-left: 0;
}
.feature-list li {
    margin: 8px 0;
}
.section-header {
    font-size: 1.8em;
    font-weight: bold;
    margin: 25px 0 15px 0;
    color: #38bdf8;
}
.intro-text {
    font-size: 1.1em;
    line-height: 1.4;
    margin-bottom: 20px;
}
.footer {
    margin-top: 20px;
    padding: 15px;
    border-radius: 8px;
    transition: background-color 0.3s ease;
    min-height: 150px;
    display: flex;
    flex-direction: column;
    justify-content: flex-start;
    border: 1px solid #e0e0e0;
    background-color: #020617;
    color: #e5e7eb;
}
.footer:hover{
    background-color: #3f3f46;
}
.footer .feature-list a.hf-link {
    color: #FFFF;
    text-decoration: none;
    transition: all 0.3s ease;
    display: inline-block;
}
.footer .feature-list a.hf-link:hover {
    color: #EA580C;
    font-weight: 600;
    transform: translateX(10px);
}
.footer .feature-list span {
    color: #FFFF;
    text-decoration: none;
    transition: all 0.3s ease;
    display: inline-block;
}
.footer .feature-list span:hover {
    color: #EA580C;
    font-weight: 600;
    text-decoration: underline;
}
@media (max-width: 768px) {
    .features-container {
        grid-template-columns: 1fr;
    }
}
</style>
<div style="text-align: center; font-size: 2.2em; font-weight: bold; margin-bottom: 20px;">
🚀 Welcome to ORI Text-to-Speech
</div>
<div class="section-header">🌟 About Our Technology</div>
<div class="intro-text">
Greetings from Oriserve! We're excited to showcase our refined Text-to-Speech capabilities—powered by generative voice synthesis to deliver
<strong>natural-sounding</strong> and <strong>professionally tuned</strong> speech output.
</div>
<div class="section-header">✨ Key Features</div>
<div class="features-container">
<div class="feature-block">
<div class="feature-title">🎯 Core Capabilities</div>
<ul class="feature-list">
<li><strong>Robust voice models suited for production use</strong></li>
<li><strong>Our model supports 14 Indian Languages (Hindi, Bengali, Kannada, Marathi, Bhojpuri, English, Tamil, Telugu etc..) with more languages on the way</strong></li>
<li><strong>Diverse voice styles for varied use cases</strong></li>
<li><strong>Responsive audio generation with practical latency</strong></li>
</ul>
</div>
<div class="feature-block">
<div class="feature-title">🛠️ Advanced Controls</div>
<ul class="feature-list">
<li><strong>Customizable voice parameters</strong></li>
<li><strong>Expressiveness adjustment options</strong></li>
<li><strong>Balance tuning for clarity and stability</strong></li>
</ul>
</div>
<div class="feature-block">
<div class="feature-title">💫 Special Features</div>
<ul class="feature-list">
<li><strong>Basic context understanding during synthesis</strong></li>
<li><strong>Text formatting optimized for speech</strong></li>
<li><strong>Improved handling of common pronunciation cases</strong></li>
</ul>
</div>
<div class="feature-block">
<div class="feature-title">⚡ Processing Capabilities</div>
<ul class="feature-list">
<li><strong>Near real-time synthesis performance</strong></li>
<li><strong>Optimized latency for interactive use</strong></li>
<li><strong>Audio streaming with first-byte latency as low as ~100 ms</strong></li>
</ul>
</div>
<div class="feature-block">
<div class="feature-title">🔊 Audio Quality</div>
<ul class="feature-list">
<li><strong>Clear and natural-sounding speech</strong></li>
<li><strong>Audio fidelity aligned with general production standards</strong></li>
<li><strong>Consistent synthesis across sessions</strong></li>
</ul>
</div>
<div class="feature-block">
<div class="feature-title">📈 Future Development</div>
<ul class="feature-list">
<li><strong>Continuous quality and performance updates</strong></li>
<li><strong>More expressive and natural voice styles in progress</strong></li>
<li><strong>More Languages will be added soon</strong></li>
</ul>
</div>
<div class="feature-block">
<div class="feature-title">🚨 Disclaimer</div>
<ul class="feature-list">
<li><strong>The voices and utterances produced by this application are generated by an AI model.</strong></li>
<li><strong>By using the Voice Clone feature, you confirm you have the necessary rights to any uploaded audio.</strong></li>
<li><strong>We make no warranty—express or implied—on the accuracy, appropriateness, or quality of the generated speech.</strong></li>
</ul>
</div>
<div class="feature-block">
<div class="feature-title">How to Reach Us</div>
<ul class="feature-list">
<li><strong>Email : <span>ai-team@oriserve.com</span></strong></li>
<li><strong>Huggingface : <a href="https://huggingface.co/Oriserve" class="hf-link">Oriserve Hugging Face</a></strong></li>
<li><strong>GitHub : <a href="https://github.com/OriserveAI" class="hf-link">OriserveAI GitHub</a></strong></li>
<li><strong>Website : <a href="https://oriserve.com/" class="hf-link">Oriserve website</a></strong></li>
</ul>
</div>
</div>
""",
        unsafe_allow_html=True,
    )
    st.markdown("---")
    st.caption("Ori TTS & Voice Cloning System | Powered by Oriserve")
    # NOTE(review): dead statement — this `pass` is unreachable bookkeeping and
    # can be deleted.
    pass
|
|
|
|
|
|