ajajali09 committed on
Commit
ebe23c1
·
1 Parent(s): 0faef2a

release 14/05

Browse files
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ __pycache__/S3_bucket.cpython-310.pyc
4
+ test.py
__pycache__/S3_bucket.cpython-310.pyc CHANGED
Binary files a/__pycache__/S3_bucket.cpython-310.pyc and b/__pycache__/S3_bucket.cpython-310.pyc differ
 
__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/__pycache__/utils.cpython-310.pyc and b/__pycache__/utils.cpython-310.pyc differ
 
app.py CHANGED
@@ -431,9 +431,9 @@ def tts_inference(
431
  refine_generation: bool = False,
432
  stream: bool = False,
433
  ) -> Union[Generator[Tuple[int, np.ndarray], None, None], Tuple[int, np.ndarray]]:
434
-
435
- user_id = parameters.user_id
436
 
 
 
437
  if agent is None and recording_data is not None:
438
  audio_manager.update_current_recording(recording_data)
439
  clone_voice_name = process_voice_clone(recording_data, user_id)
@@ -441,132 +441,70 @@ def tts_inference(
441
  voice_name = str(clone_voice_name)
442
  print(f"The voice name, get from voice clone API:::--{voice_name}")
443
  else:
444
- gr.Error("Sorry, we are facing some issues with cloning this voice.\nPlease reload the app and try again.")
445
- print("Did not get any voice name from voice clone api:------")
 
446
  else:
447
  voice_name = [agents[agent]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
 
449
- print("\nParameters Recieved:\n")
450
- print("speechReqId", session_id)
451
- print("text", [text])
452
- print("language", [language_codes[language]])
453
- print("voice_name", voice_name)
454
- print("encoding", "default")
455
- print("expressive", expressiveness)
456
- print("stability", stability)
457
- print("clarity", clarity)
458
- print("speech_rate", speech_rate)
459
- print("loudness", loudness)
460
- print("refine_grneration", refine_generation)
461
- print("\n\n")
462
-
463
- try:
464
- if not text or text.strip() == "":
465
- raise gr.Error("Text input cannot be empty")
466
-
467
- if len(text) > 1000:
468
- raise gr.Error(
469
- f"Text length must be less than 1000 characters. Current length: {len(text)}"
470
- )
471
-
472
- payload = json.dumps(
473
- {
474
- "speechReqId": session_id,
475
- "user_id": user_id,
476
- "text": [text],
477
- "language": [language_codes[language]],
478
- "voice_name": voice_name,
479
- "encoding": "default",
480
- "expressive": expressiveness,
481
- "stability": stability,
482
- "clarity": clarity,
483
- "speech_rate": speech_rate,
484
- "refine_generation": refine_generation,
485
- "pronunciation_dict":pronunc_dict
486
- }
487
- )
488
 
489
- with requests.post(
490
- url=parameters.TTS_URL,
491
- headers={"Authorization": f"Bearer {parameters.TTS_secret_key}"},
492
- data=payload,
493
- verify=False,
494
- stream=True,
495
- ) as response:
496
- if response.status_code != 200:
497
- print(
498
- f"API request failed with status {response.status_code} and error: {response.reason}"
499
- )
500
- log_initial_submission(
501
- code=response.status_code,
502
- session_id=session_id,
503
- language=language,
504
- input_method=None,
505
- agent_used=voice_name,
506
- voice_path=recording_data,
507
- text_input=text,
508
- expressiveness=expressiveness,
509
- stability=stability,
510
- clarity=clarity,
511
- speech_rate=speech_rate,
512
- loudness=loudness,
513
- refine_generation=refine_generation,
514
- err_code=response.status_code,
515
- err_msg=response.reason,
516
  )
517
- raise gr.Error(f"API Error: {response.status_code} - {response.reason}")
518
- sample_rate = 24000
519
-
520
- if stream:
521
- # Streaming mode
522
- last_chunk_time = time.time()
523
- for chunk in response.iter_content(chunk_size=32768):
524
- chunk_received_time = time.time()
525
- chunk_delay = chunk_received_time - last_chunk_time
526
- last_chunk_time = chunk_received_time
527
-
528
- if chunk:
529
- start_processing_time = time.time()
530
- audio_chunk = np.frombuffer(chunk, dtype=np.int16)
531
- processing_time = time.time() - start_processing_time
532
-
533
- if len(audio_chunk) > 0:
534
- yield_time_start = time.time()
535
- yield (sample_rate, audio_chunk)
536
- yield_time_complete = time.time()
537
-
538
- print(
539
- f"Streaming chunk of size {len(audio_chunk)} - "
540
- f"Chunk delay: {chunk_delay:.4f}s, "
541
- f"Processing time: {processing_time:.4f}s, "
542
- f"Yielding time: {(yield_time_complete - yield_time_start):.4f}s"
543
- )
544
- else:
545
- # Non-streaming mode
546
- start_time = time.time()
547
- audio_bytes = b""
548
-
549
- for chunk in response.iter_content(chunk_size=32768):
550
- if chunk:
551
- print(f"Streaming chunk of size {len(chunk)}")
552
- audio_bytes += chunk
553
-
554
- if len(audio_bytes) > 0:
555
- complete_audio = np.frombuffer(audio_bytes, dtype=np.int16)
556
- processing_time = time.time() - start_time
557
-
558
- complete_audio = increase_volume(complete_audio, factor=loudness)
559
- yield (sample_rate, complete_audio)
560
 
561
- saved_path = save_generated_audio(complete_audio, session_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
 
 
 
 
 
 
 
 
 
 
 
 
563
  log_initial_submission(
564
  code=response.status_code,
565
  session_id=session_id,
566
  language=language,
567
- input_method="Select Voice" if agent else "Voice clone",
568
  agent_used=voice_name,
569
- voice_path=saved_path,
570
  text_input=text,
571
  expressiveness=expressiveness,
572
  stability=stability,
@@ -574,15 +512,78 @@ def tts_inference(
574
  speech_rate=speech_rate,
575
  loudness=loudness,
576
  refine_generation=refine_generation,
 
 
577
  )
578
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
579
  else:
580
- raise ValueError("No audio data received from API")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
581
 
582
- except requests.RequestException as e:
583
- raise gr.Error(f"Network Error: Failed to connect to the API server - {str(e)}")
584
- except Exception as e:
585
- raise gr.Error(f"An unexpected error occurred: {str(e)}")
 
 
 
586
 
587
 
588
  def disable_rating_box():
@@ -599,7 +600,7 @@ def disable_rating_box():
599
 
600
  def tts_tab():
601
  with aws.fs.open(parameters.GLOBAL_PRONUNCIATION_DICT_PATH,'r') as f:
602
- global_pronunc_dict = json.loads(f.read())
603
 
604
  pronunc_dict_state = gr.State(value=global_pronunc_dict)
605
  session_id = generate_session_id()
@@ -631,33 +632,33 @@ def tts_tab():
631
  <div class="info-heading">🎯 Quick Start Guide</div>
632
  """)
633
  gr.Markdown("""
634
-
635
  🌐 **Select Language**
636
  * Choose from our listed languages
637
-
638
  🎤 **Choose Voice**
639
  * Select from the curated collection of high-quality voices
640
  * Each voice is optimized for natural speech patterns
641
  * You can give your own voice by clicking on Voice clone
642
-
643
  ✍️ **Enter Your Text**
644
  * Type or paste your text in the input box
645
  * Or you can give input by clicking Random Sentence
646
-
647
  ⚙️ **Customize Voice Parameters**
648
  * Fine-tune expressiveness for emotional depth
649
  * Adjust stability for consistent output
650
  * Control clarity for precise articulation
651
-
652
  🎵 **Generate Audio**
653
  * Click the generate button to create your audio
654
  * Processing typically takes a few seconds
655
-
656
  ⭐ **Provide Feedback**
657
  * Rate the generated audio quality
658
  * Give us your feedback
659
  * Your feedback helps improve our system
660
-
661
  💾 **Access Your Audio**
662
  * Download the generated audio for offline use
663
  """)
@@ -805,7 +806,7 @@ def tts_tab():
805
  with gr.Row():
806
  pronunc_dict_key_in = gr.Textbox(label="Pronunciation key",placeholder="Enter word")
807
  pronunc_dict_key_out = gr.Textbox(label="Pronunciation Value",placeholder="Enter word with correct pronunciation")
808
-
809
  add_btn = gr.Button("Add pronunciation pair", variant="primary")
810
 
811
  add_btn.click(
@@ -875,6 +876,7 @@ def tts_tab():
875
  <li>Add your pronunciation of any word that doesn't sound well</li>
876
  <li>If you don't like the pronunciation of any word, then add your word with the key and value pair</li>
877
  <li><b style = "color:red">Note:-</b>Pronunciation pairs are <i style="color:red">case sensitive</i></li>
 
878
  </ul>
879
  """)
880
 
@@ -1094,7 +1096,43 @@ def about_tab():
1094
  line-height: 1.4;
1095
  margin-bottom: 20px;
1096
  }
1097
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1098
  /* For mobile responsiveness */
1099
  @media (max-width: 768px) {
1100
  .features-container {
@@ -1103,7 +1141,7 @@ def about_tab():
1103
  }
1104
  </style>
1105
  <div style="text-align: center; font-size: 3em; font-weight: bold; margin-bottom: 20px;"> 🚀 Welcome to ORI Text-to-Speech </div>
1106
-
1107
  <div class="section-header">🌟 About Our Technology</div>
1108
  <div class="intro-text">Greetings from Oriserve! We're excited to showcase our refined Text-to-Speech capabilities—powered by generative voice synthesis to deliver <strong>natural-sounding</strong> and <strong>professionally tuned</strong> speech output.</div>
1109
 
@@ -1114,12 +1152,12 @@ def about_tab():
1114
  <div class="feature-title">🎯 Core Capabilities</div>
1115
  <ul class="feature-list">
1116
  <li><strong>Robust voice models suited for production use</strong></li>
1117
- <li><strong>Optimized for English and Hindi, with multilingual expansion underway</strong></li>
1118
  <li><strong>Diverse voice styles for varied use cases</strong></li>
1119
  <li><strong>Responsive audio generation with practical latency</strong></li>
1120
  </ul>
1121
  </div>
1122
-
1123
  <div class="feature-block">
1124
  <div class="feature-title">🛠️ Advanced Controls</div>
1125
  <ul class="feature-list">
@@ -1128,7 +1166,7 @@ def about_tab():
1128
  <li><strong>Balance tuning for clarity and stability</strong></li>
1129
  </ul>
1130
  </div>
1131
-
1132
  <div class="feature-block">
1133
  <div class="feature-title">💫 Special Features</div>
1134
  <ul class="feature-list">
@@ -1137,7 +1175,7 @@ def about_tab():
1137
  <li><strong>Improved handling of common pronunciation cases</strong></li>
1138
  </ul>
1139
  </div>
1140
-
1141
  <div class="feature-block">
1142
  <div class="feature-title">⚡ Processing Capabilities</div>
1143
  <ul class="feature-list">
@@ -1146,7 +1184,7 @@ def about_tab():
1146
  <li><strong>Audio streaming with first-byte latency as low as ~150 ms</strong></li>
1147
  </ul>
1148
  </div>
1149
-
1150
  <div class="feature-block">
1151
  <div class="feature-title">🔊 Audio Quality</div>
1152
  <ul class="feature-list">
@@ -1155,7 +1193,7 @@ def about_tab():
1155
  <li><strong>Consistent synthesis across sessions</strong></li>
1156
  </ul>
1157
  </div>
1158
-
1159
  <div class="feature-block">
1160
  <div class="feature-title">📈 Future Development</div>
1161
  <ul class="feature-list">
@@ -1164,6 +1202,15 @@ def about_tab():
1164
  <li><strong>Expanded language and dialect support coming soon</strong></li>
1165
  </ul>
1166
  </div>
 
 
 
 
 
 
 
 
 
1167
  </div>
1168
 
1169
  """
@@ -1171,12 +1218,11 @@ def about_tab():
1171
 
1172
  return gr.Markdown("")
1173
 
1174
-
1175
  def initialize_app():
1176
  # await audio_manager.load_agent_voices(agents)
1177
 
1178
  try:
1179
- with gr.Blocks(js=js) as demo:
1180
  with gr.Tabs() as tabs:
1181
  with gr.Tab("🗣️ TTS"):
1182
  tts_tab()
@@ -1189,4 +1235,4 @@ def initialize_app():
1189
 
1190
 
1191
  demo = initialize_app()
1192
- demo.launch()
 
431
  refine_generation: bool = False,
432
  stream: bool = False,
433
  ) -> Union[Generator[Tuple[int, np.ndarray], None, None], Tuple[int, np.ndarray]]:
 
 
434
 
435
+ user_id = parameters.user_id
436
+ make_request = True
437
  if agent is None and recording_data is not None:
438
  audio_manager.update_current_recording(recording_data)
439
  clone_voice_name = process_voice_clone(recording_data, user_id)
 
441
  voice_name = str(clone_voice_name)
442
  print(f"The voice name, get from voice clone API:::--{voice_name}")
443
  else:
444
+ gr.Info("Sorry, we are facing some issues with cloning this voice.\nPlease reload the app and try again.", title='Error')
445
+ print("Did not get any voice name from voice clone api")
446
+ make_request=False
447
  else:
448
  voice_name = [agents[agent]]
449
+ if make_request:
450
+ print("\nParameters Recieved:\n")
451
+ print("speechReqId", session_id)
452
+ print("text", [text])
453
+ print("language", [language_codes[language]])
454
+ print("voice_name", voice_name)
455
+ print("encoding", "default")
456
+ print("expressive", expressiveness)
457
+ print("stability", stability)
458
+ print("clarity", clarity)
459
+ print("speech_rate", speech_rate)
460
+ print("loudness", loudness)
461
+ print("refine_grneration", refine_generation)
462
+ print("\n\n")
463
 
464
+ try:
465
+ if not text or text.strip() == "":
466
+ raise gr.Error("Text input cannot be empty")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
467
 
468
+ if len(text) > 1000:
469
+ raise gr.Error(
470
+ f"Text length must be less than 1000 characters. Current length: {len(text)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
 
473
+ payload = json.dumps(
474
+ {
475
+ "speechReqId": session_id,
476
+ "user_id": user_id,
477
+ "text": [text],
478
+ "language": [language_codes[language]],
479
+ "voice_name": voice_name,
480
+ "encoding": "default",
481
+ "expressive": expressiveness,
482
+ "stability": stability,
483
+ "clarity": clarity,
484
+ "speech_rate": speech_rate,
485
+ "refine_generation": refine_generation,
486
+ "pronunciation_dict":pronunc_dict
487
+ }
488
+ )
489
 
490
+ with requests.post(
491
+ url=parameters.TTS_URL,
492
+ headers={"Authorization": f"Bearer {parameters.TTS_secret_key}"},
493
+ data=payload,
494
+ verify=False,
495
+ stream=True,
496
+ ) as response:
497
+ if response.status_code != 200:
498
+ print(
499
+ f"API request failed with status {response.status_code} and error: {response.reason}"
500
+ )
501
  log_initial_submission(
502
  code=response.status_code,
503
  session_id=session_id,
504
  language=language,
505
+ input_method=None,
506
  agent_used=voice_name,
507
+ voice_path=recording_data,
508
  text_input=text,
509
  expressiveness=expressiveness,
510
  stability=stability,
 
512
  speech_rate=speech_rate,
513
  loudness=loudness,
514
  refine_generation=refine_generation,
515
+ err_code=response.status_code,
516
+ err_msg=response.reason,
517
  )
518
+ raise gr.Error(f"API Error: {response.status_code} - {response.reason}")
519
+ sample_rate = 24000
520
+
521
+ if stream:
522
+ # Streaming mode
523
+ last_chunk_time = time.time()
524
+ for chunk in response.iter_content(chunk_size=32768):
525
+ chunk_received_time = time.time()
526
+ chunk_delay = chunk_received_time - last_chunk_time
527
+ last_chunk_time = chunk_received_time
528
+
529
+ if chunk:
530
+ start_processing_time = time.time()
531
+ audio_chunk = np.frombuffer(chunk, dtype=np.int16)
532
+ processing_time = time.time() - start_processing_time
533
+
534
+ if len(audio_chunk) > 0:
535
+ yield_time_start = time.time()
536
+ yield (sample_rate, audio_chunk)
537
+ yield_time_complete = time.time()
538
+
539
+ print(
540
+ f"Streaming chunk of size {len(audio_chunk)} - "
541
+ f"Chunk delay: {chunk_delay:.4f}s, "
542
+ f"Processing time: {processing_time:.4f}s, "
543
+ f"Yielding time: {(yield_time_complete - yield_time_start):.4f}s"
544
+ )
545
  else:
546
+ # Non-streaming mode
547
+ start_time = time.time()
548
+ audio_bytes = b""
549
+
550
+ for chunk in response.iter_content(chunk_size=32768):
551
+ if chunk:
552
+ print(f"Streaming chunk of size {len(chunk)}")
553
+ audio_bytes += chunk
554
+
555
+ if len(audio_bytes) > 0:
556
+ complete_audio = np.frombuffer(audio_bytes, dtype=np.int16)
557
+ processing_time = time.time() - start_time
558
+
559
+ complete_audio = increase_volume(complete_audio, factor=loudness)
560
+ yield (sample_rate, complete_audio)
561
+
562
+ saved_path = save_generated_audio(complete_audio, session_id)
563
+
564
+ log_initial_submission(
565
+ code=response.status_code,
566
+ session_id=session_id,
567
+ language=language,
568
+ input_method="Select Voice" if agent else "Voice clone",
569
+ agent_used=voice_name,
570
+ voice_path=saved_path,
571
+ text_input=text,
572
+ expressiveness=expressiveness,
573
+ stability=stability,
574
+ clarity=clarity,
575
+ speech_rate=speech_rate,
576
+ loudness=loudness,
577
+ refine_generation=refine_generation,
578
+ )
579
 
580
+ else:
581
+ raise ValueError("No audio data received from API")
582
+
583
+ except requests.RequestException as e:
584
+ raise gr.Error(f"Network Error: Failed to connect to the API server - {str(e)}")
585
+ except Exception as e:
586
+ raise gr.Error(f"An unexpected error occurred: {str(e)}")
587
 
588
 
589
  def disable_rating_box():
 
600
 
601
  def tts_tab():
602
  with aws.fs.open(parameters.GLOBAL_PRONUNCIATION_DICT_PATH,'r') as f:
603
+ global_pronunc_dict = json.loads(f.read())
604
 
605
  pronunc_dict_state = gr.State(value=global_pronunc_dict)
606
  session_id = generate_session_id()
 
632
  <div class="info-heading">🎯 Quick Start Guide</div>
633
  """)
634
  gr.Markdown("""
635
+
636
  🌐 **Select Language**
637
  * Choose from our listed languages
638
+
639
  🎤 **Choose Voice**
640
  * Select from the curated collection of high-quality voices
641
  * Each voice is optimized for natural speech patterns
642
  * You can give your own voice by clicking on Voice clone
643
+
644
  ✍️ **Enter Your Text**
645
  * Type or paste your text in the input box
646
  * Or you can give input by clicking Random Sentence
647
+
648
  ⚙️ **Customize Voice Parameters**
649
  * Fine-tune expressiveness for emotional depth
650
  * Adjust stability for consistent output
651
  * Control clarity for precise articulation
652
+
653
  🎵 **Generate Audio**
654
  * Click the generate button to create your audio
655
  * Processing typically takes a few seconds
656
+
657
  ⭐ **Provide Feedback**
658
  * Rate the generated audio quality
659
  * Give us your feedback
660
  * Your feedback helps improve our system
661
+
662
  💾 **Access Your Audio**
663
  * Download the generated audio for offline use
664
  """)
 
806
  with gr.Row():
807
  pronunc_dict_key_in = gr.Textbox(label="Pronunciation key",placeholder="Enter word")
808
  pronunc_dict_key_out = gr.Textbox(label="Pronunciation Value",placeholder="Enter word with correct pronunciation")
809
+
810
  add_btn = gr.Button("Add pronunciation pair", variant="primary")
811
 
812
  add_btn.click(
 
876
  <li>Add your pronunciation of any word that doesn't sound well</li>
877
  <li>If you don't like the pronunciation of any word, then add your word with the key and value pair</li>
878
  <li><b style = "color:red">Note:-</b>Pronunciation pairs are <i style="color:red">case sensitive</i></li>
879
+ <li>If the model mispronounces some word incorrectly, you can correct it by adding the term as the Pronunciation Key and its phonetical spelling as the Pronunciation Value. For example, if <i><b style="color:red">AI/Cholestrol</b></i> isn't pronounced correctly, respell it as <i><b style = "color:green">ए आई/colestrol</b></i>: enter <i><b style="color:red">AI/Cholestrol</b></i> in the Pronunciation Key field and <i><b style = "color:green">ए आई/colestrol</b></i> in the Pronunciation Value field, then click Add Pronunciation Pair.</li>
880
  </ul>
881
  """)
882
 
 
1096
  line-height: 1.4;
1097
  margin-bottom: 20px;
1098
  }
1099
+ .footer {
1100
+ margin-top:10px;
1101
+ padding: 15px;
1102
+ border-radius: 8px;
1103
+ transition: background-color 0.3s ease;
1104
+ min-height: 200px; /* Consistent height */
1105
+ display: flex;
1106
+ flex-direction: column;
1107
+ justify-content: flex-start;
1108
+ border: 1px solid #e0e0e0;
1109
+ }
1110
+ .footer:hover{
1111
+ background-color: #3f3f46;
1112
+ }
1113
+ .footer .feature-list a.hf-link {
1114
+ color: #FFFF;
1115
+ text-decoration: none;
1116
+ transition: all 0.3s ease;
1117
+ display: inline-block;
1118
+ }
1119
+ .footer .feature-list a.hf-link:hover {
1120
+ color: #EA580C;
1121
+ font-weight: 600;
1122
+ transform: translateX(10px);
1123
+ }
1124
+ .footer .feature-list spam {
1125
+ color: #FFFF;
1126
+ text-decoration: none;
1127
+ transition: all 0.3s ease;
1128
+ display: inline-block;
1129
+ }
1130
+ .footer .feature-list spam:hover {
1131
+ color: #EA580C;
1132
+ font-weight: 600;
1133
+ text-decoration: underline;
1134
+ }
1135
+
1136
  /* For mobile responsiveness */
1137
  @media (max-width: 768px) {
1138
  .features-container {
 
1141
  }
1142
  </style>
1143
  <div style="text-align: center; font-size: 3em; font-weight: bold; margin-bottom: 20px;"> 🚀 Welcome to ORI Text-to-Speech </div>
1144
+
1145
  <div class="section-header">🌟 About Our Technology</div>
1146
  <div class="intro-text">Greetings from Oriserve! We're excited to showcase our refined Text-to-Speech capabilities—powered by generative voice synthesis to deliver <strong>natural-sounding</strong> and <strong>professionally tuned</strong> speech output.</div>
1147
 
 
1152
  <div class="feature-title">🎯 Core Capabilities</div>
1153
  <ul class="feature-list">
1154
  <li><strong>Robust voice models suited for production use</strong></li>
1155
+ <li><strong>Optimized for English and Hindi, with multilingual expansion underway</strong></li>
1156
  <li><strong>Diverse voice styles for varied use cases</strong></li>
1157
  <li><strong>Responsive audio generation with practical latency</strong></li>
1158
  </ul>
1159
  </div>
1160
+
1161
  <div class="feature-block">
1162
  <div class="feature-title">🛠️ Advanced Controls</div>
1163
  <ul class="feature-list">
 
1166
  <li><strong>Balance tuning for clarity and stability</strong></li>
1167
  </ul>
1168
  </div>
1169
+
1170
  <div class="feature-block">
1171
  <div class="feature-title">💫 Special Features</div>
1172
  <ul class="feature-list">
 
1175
  <li><strong>Improved handling of common pronunciation cases</strong></li>
1176
  </ul>
1177
  </div>
1178
+
1179
  <div class="feature-block">
1180
  <div class="feature-title">⚡ Processing Capabilities</div>
1181
  <ul class="feature-list">
 
1184
  <li><strong>Audio streaming with first-byte latency as low as ~150 ms</strong></li>
1185
  </ul>
1186
  </div>
1187
+
1188
  <div class="feature-block">
1189
  <div class="feature-title">🔊 Audio Quality</div>
1190
  <ul class="feature-list">
 
1193
  <li><strong>Consistent synthesis across sessions</strong></li>
1194
  </ul>
1195
  </div>
1196
+
1197
  <div class="feature-block">
1198
  <div class="feature-title">📈 Future Development</div>
1199
  <ul class="feature-list">
 
1202
  <li><strong>Expanded language and dialect support coming soon</strong></li>
1203
  </ul>
1204
  </div>
1205
+ <div class = "footer">
1206
+ <div class="feature-title">How to Reach Us</div>
1207
+ <ul class="feature-list">
1208
+ <li><strong>Email : <spam>ai-team@oriserve.com</spam></strong></li>
1209
+ <li><strong>Huggingface : <a href="https://huggingface.co/Oriserve" class="hf-link">Oriserve huggingface</a></strong></li>
1210
+ <li><strong>GitHub : <a href="https://github.com/OriserveAI" class="hf-link">OriserveAI github</a></strong></li>
1211
+ <li><strong>Website : <a href="https://oriserve.com/" class="hf-link">Oriserve website</a></strong></li>
1212
+ </ul>
1213
+ </div>
1214
  </div>
1215
 
1216
  """
 
1218
 
1219
  return gr.Markdown("")
1220
 
 
1221
  def initialize_app():
1222
  # await audio_manager.load_agent_voices(agents)
1223
 
1224
  try:
1225
+ with gr.Blocks(js=js,css="footer{display:none !important}") as demo:
1226
  with gr.Tabs() as tabs:
1227
  with gr.Tab("🗣️ TTS"):
1228
  tts_tab()
 
1235
 
1236
 
1237
  demo = initialize_app()
1238
+ demo.launch(show_api=False)