File size: 1,331 Bytes
68a99fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from typing import List
from pathlib import Path
from ..base import BaseTTS

class KittenTTSProcessor(BaseTTS):
	"""Text-to-Speech processor using KittenTTS with streaming support."""

	def __init__(self, stream_audio=False):
		"""Register the known voices and load the KittenTTS model.

		Args:
			stream_audio: Forwarded unchanged to ``BaseTTS.__init__``.
		"""
		super().__init__("Kitten", stream_audio=stream_audio)
		# Index into ``self.voices``; 7 selects the last entry, 'expr-voice-5-f'.
		self.default_voice_index = 7
		self.voices = [
			'expr-voice-2-m', 'expr-voice-2-f',
			'expr-voice-3-m', 'expr-voice-3-f',
			'expr-voice-4-m', 'expr-voice-4-f',
			'expr-voice-5-m', 'expr-voice-5-f',
		]
		print("Initialising Kitten...")
		# Imported here rather than at module level so the dependency is only
		# needed when this processor is actually instantiated.
		from kittentts import KittenTTS
		print("Loading Model...")
		self.pipeline = KittenTTS("KittenML/kitten-tts-nano-0.2")
		print("Model loaded successfully")

	def generate_audio_files(self, text: str, voice: str, speed: float, chunk_id: int = None):
		"""Synthesise ``text`` sentence by sentence.

		Each sentence is generated with ``self.pipeline``; the resulting audio
		is queued for streaming when ``self.stream_audio`` is set and written
		to a chunk file when ``self.save_audio_file`` is set.

		Args:
			text: Input text; split with ``self.split_sentences``.
			voice: Voice name passed to the engine's ``generate`` call.
			speed: Accepted for interface compatibility but currently not
				forwarded to the engine.
				NOTE(review): confirm whether ``generate`` takes a ``speed``
				argument in the installed kittentts version and forward it.
			chunk_id: Identifier passed to ``generate_chunk_audio_file`` for
				every sentence. When ``None``, the sentence index is used.

		Returns:
			List of the values returned by ``generate_chunk_audio_file``, in
			sentence order; empty when nothing was saved.
		"""
		sentences = self.split_sentences(text)
		audio_files = []

		print(f"Processing {len(sentences)} text sentences...")
		for i, sentence in enumerate(sentences):
			audio = self.pipeline.generate(sentence, voice=voice)
			if self.stream_audio:
				self.queue_audio_for_streaming(audio)

			if self.save_audio_file:
				# ``is not None`` so that an explicit chunk_id of 0 is honoured.
				file_id = chunk_id if chunk_id is not None else i
				chunk_file = self.generate_chunk_audio_file(audio, file_id)
				audio_files.append(chunk_file)
				print(f"Sentence {i + 1} processed -> {chunk_file.name} -> {sentence}")
			else:
				# No file exists in streaming-only mode, so there is no name
				# to report; referencing chunk_file here would be unbound.
				print(f"Sentence {i + 1} processed -> {sentence}")

		return audio_files