# youtube_auto_image1 / logic_tts.py
# Last update by PLXR (commit 7f424a1, verified)
import io
import wave
import re
def _chunk_long_paragraph(paragraph, limit):
    """Break one over-long paragraph into chunks at sentence boundaries.

    Sentences (delimited by ./?/! followed by whitespace) are greedily
    packed into a buffer until adding the next one would exceed *limit*;
    a single sentence longer than *limit* is kept whole.
    """
    pieces = []
    buffer = ""
    for sentence in re.split(r'(?<=[.?!])\s+', paragraph):
        if not sentence.strip():
            continue
        if len(buffer) + len(sentence) > limit:
            # Buffer is full: flush it and start over with this sentence.
            if buffer:
                pieces.append(buffer.strip())
            buffer = sentence
        else:
            buffer += " " + sentence
    # Flush whatever remains after the last sentence.
    if buffer:
        pieces.append(buffer.strip())
    return pieces


def split_text_smartly(text, limit=500):
    """Split *text* into scene-sized chunks.

    1. Newlines always start a new chunk (they reflect user intent).
    2. A paragraph is further split on sentence punctuation only when it
       exceeds *limit* characters.

    Args:
        text: Full input text, possibly multi-line.
        limit: Maximum desired chunk length in characters.

    Returns:
        List of non-empty, stripped text chunks.
    """
    chunks = []
    for paragraph in (p.strip() for p in text.split('\n')):
        if not paragraph:
            continue
        if len(paragraph) <= limit:
            # Short enough: the whole paragraph is one scene.
            chunks.append(paragraph)
        else:
            chunks.extend(_chunk_long_paragraph(paragraph, limit))
    return chunks
def raw_pcm_to_wav(pcm_data, sample_rate=24000):
    """Wrap raw 16-bit mono PCM bytes in a WAV container.

    Gemini TTS returns headerless PCM; browsers need a proper WAV file,
    so this prepends the RIFF/WAVE header.

    Args:
        pcm_data: Raw little-endian 16-bit mono PCM samples.
        sample_rate: Sampling rate in Hz (Gemini TTS emits 24 kHz).

    Returns:
        WAV file contents as bytes, or None if conversion failed.
    """
    try:
        out = io.BytesIO()
        with wave.open(out, "wb") as wav:
            # (nchannels, sampwidth, framerate, nframes, comptype, compname)
            # -> mono, 2 bytes per sample (16-bit); nframes is fixed up by
            #    wave on close, so 0 is fine here.
            wav.setparams((1, 2, sample_rate, 0, "NONE", "not compressed"))
            wav.writeframes(pcm_data)
        return out.getvalue()
    except Exception as exc:
        print(f"PCM to WAV Error: {exc}")
        return None
def generate_speech_chunk(client, model_id, text, voice_name):
    """Synthesize a short text with the Gemini TTS API (preview use).

    Args:
        client: Gemini API client (google-genai style, with .models).
        model_id: TTS-capable model identifier.
        text: Text to speak.
        voice_name: Prebuilt voice to use.

    Returns:
        Raw audio bytes from the first response part's inline data,
        or None if the request failed or produced no audio.
    """
    tts_config = {
        "response_modalities": ["AUDIO"],
        "speech_config": {
            "voice_config": {
                "prebuilt_voice_config": {"voice_name": voice_name}
            }
        },
    }
    try:
        response = client.models.generate_content(
            model=model_id,
            contents=text,
            config=tts_config,
        )
        # Pull the audio payload out of the first candidate, if any.
        candidates = response.candidates
        if candidates and candidates[0].content.parts:
            first_part = candidates[0].content.parts[0]
            if first_part.inline_data:
                return first_part.inline_data.data
        return None
    except Exception as e:
        print(f"TTS Chunk Error: {e}")
        return None
def process_tts_task(index, text, client, model_id, voice_name):
    """Worker function for parallel TTS generation.

    Returns an ``(index, pcm_or_None)`` tuple so the caller can
    reassemble concurrently-produced chunks in their original order.
    The audio is returned as raw PCM (no WAV header) so that multiple
    chunks can later be concatenated into one file.
    """
    pcm = generate_speech_chunk(client, model_id, text, voice_name)
    return (index, pcm) if pcm else (index, None)
def merge_wav_bytes(audio_chunks_pcm, sample_rate=24000):
    """Join multiple raw PCM chunks into one long WAV file.

    Falsy entries (e.g. None from failed TTS calls) are skipped.

    Args:
        audio_chunks_pcm: Iterable of raw PCM byte chunks (or None).
        sample_rate: Sampling rate of the PCM data in Hz.

    Returns:
        Combined WAV file bytes, or None if merging failed.
    """
    try:
        combined = b"".join(chunk for chunk in audio_chunks_pcm if chunk)
        return raw_pcm_to_wav(combined, sample_rate)
    except Exception as exc:
        print(f"Merge Error: {exc}")
        return None