Spaces:
Sleeping
Sleeping
| import io | |
| import wave | |
| import re | |
def split_text_smartly(text, limit=500):
    """Split text into chunks suitable for one speech-synthesis request each.

    Splitting happens in two stages:

    1. Every line break is a hard boundary. Each line is stripped of
       surrounding whitespace and blank lines are dropped.
    2. A line longer than ``limit`` characters is split further after
       sentence-ending punctuation (``.``, ``?`` or ``!``) that is followed
       by whitespace. Consecutive sentences are packed together, joined by
       a single space, for as long as the packed chunk stays within
       ``limit`` characters.

    A single sentence that is by itself longer than ``limit`` is not cut; it
    is emitted intact as its own (oversized) chunk.

    Args:
        text: The input text. ``None`` or an empty string yields ``[]``.
        limit: Maximum number of characters per chunk.

    Returns:
        A list of non-empty, whitespace-stripped chunks in their original
        order.
    """
    if not text:
        return []

    final_chunks = []
    for paragraph in text.split('\n'):
        paragraph = paragraph.strip()
        if not paragraph:
            continue

        # A line that already fits is kept whole as one chunk.
        if len(paragraph) <= limit:
            final_chunks.append(paragraph)
            continue

        # Only over-long lines are broken up at sentence boundaries.
        current_chunk = ""
        for sentence in re.split(r'(?<=[.?!])\s+', paragraph):
            sentence = sentence.strip()
            if not sentence:
                continue
            if not current_chunk:
                current_chunk = sentence
                continue
            # The "+ 1" accounts for the space inserted between sentences;
            # without it a packed chunk could end up one character over limit.
            if len(current_chunk) + 1 + len(sentence) > limit:
                final_chunks.append(current_chunk)
                current_chunk = sentence
            else:
                current_chunk = f"{current_chunk} {sentence}"

        # Flush whatever is left of this line.
        if current_chunk:
            final_chunks.append(current_chunk)

    return final_chunks
def raw_pcm_to_wav(pcm_data, sample_rate=24000):
    """Wrap raw PCM audio bytes in a WAV container.

    The data is written as mono, 16-bit (2 bytes per sample) audio at
    ``sample_rate`` Hz.

    Args:
        pcm_data: Raw PCM sample bytes.
        sample_rate: Frame rate written to the WAV header.

    Returns:
        The complete WAV file as bytes, or ``None`` if writing failed (the
        error is printed).
    """
    buffer = io.BytesIO()
    try:
        writer = wave.open(buffer, "wb")
        with writer:
            writer.setnchannels(1)   # mono
            writer.setsampwidth(2)   # 16-bit samples
            writer.setframerate(sample_rate)
            writer.writeframes(pcm_data)
    except Exception as e:
        print(f"PCM to WAV Error: {e}")
        return None
    # The header is finalized when the writer closes, so read the buffer after.
    return buffer.getvalue()
def generate_speech_chunk(client, model_id, text, voice_name):
    """Synthesize speech for one piece of text and return the audio payload.

    Calls ``client.models.generate_content`` requesting the ``AUDIO``
    modality with the given prebuilt voice, then returns the inline data of
    the first part of the first candidate that carries any.

    Args:
        client: Object exposing ``models.generate_content(...)``.
        model_id: Model identifier passed through as ``model``.
        text: Text to speak, passed through as ``contents``.
        voice_name: Name of the prebuilt voice to use.

    Returns:
        The ``inline_data.data`` value of the first part that has inline
        data, or ``None`` when the response has no candidates, no content,
        no such part, or when any error occurs (the error is printed).
    """
    try:
        response = client.models.generate_content(
            model=model_id,
            contents=text,
            config={
                "response_modalities": ["AUDIO"],
                "speech_config": {
                    "voice_config": {
                        "prebuilt_voice_config": {"voice_name": voice_name}
                    }
                },
            },
        )

        candidates = response.candidates
        if not candidates:
            return None

        # A candidate may come back without content; treat that as
        # "no audio" rather than as an error.
        content = candidates[0].content
        parts = content.parts if content is not None else None

        # Scan every part rather than only the first, so audio that follows
        # a non-audio part is still found.
        for part in parts or []:
            inline = getattr(part, "inline_data", None)
            if inline:
                return inline.data
        return None
    except Exception as e:
        # Best-effort contract: callers receive None instead of an exception.
        print(f"TTS Chunk Error: {e}")
        return None
def process_tts_task(index, text, client, model_id, voice_name):
    """Run one synthesis job and tag the result with its position.

    Intended as the unit of work for parallel execution: ``index`` is
    returned alongside the audio so results can be put back in order.

    Args:
        index: Position of this text in the overall sequence.
        text: Text to synthesize.
        client: Client object forwarded to ``generate_speech_chunk``.
        model_id: Model identifier forwarded to ``generate_speech_chunk``.
        voice_name: Voice name forwarded to ``generate_speech_chunk``.

    Returns:
        A tuple ``(index, audio)`` where ``audio`` is the value returned by
        ``generate_speech_chunk`` as-is (no WAV header is added here), or
        ``None`` when that value is empty or missing.
    """
    pcm = generate_speech_chunk(client, model_id, text, voice_name)
    return index, (pcm if pcm else None)
def merge_wav_bytes(audio_chunks_pcm, sample_rate=24000):
    """Concatenate PCM chunks and wrap the result in a single WAV file.

    Falsy entries (``None``, empty bytes) are skipped; the remaining chunks
    are joined in order and handed to ``raw_pcm_to_wav``.

    Args:
        audio_chunks_pcm: Iterable of PCM byte chunks, possibly with gaps.
        sample_rate: Frame rate forwarded to ``raw_pcm_to_wav``.

    Returns:
        Whatever ``raw_pcm_to_wav`` returns for the joined data, or ``None``
        if joining fails (the error is printed).
    """
    try:
        combined = b"".join(filter(None, audio_chunks_pcm))
        return raw_pcm_to_wav(combined, sample_rate)
    except Exception as e:
        print(f"Merge Error: {e}")
        return None