Spaces:

VIDraft
/

Voice-Clone-Podcast

Runtime error

App Files Files Community

Voice-Clone-Podcast / app.py

seawolf2357

Update app.py

d41998a verified 7 months ago

raw

history blame

45.2 kB

	import spaces
	import gradio as gr
	import os
	import asyncio
	import torch
	import io
	import json
	import re
	import httpx
	import tempfile
	import wave
	import base64
	import numpy as np
	import soundfile as sf
	import subprocess
	import shutil
	import requests
	import logging
	import random
	from datetime import datetime, timedelta
	from typing import List, Tuple, Dict, Optional
	from pathlib import Path
	from threading import Thread
	from dotenv import load_dotenv

	# PDF processing imports
	from langchain_community.document_loaders import PyPDFLoader

	# OpenAI imports
	from openai import OpenAI

	# Transformers imports (for legacy local mode)
	from transformers import (
	AutoModelForCausalLM,
	AutoTokenizer,
	TextIteratorStreamer,
	BitsAndBytesConfig,
	)

	# Llama CPP imports (for new local mode)
	try:
	from llama_cpp import Llama
	from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
	from llama_cpp_agent.providers import LlamaCppPythonProvider
	from llama_cpp_agent.chat_history import BasicChatHistory
	from llama_cpp_agent.chat_history.messages import Roles
	from huggingface_hub import hf_hub_download
	LLAMA_CPP_AVAILABLE = True
	except ImportError:
	LLAMA_CPP_AVAILABLE = False

	# Chatterbox TTS imports
	try:
	from chatterbox.src.chatterbox.tts import ChatterboxTTS
	CHATTERBOX_AVAILABLE = True
	print("✅ Chatterbox TTS imported successfully from chatterbox.src.chatterbox.tts")
	except ImportError:
	try:
	from chatterbox.tts import ChatterboxTTS
	CHATTERBOX_AVAILABLE = True
	print("✅ Chatterbox TTS imported successfully from chatterbox.tts")
	except ImportError:
	try:
	# 다른 가능한 경로 시도
	import sys
	sys.path.append('/usr/local/lib/python3.10/site-packages')
	from chatterbox import ChatterboxTTS
	CHATTERBOX_AVAILABLE = True
	print("✅ Chatterbox TTS imported successfully from chatterbox")
	except ImportError:
	CHATTERBOX_AVAILABLE = False
	print("❌ Chatterbox TTS not available - falling back to text-only mode")

	# Import config and prompts
	from config_prompts import (
	ConversationConfig,
	PromptBuilder,
	DefaultConversations,
	)

	load_dotenv()

	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
	print(f"🚀 Running on device: {DEVICE}")

	# Brave Search API 설정
	BRAVE_KEY = os.getenv("BSEARCH_API")
	BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"


	def set_seed(seed: int):
	"""Sets the random seed for reproducibility across torch, numpy, and random."""
	torch.manual_seed(seed)
	if DEVICE == "cuda":
	torch.cuda.manual_seed(seed)
	torch.cuda.manual_seed_all(seed)
	random.seed(seed)
	np.random.seed(seed)


	def split_text_into_chunks(text: str, max_chars: int = 250) -> list[str]:
	"""
	텍스트를 문장 단위로 나누되, 각 청크가 max_chars를 넘지 않도록 합니다.
	"""
	# 문장 단위로 분리 (기본적인 문장 분리)
	sentences = re.split(r'(?<=[.!?])\s+', text.strip())

	chunks = []
	current_chunk = ""

	for sentence in sentences:
	# 현재 청크에 문장을 추가해도 max_chars를 넘지 않으면 추가
	if len(current_chunk) + len(sentence) + 1 <= max_chars:
	if current_chunk:
	current_chunk += " " + sentence
	else:
	current_chunk = sentence
	else:
	# 현재 청크를 저장하고 새 청크 시작
	if current_chunk:
	chunks.append(current_chunk)

	# 문장 자체가 max_chars보다 긴 경우 강제로 분할
	if len(sentence) > max_chars:
	words = sentence.split()
	temp_chunk = ""
	for word in words:
	if len(temp_chunk) + len(word) + 1 <= max_chars:
	if temp_chunk:
	temp_chunk += " " + word
	else:
	temp_chunk = word
	else:
	if temp_chunk:
	chunks.append(temp_chunk)
	temp_chunk = word
	if temp_chunk:
	current_chunk = temp_chunk
	else:
	current_chunk = sentence

	# 마지막 청크 추가
	if current_chunk:
	chunks.append(current_chunk)

	return chunks


	def brave_search(query: str, count: int = 8, freshness_days: int \| None = None):
	"""Brave Search API를 사용하여 최신 정보 검색"""
	if not BRAVE_KEY:
	return []
	params = {"q": query, "count": str(count)}
	if freshness_days:
	dt_from = (datetime.utcnow() - timedelta(days=freshness_days)).strftime("%Y-%m-%d")
	params["freshness"] = dt_from
	try:
	r = requests.get(
	BRAVE_ENDPOINT,
	headers={"Accept": "application/json", "X-Subscription-Token": BRAVE_KEY},
	params=params,
	timeout=15
	)
	raw = r.json().get("web", {}).get("results") or []
	return [{
	"title": r.get("title", ""),
	"url": r.get("url", r.get("link", "")),
	"snippet": r.get("description", r.get("text", "")),
	"host": re.sub(r"https?://(www\.)?", "", r.get("url", "")).split("/")[0]
	} for r in raw[:count]]
	except Exception as e:
	logging.error(f"Brave search error: {e}")
	return []


	def format_search_results(query: str, for_keyword: bool = False) -> str:
	"""검색 결과를 포맷팅하여 반환"""
	# 키워드 검색의 경우 더 많은 결과 사용
	count = 5 if for_keyword else 3
	rows = brave_search(query, count, freshness_days=7 if not for_keyword else None)
	if not rows:
	return ""

	results = []
	# 키워드 검색의 경우 더 상세한 정보 포함
	max_results = 4 if for_keyword else 2
	for r in rows[:max_results]:
	if for_keyword:
	# 키워드 검색은 더 긴 스니펫 사용
	snippet = r['snippet'][:200] + "..." if len(r['snippet']) > 200 else r['snippet']
	results.append(f"{r['title']}\n{snippet}\nSource: {r['host']}")
	else:
	# 일반 검색은 짧은 스니펫
	snippet = r['snippet'][:100] + "..." if len(r['snippet']) > 100 else r['snippet']
	results.append(f"- {r['title']}: {snippet}")

	return "\n\n".join(results) + "\n"


	def extract_keywords_for_search(text: str, language: str = "English") -> List[str]:
	"""텍스트에서 검색할 키워드 추출"""
	# 텍스트 앞부분만 사용 (너무 많은 텍스트 처리 방지)
	text_sample = text[:500]

	# 영어는 대문자로 시작하는 단어 중 가장 긴 것 1개
	words = text_sample.split()
	keywords = [word.strip('.,!?;:') for word in words
	if len(word) > 4 and word[0].isupper()]
	if keywords:
	return [max(keywords, key=len)] # 가장 긴 단어 1개
	return []


	def search_and_compile_content(keyword: str, language: str = "English") -> str:
	"""키워드로 검색하여 충분한 콘텐츠 컴파일"""
	if not BRAVE_KEY:
	# API 없을 때도 기본 콘텐츠 생성
	return f"""
	Comprehensive information about '{keyword}':

	{keyword} is a significant topic in modern society.
	This subject impacts our lives in various ways and has been
	gaining increasing attention recently.

	Key aspects:
	1. Technological advancement and innovation
	2. Social impact and changes
	3. Future prospects and possibilities
	4. Practical applications
	5. Global trends and developments

	Experts predict that {keyword} will become even more important,
	and it's crucial to develop a deep understanding of this topic.
	"""

	# 영어 검색 쿼리
	queries = [
	f"{keyword} latest news 2024",
	f"{keyword} explained comprehensive",
	f"{keyword} trends forecast",
	f"{keyword} advantages disadvantages",
	f"{keyword} how to use",
	f"{keyword} expert opinions"
	]

	all_content = []
	total_content_length = 0

	for query in queries:
	results = brave_search(query, count=5) # 더 많은 결과 가져오기
	for r in results[:3]: # 각 쿼리당 상위 3개
	content = f"{r['title']}\n{r['snippet']}\nSource: {r['host']}\n"
	all_content.append(content)
	total_content_length += len(r['snippet'])

	# 콘텐츠가 부족하면 추가 생성
	if total_content_length < 1000: # 최소 1000자 확보
	additional_content = f"""
	Additional insights:
	Recent developments in {keyword} show rapid advancement in this field.
	Many experts are actively researching this topic, and its practical
	applications continue to expand.

	Key points to note:
	- Accelerating technological innovation
	- Improving user experience
	- Enhanced accessibility
	- Increased cost efficiency
	- Growing global market

	These factors are making the future of {keyword} increasingly promising.
	"""
	all_content.append(additional_content)

	# 컴파일된 콘텐츠 반환
	compiled = "\n\n".join(all_content)

	# 키워드 기반 소개
	intro = f"### Comprehensive information and latest trends about '{keyword}':\n\n"

	return intro + compiled


	class UnifiedAudioConverter:
	def __init__(self, config: ConversationConfig):
	self.config = config
	self.llm_client = None
	self.legacy_local_model = None
	self.legacy_tokenizer = None
	# 새로운 로컬 LLM 관련
	self.local_llm = None
	self.local_llm_model = None
	self.device = "cuda" if torch.cuda.is_available() else "cpu"
	# 프롬프트 빌더 추가
	self.prompt_builder = PromptBuilder()

	def initialize_api_mode(self, api_key: str):
	"""Initialize API mode with Together API"""
	self.llm_client = OpenAI(api_key=api_key, base_url="https://api.together.xyz/v1")

	@spaces.GPU(duration=120)
	def initialize_local_mode(self):
	"""Initialize new local mode with Llama CPP"""
	if not LLAMA_CPP_AVAILABLE:
	raise RuntimeError("Llama CPP dependencies not available. Please install llama-cpp-python and llama-cpp-agent.")

	if self.local_llm is None or self.local_llm_model != self.config.local_model_name:
	try:
	# 모델 다운로드
	model_path = hf_hub_download(
	repo_id=self.config.local_model_repo,
	filename=self.config.local_model_name,
	local_dir="./models"
	)

	model_path_local = os.path.join("./models", self.config.local_model_name)

	if not os.path.exists(model_path_local):
	raise RuntimeError(f"Model file not found at {model_path_local}")

	# Llama 모델 초기화
	self.local_llm = Llama(
	model_path=model_path_local,
	flash_attn=True,
	n_gpu_layers=81 if torch.cuda.is_available() else 0,
	n_batch=1024,
	n_ctx=16384,
	)
	self.local_llm_model = self.config.local_model_name
	print(f"Local LLM initialized: {model_path_local}")

	except Exception as e:
	print(f"Failed to initialize local LLM: {e}")
	raise RuntimeError(f"Failed to initialize local LLM: {e}")

	@spaces.GPU(duration=60)
	def initialize_legacy_local_mode(self):
	"""Initialize legacy local mode with Hugging Face model (fallback)"""
	if self.legacy_local_model is None:
	quantization_config = BitsAndBytesConfig(
	load_in_4bit=True,
	bnb_4bit_compute_dtype=torch.float16
	)
	self.legacy_local_model = AutoModelForCausalLM.from_pretrained(
	self.config.legacy_local_model_name,
	quantization_config=quantization_config
	)
	self.legacy_tokenizer = AutoTokenizer.from_pretrained(
	self.config.legacy_local_model_name,
	revision='8ab73a6800796d84448bc936db9bac5ad9f984ae'
	)

	def fetch_text(self, url: str) -> str:
	"""Fetch text content from URL"""
	if not url:
	raise ValueError("URL cannot be empty")

	if not url.startswith("http://") and not url.startswith("https://"):
	raise ValueError("URL must start with 'http://' or 'https://'")

	full_url = f"{self.config.prefix_url}{url}"
	try:
	response = httpx.get(full_url, timeout=60.0)
	response.raise_for_status()
	return response.text
	except httpx.HTTPError as e:
	raise RuntimeError(f"Failed to fetch URL: {e}")

	def extract_text_from_pdf(self, pdf_file) -> str:
	"""Extract text content from PDF file"""
	try:
	# Gradio returns file path, not file object
	if isinstance(pdf_file, str):
	pdf_path = pdf_file
	else:
	# If it's a file object (shouldn't happen with Gradio)
	with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
	tmp_file.write(pdf_file.read())
	pdf_path = tmp_file.name

	# PDF 로드 및 텍스트 추출
	loader = PyPDFLoader(pdf_path)
	pages = loader.load()

	# 모든 페이지의 텍스트를 결합
	text = "\n".join([page.page_content for page in pages])

	# 임시 파일인 경우 삭제
	if not isinstance(pdf_file, str) and os.path.exists(pdf_path):
	os.unlink(pdf_path)

	return text
	except Exception as e:
	raise RuntimeError(f"Failed to extract text from PDF: {e}")

	def _get_messages_formatter_type(self, model_name):
	"""Get appropriate message formatter for the model"""
	if "Mistral" in model_name or "BitSix" in model_name:
	return MessagesFormatterType.CHATML
	else:
	return MessagesFormatterType.LLAMA_3

	@spaces.GPU(duration=120)
	def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
	"""Extract conversation using new local LLM with enhanced professional style"""
	try:
	# 검색 컨텍스트 생성
	search_context = ""
	if BRAVE_KEY and not text.startswith("Keyword-based content:"):
	try:
	keywords = extract_keywords_for_search(text, language)
	if keywords:
	search_query = f"{keywords[0]} latest news"
	search_context = format_search_results(search_query)
	print(f"Search context added for: {search_query}")
	except Exception as e:
	print(f"Search failed, continuing without context: {e}")

	# 먼저 새로운 로컬 LLM 시도
	self.initialize_local_mode()

	chat_template = self._get_messages_formatter_type(self.config.local_model_name)
	provider = LlamaCppPythonProvider(self.local_llm)

	# 영어 전용 시스템 메시지
	system_message = (
	f"You are a professional podcast scriptwriter creating high-quality, "
	f"insightful discussions in English. Create exactly 12 conversation exchanges "
	f"with professional expertise. All dialogue must be in English. "
	f"Respond only in JSON format."
	)

	agent = LlamaCppAgent(
	provider,
	system_prompt=system_message,
	predefined_messages_formatter_type=chat_template,
	debug_output=False
	)

	settings = provider.get_provider_default_settings()
	settings.temperature = 0.75
	settings.top_k = 40
	settings.top_p = 0.95
	settings.max_tokens = self.config.max_tokens
	settings.repeat_penalty = 1.1
	settings.stream = False

	messages = BasicChatHistory()

	prompt = self.prompt_builder.build_prompt(text, language, search_context)
	response = agent.get_chat_response(
	prompt,
	llm_sampling_settings=settings,
	chat_history=messages,
	returns_streaming_generator=False,
	print_output=False
	)

	# JSON 파싱
	pattern = r"\{(?:[^{}]\|(?:\{[^{}]\}))\}"
	json_match = re.search(pattern, response)

	if json_match:
	conversation_data = json.loads(json_match.group())
	return conversation_data
	else:
	raise ValueError("No valid JSON found in local LLM response")

	except Exception as e:
	print(f"Local LLM failed: {e}, falling back to legacy local method")
	return self.extract_conversation_legacy_local(text, language, progress, search_context)

	@spaces.GPU(duration=120)
	def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None, search_context: str = "") -> Dict:
	"""Extract conversation using legacy local model"""
	try:
	self.initialize_legacy_local_mode()

	# 영어 전용 메시지
	messages = self.prompt_builder.build_messages_for_local(text, language, search_context)

	terminators = [
	self.legacy_tokenizer.eos_token_id,
	self.legacy_tokenizer.convert_tokens_to_ids("<\|eot_id\|>")
	]

	chat_messages = self.legacy_tokenizer.apply_chat_template(
	messages, tokenize=False, add_generation_prompt=True
	)
	model_inputs = self.legacy_tokenizer([chat_messages], return_tensors="pt").to(self.device)

	streamer = TextIteratorStreamer(
	self.legacy_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
	)

	generate_kwargs = dict(
	model_inputs,
	streamer=streamer,
	max_new_tokens=self.config.max_new_tokens,
	do_sample=True,
	temperature=0.75,
	eos_token_id=terminators,
	)

	t = Thread(target=self.legacy_local_model.generate, kwargs=generate_kwargs)
	t.start()

	partial_text = ""
	for new_text in streamer:
	partial_text += new_text

	pattern = r"\{(?:[^{}]\|(?:\{[^{}]\}))\}"
	json_match = re.search(pattern, partial_text)

	if json_match:
	return json.loads(json_match.group())
	else:
	raise ValueError("No valid JSON found in legacy local response")

	except Exception as e:
	print(f"Legacy local model also failed: {e}")
	return DefaultConversations.get_conversation("English")

	def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
	"""Extract conversation using API"""
	if not self.llm_client:
	raise RuntimeError("API mode not initialized")

	try:
	# 검색 컨텍스트 생성
	search_context = ""
	if BRAVE_KEY and not text.startswith("Keyword-based content:"):
	try:
	keywords = extract_keywords_for_search(text, language)
	if keywords:
	search_query = f"{keywords[0]} latest news"
	search_context = format_search_results(search_query)
	print(f"Search context added for: {search_query}")
	except Exception as e:
	print(f"Search failed, continuing without context: {e}")

	# 메시지 빌드
	messages = self.prompt_builder.build_messages_for_local(text, language, search_context)

	chat_completion = self.llm_client.chat.completions.create(
	messages=messages,
	model=self.config.api_model_name,
	temperature=0.75,
	)

	pattern = r"\{(?:[^{}]\|(?:\{[^{}]\}))\}"
	json_match = re.search(pattern, chat_completion.choices[0].message.content)

	if not json_match:
	raise ValueError("No valid JSON found in response")

	return json.loads(json_match.group())
	except Exception as e:
	raise RuntimeError(f"Failed to extract conversation: {e}")

	def parse_conversation_text(self, conversation_text: str) -> Dict:
	"""Parse conversation text back to JSON format"""
	lines = conversation_text.strip().split('\n')
	conversation_data = {"conversation": []}

	for line in lines:
	if ':' in line:
	speaker, text = line.split(':', 1)
	conversation_data["conversation"].append({
	"speaker": speaker.strip(),
	"text": text.strip()
	})

	return conversation_data

	@spaces.GPU(duration=120)
	def generate_tts_audio_gpu(
	self,
	conversation_json: Dict,
	audio_prompt_path_input: str,
	exaggeration_input: float = 0.5,
	temperature_input: float = 0.8,
	seed_num_input: int = 0,
	cfgw_input: float = 0.5,
	chunk_size_input: int = 250
	) -> tuple[int, np.ndarray]:
	"""
	Chatterbox TTS를 사용하여 대화를 음성으로 변환
	"""
	if not CHATTERBOX_AVAILABLE:
	raise RuntimeError("Chatterbox TTS not available. Please install chatterbox package.")

	try:
	# GPU 함수 내에서 모델 로드
	model = ChatterboxTTS.from_pretrained(DEVICE)
	print(f"✅ Chatterbox TTS model loaded on {DEVICE}")
	except Exception as e:
	raise RuntimeError(f"Failed to load Chatterbox TTS model: {e}")

	if seed_num_input != 0:
	set_seed(int(seed_num_input))

	audio_segments = []

	for i, turn in enumerate(conversation_json["conversation"]):
	text = turn["text"]
	if not text.strip():
	continue

	print(f"🎙️ 생성 중: Speaker {i+1} - '{text[:50]}...'")

	try:
	# 텍스트가 짧으면 단일 생성
	if len(text) <= 300:
	wav = model.generate(
	text,
	audio_prompt_path=audio_prompt_path_input,
	exaggeration=exaggeration_input,
	temperature=temperature_input,
	cfg_weight=cfgw_input,
	)
	wav_chunk = wav.squeeze(0).numpy()
	audio_segments.append(wav_chunk)
	else:
	# 긴 텍스트는 청크로 분할
	chunks = split_text_into_chunks(text, max_chars=chunk_size_input)
	print(f"📝 텍스트를 {len(chunks)}개 청크로 분할")

	chunk_audio_segments = []
	for j, chunk in enumerate(chunks):
	print(f" 📄 청크 {j+1}/{len(chunks)} 생성 중...")
	wav = model.generate(
	chunk,
	audio_prompt_path=audio_prompt_path_input,
	exaggeration=exaggeration_input,
	temperature=temperature_input,
	cfg_weight=cfgw_input,
	)
	wav_chunk = wav.squeeze(0).numpy()
	chunk_audio_segments.append(wav_chunk)

	# 청크들을 연결
	if chunk_audio_segments:
	silence_duration = int(0.1 * model.sr) # 0.1초 무음
	silence = np.zeros(silence_duration)

	turn_audio = []
	for j, segment in enumerate(chunk_audio_segments):
	turn_audio.append(segment)
	if j < len(chunk_audio_segments) - 1:
	turn_audio.append(silence)

	concatenated_turn = np.concatenate(turn_audio)
	audio_segments.append(concatenated_turn)

	except Exception as e:
	print(f"❌ Speaker {i+1} 생성 중 오류 발생: {e}")
	# 오류 발생 시 무음으로 대체
	silence_duration = int(2.0 * model.sr) # 2초 무음
	silence = np.zeros(silence_duration)
	audio_segments.append(silence)
	continue

	if not audio_segments:
	raise RuntimeError("모든 오디오 생성에 실패했습니다.")

	# 모든 스피커의 오디오 세그먼트 연결
	speaker_silence_duration = int(0.5 * model.sr) # 스피커 간 0.5초 무음
	speaker_silence = np.zeros(speaker_silence_duration)

	final_audio = []
	for i, segment in enumerate(audio_segments):
	final_audio.append(segment)
	if i < len(audio_segments) - 1:
	final_audio.append(speaker_silence)

	concatenated_audio = np.concatenate(final_audio)

	print(f"🎉 오디오 생성 완료! 총 길이: {len(concatenated_audio) / model.sr:.2f}초")
	return (model.sr, concatenated_audio)

	def _create_output_directory(self) -> str:
	"""Create a unique output directory"""
	random_bytes = os.urandom(8)
	folder_name = base64.urlsafe_b64encode(random_bytes).decode("utf-8")
	os.makedirs(folder_name, exist_ok=True)
	return folder_name


	# Global converter instance
	converter = UnifiedAudioConverter(ConversationConfig())


	async def synthesize(article_input, input_type: str = "URL", mode: str = "Local"):
	"""Main synthesis function - handles URL, PDF, and Keyword inputs"""
	try:
	# Extract text based on input type
	if input_type == "URL":
	if not article_input or not isinstance(article_input, str):
	return "Please provide a valid URL.", None
	text = converter.fetch_text(article_input)
	elif input_type == "PDF":
	if not article_input:
	return "Please upload a PDF file.", None
	text = converter.extract_text_from_pdf(article_input)
	else: # Keyword
	if not article_input or not isinstance(article_input, str):
	return "Please provide a keyword or topic.", None
	text = search_and_compile_content(article_input, "English")
	text = f"Keyword-based content:\n{text}"

	# Limit text to max words
	words = text.split()
	if len(words) > converter.config.max_words:
	text = " ".join(words[:converter.config.max_words])

	# Extract conversation based on mode
	if mode == "Local":
	try:
	conversation_json = converter.extract_conversation_local(text, "English")
	except Exception as e:
	print(f"Local mode failed: {e}, trying API fallback")
	api_key = os.environ.get("TOGETHER_API_KEY")
	if api_key:
	converter.initialize_api_mode(api_key)
	conversation_json = converter.extract_conversation_api(text, "English")
	else:
	raise RuntimeError("Local mode failed and no API key available for fallback")
	else: # API mode
	api_key = os.environ.get("TOGETHER_API_KEY")
	if not api_key:
	print("API key not found, falling back to local mode")
	conversation_json = converter.extract_conversation_local(text, "English")
	else:
	try:
	converter.initialize_api_mode(api_key)
	conversation_json = converter.extract_conversation_api(text, "English")
	except Exception as e:
	print(f"API mode failed: {e}, falling back to local mode")
	conversation_json = converter.extract_conversation_local(text, "English")

	# Generate conversation text
	conversation_text = "\n".join(
	f"{turn.get('speaker', f'Speaker {i+1}')}: {turn['text']}"
	for i, turn in enumerate(conversation_json["conversation"])
	)

	return conversation_text, None

	except Exception as e:
	return f"Error: {str(e)}", None


	async def regenerate_audio(
	conversation_text: str,
	ref_audio_path: str,
	exaggeration: float = 0.5,
	temperature: float = 0.8,
	seed_num: int = 0,
	cfg_weight: float = 0.5,
	chunk_size: int = 250
	):
	"""Regenerate audio from edited conversation text using Chatterbox TTS"""
	if not conversation_text.strip():
	return "Please provide conversation text.", None

	if not CHATTERBOX_AVAILABLE:
	return "Chatterbox TTS not available. Please check the installation.", None

	try:
	conversation_json = converter.parse_conversation_text(conversation_text)

	if not conversation_json["conversation"]:
	return "No valid conversation found in the text.", None

	# Generate audio using Chatterbox TTS
	try:
	sr, audio = converter.generate_tts_audio_gpu(
	conversation_json,
	ref_audio_path,
	exaggeration,
	temperature,
	seed_num,
	cfg_weight,
	chunk_size
	)

	# Save audio to file
	output_dir = converter._create_output_directory()
	output_file = os.path.join(output_dir, "podcast_audio.wav")
	sf.write(output_file, audio, sr)

	return "🎉 Audio generated successfully!", output_file
	except Exception as e:
	error_msg = str(e)
	if "Chatterbox TTS not available" in error_msg:
	return "❌ Chatterbox TTS is not properly installed. Please check the requirements.", None
	elif "CUDA" in error_msg or "GPU" in error_msg:
	return f"❌ GPU error: {error_msg}. Please try reducing chunk size or use CPU.", None
	else:
	return f"❌ Audio generation error: {error_msg}", None

	except Exception as e:
	return f"❌ Error processing conversation: {str(e)}", None


	def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local"):
	"""Synchronous wrapper for async synthesis"""
	return asyncio.run(synthesize(article_input, input_type, mode))


	def regenerate_audio_sync(conversation_text: str, ref_audio_path: str, exaggeration: float, temperature: float, seed_num: int, cfg_weight: float, chunk_size: int):
	"""Synchronous wrapper for async audio regeneration"""
	return asyncio.run(regenerate_audio(conversation_text, ref_audio_path, exaggeration, temperature, seed_num, cfg_weight, chunk_size))


	def toggle_input_visibility(input_type):
	"""Toggle visibility of URL input, file upload, and keyword input based on input type"""
	if input_type == "URL":
	return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
	elif input_type == "PDF":
	return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
	else: # Keyword
	return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)


	def update_char_count(text, chunk_size):
	"""텍스트 길이 정보 업데이트"""
	char_len = len(text)
	if char_len <= 300:
	return f"{char_len} characters (single generation)"
	else:
	chunks = split_text_into_chunks(text, max_chars=chunk_size)
	chunk_count = len(chunks)
	estimated_time = chunk_count * 3 # 청크당 약 3초 예상
	return f"{char_len} characters, {chunk_count} chunks (estimated time: ~{estimated_time}s)"


	# 모델 초기화 (앱 시작 시)
	if LLAMA_CPP_AVAILABLE:
	try:
	model_path = hf_hub_download(
	repo_id=converter.config.local_model_repo,
	filename=converter.config.local_model_name,
	local_dir="./models"
	)
	print(f"Model downloaded to: {model_path}")
	except Exception as e:
	print(f"Failed to download model at startup: {e}")


	# Gradio Interface
	with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
	.container {max-width: 1200px; margin: auto; padding: 20px;}
	.header-text {text-align: center; margin-bottom: 30px;}
	.input-group {background: #f7f7f7; padding: 20px; border-radius: 10px; margin-bottom: 20px;}
	.output-group {background: #f0f0f0; padding: 20px; border-radius: 10px;}
	.status-box {background: #e8f4f8; padding: 15px; border-radius: 8px; margin-top: 10px;}
	""") as demo:
	with gr.Column(elem_classes="container"):
	# 헤더
	with gr.Row(elem_classes="header-text"):
	gr.Markdown("""
	# 🎙️ LIVE Podcast Generator with Chatterbox TTS
	### Convert any article, blog, PDF document, or topic into an engaging professional podcast conversation!
	""")

	with gr.Row(elem_classes="discord-badge"):
	gr.HTML("""
	<p style="text-align: center;">
	<a href="https://discord.gg/openfreeai" target="_blank" style="display: inline-block; margin-right: 10px;">
	<img src="https://img.shields.io/static/v1?label=Discord&message=Openfree%20AI&color=%230000ff&labelColor=%23800080&logo=discord&logoColor=white&style=for-the-badge" alt="badge">
	</a>
	<a href="https://open.spotify.com/show/36GtIP7iqJxCwp7FfXmTYK?si=KsIsUJq7SJiiudPTaMsXAA" target="_blank" style="display: inline-block;">
	<img src="https://img.shields.io/static/v1?label=Spotify&message=Podcast&color=%230000ff&labelColor=%23000080&logo=Spotify&logoColor=white&style=for-the-badge" alt="badge">
	</a>
	<a href="https://huggingface.co/spaces/openfree/AI-Podcast" target="_blank" style="display: inline-block;">
	<img src="https://img.shields.io/static/v1?label=Huggingface&message=AI%20Podcast&color=%230000ff&labelColor=%23ffa500&logo=huggingface&logoColor=white&style=for-the-badge" alt="badge">
	</a>
	</p>
	""")

	# 상태 표시 섹션
	with gr.Row():
	with gr.Column(scale=1):
	gr.Markdown(f"""
	#### 🤖 System Status
	- LLM: {converter.config.local_model_name.split('.')[0]}
	- Fallback: {converter.config.api_model_name.split('/')[-1]}
	- Llama CPP: {"✅ Ready" if LLAMA_CPP_AVAILABLE else "❌ Not Available"}
	- Chatterbox TTS: {"✅ Ready" if CHATTERBOX_AVAILABLE else "❌ Not Available"}
	- Search: {"✅ Brave API" if BRAVE_KEY else "❌ No API"}
	""")
	with gr.Column(scale=1):
	gr.Markdown("""
	#### 🎙️ Chatterbox TTS Features
	- High Quality: Neural voice synthesis
	- Voice Cloning: Upload your reference audio
	- Unlimited Length: Automatic text chunking
	- Professional Style: Expert podcast discussions
	""")

	# 메인 입력 섹션
	with gr.Group(elem_classes="input-group"):
	with gr.Row():
	# 왼쪽: 입력 옵션들
	with gr.Column(scale=2):
	# 입력 타입 선택
	input_type_selector = gr.Radio(
	choices=["URL", "PDF", "Keyword"],
	value="URL",
	label="📥 Input Type",
	info="Choose your content source"
	)

	# URL 입력
	url_input = gr.Textbox(
	label="🔗 Article URL",
	placeholder="Enter the article URL here...",
	value="",
	visible=True,
	lines=2
	)

	# PDF 업로드
	pdf_input = gr.File(
	label="📄 Upload PDF",
	file_types=[".pdf"],
	visible=False
	)

	# 키워드 입력
	keyword_input = gr.Textbox(
	label="🔍 Topic/Keyword",
	placeholder="Enter a topic (e.g., 'AI trends 2024', 'quantum computing')",
	value="",
	visible=False,
	info="System will search and compile latest information",
	lines=2
	)

	# 오른쪽: 설정 옵션들
	with gr.Column(scale=1):
	# 처리 모드
	mode_selector = gr.Radio(
	choices=["Local", "API"],
	value="Local",
	label="⚙️ Processing Mode",
	info="Local: On-device \| API: Cloud"
	)

	# 생성 버튼
	with gr.Row():
	convert_btn = gr.Button(
	"🎯 Generate Professional Conversation",
	variant="primary",
	size="lg",
	scale=1
	)

	# TTS 설정 섹션
	with gr.Group(elem_classes="input-group"):
	gr.Markdown("### 🎙️ Chatterbox TTS Settings")

	with gr.Row():
	with gr.Column(scale=2):
	ref_audio = gr.Audio(
	sources=["upload", "microphone"],
	type="filepath",
	label="Reference Audio File (Upload your voice)",
	value="https://storage.googleapis.com/chatterbox-demo-samples/prompts/female_shadowheart4.flac",
	info="Upload your voice sample for voice cloning"
	)

	with gr.Column(scale=1):
	exaggeration = gr.Slider(
	0.25, 2, step=.05,
	label="Exaggeration (Neutral = 0.5)",
	value=.5
	)
	cfg_weight = gr.Slider(
	0.2, 1, step=.05,
	label="CFG/Pace",
	value=0.5
	)
	chunk_size = gr.Slider(
	100, 300, step=50,
	label="Chunk Size (characters)",
	value=250,
	info="Text chunking for long conversations"
	)

	with gr.Accordion("Advanced Options", open=False):
	seed_num = gr.Number(value=0, label="Random seed (0 for random)")
	temperature = gr.Slider(0.05, 5, step=.05, label="Temperature", value=.8)

	# 출력 섹션
	with gr.Group(elem_classes="output-group"):
	with gr.Row():
	# 왼쪽: 대화 텍스트
	with gr.Column(scale=3):
	conversation_output = gr.Textbox(
	label="💬 Generated Professional Conversation (Editable)",
	lines=25,
	max_lines=50,
	interactive=True,
	placeholder="Professional podcast conversation will appear here...",
	info="Edit the conversation as needed. Format: 'Speaker Name: Text'"
	)

	# 텍스트 길이 표시
	char_count = gr.Textbox(
	label="Text Information",
	value="0 characters",
	interactive=False
	)

	# 오디오 생성 버튼
	with gr.Row():
	generate_audio_btn = gr.Button(
	"🎙️ Generate Audio with Chatterbox TTS",
	variant="secondary",
	size="lg"
	)

	# 오른쪽: 오디오 출력 및 상태
	with gr.Column(scale=2):
	audio_output = gr.Audio(
	label="🎧 Professional Podcast Audio",
	type="filepath",
	interactive=False
	)

	status_output = gr.Textbox(
	label="📊 Status",
	interactive=False,
	lines=3,
	elem_classes="status-box"
	)

	# 도움말
	gr.Markdown("""
	#### 💡 Quick Tips:
	- URL: Paste any article link
	- PDF: Upload documents directly
	- Keyword: Enter topics for AI research
	- Voice Cloning: Upload reference audio
	- Edit conversation before audio generation
	- Longer text automatically chunked
	""")

	# 예제 섹션
	with gr.Accordion("📚 Examples", open=False):
	gr.Examples(
	examples=[
	["https://huggingface.co/blog/openfreeai/cycle-navigator", "URL", "Local"],
	["quantum computing breakthroughs", "Keyword", "Local"],
	["https://huggingface.co/papers/2505.14810", "URL", "Local"],
	["artificial intelligence ethics", "Keyword", "Local"],
	],
	inputs=[url_input, input_type_selector, mode_selector],
	outputs=[conversation_output, status_output],
	fn=synthesize_sync,
	cache_examples=False,
	)

	# Input type change handler
	input_type_selector.change(
	fn=toggle_input_visibility,
	inputs=[input_type_selector],
	outputs=[url_input, pdf_input, keyword_input]
	)

	# 텍스트 입력 시 문자 수 업데이트
	conversation_output.change(
	fn=update_char_count,
	inputs=[conversation_output, chunk_size],
	outputs=[char_count]
	)

	chunk_size.change(
	fn=update_char_count,
	inputs=[conversation_output, chunk_size],
	outputs=[char_count]
	)

	# 이벤트 연결
	def get_article_input(input_type, url_input, pdf_input, keyword_input):
	"""Get the appropriate input based on input type"""
	if input_type == "URL":
	return url_input
	elif input_type == "PDF":
	return pdf_input
	else: # Keyword
	return keyword_input

	convert_btn.click(
	fn=lambda input_type, url_input, pdf_input, keyword_input, mode: synthesize_sync(
	get_article_input(input_type, url_input, pdf_input, keyword_input), input_type, mode
	),
	inputs=[input_type_selector, url_input, pdf_input, keyword_input, mode_selector],
	outputs=[conversation_output, status_output]
	)

	generate_audio_btn.click(
	fn=regenerate_audio_sync,
	inputs=[conversation_output, ref_audio, exaggeration, temperature, seed_num, cfg_weight, chunk_size],
	outputs=[status_output, audio_output]
	)


	# Launch the app
	if __name__ == "__main__":
	demo.queue(api_open=True, default_concurrency_limit=10).launch(
	show_api=True,
	share=False,
	server_name="0.0.0.0",
	server_port=7860
	)