|
|
import gradio as gr |
|
|
from g4f.client import Client |
|
|
import json |
|
|
import torch |
|
|
import soundfile as sf |
|
|
from openvoice_cli.__main__ import tune_one |
|
|
import os |
|
|
import uuid |
|
|
import zipfile |
|
|
import logging |
|
|
import numpy as np |
|
|
from concurrent.futures import ThreadPoolExecutor |
|
|
import threading |
|
|
|
|
|
|
|
|
# Console logging for the whole app; module-level logger per stdlib convention.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# g4f chat-completion client used by generate_text() (network-backed).
client = Client()

# Silero TTS configuration: Russian v3 model with one fixed base voice.
# NOTE(review): the base voice 'baya' is used for every dialogue line;
# per-character timbre is applied afterwards in process_line() via
# openvoice tune_one() with a reference clip — confirm this is intended.
language = 'ru'
model_id = 'ru_v3'
speaker = 'baya'
sample_rate = 48000        # Hz, matches Silero v3 output options
device = torch.device('cpu')

# Download/load the Silero TTS model from torch.hub (network + disk cache
# on first run; executed at import time).
model, _ = torch.hub.load(repo_or_dir='snakers4/silero-models',
                          model='silero_tts',
                          language=language,
                          speaker=model_id)
model.to(device)

# Serializes apply_tts() calls: the shared Silero model is not assumed
# thread-safe, and process_line() runs in a ThreadPoolExecutor.
tts_lock = threading.Lock()
|
|
|
|
|
def get_task(prompt):
    """Wrap the user's question in the Russian prompt template for the LLM.

    The template instructs the model to reply strictly as a JSON list of
    single-key objects ({"Киселёв": ...} / {"Путин": ...}) — exactly the
    shape that validate_response() accepts.

    Args:
        prompt: The viewer's question, interpolated verbatim into the template.

    Returns:
        The full prompt string to send as the user message.
    """
    # The doubled braces {{ }} are f-string escapes: they render as literal
    # single braces in the JSON examples shown to the model.
    return f'''Ты генерируешь скрипт продолжения прямой линии с Владимиром Владимировичем Путиным. Вопросы телезрителей передаёт Киселёв - он максимально подобострастен, поддерживает Путина и Россию. Пиши в духе пропаганды пророссийской. Каждая реплика - не менее 150-200 символов. Английские названия при необходимости всегда транскрибируй кириллицей. Числительные также записывай кириллицей. Пиши в стиле кремлёвской пропаганды. Пиши в стереотипном стиле. Киселёв задаёт вопросы, обрашается к Путину как "Владимир Владимирович" или "Господин Президент". Путин отвечает. Всё максимально аутентично. Фразы Киселёва: {{"Киселёв":"фраза"}} Фразы Путина: {{"Путин":"фраза"}} Ответ дай в формате JSON без дополнительных символов: [{{"Киселёв":"фраза"}}, {{"Путин":"фраза"}} . . . ].
Вопрос от пользователя поступил: "{prompt}"'''
|
|
|
|
|
def validate_response(response):
    """Check that *response* parses as a JSON list of single-key objects.

    Accepts strings like '[{"A": "x"}, {"B": "y"}]' (each entry one
    speaker/line pair). Any parse failure or other JSON shape is rejected.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        bool: True when the text is a JSON list whose every element is a
        dict with exactly one key; False otherwise.
    """
    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        return False
    if not isinstance(parsed, list):
        return False
    return all(isinstance(entry, dict) and len(entry) == 1 for entry in parsed)
|
|
|
|
|
def generate_text(prompt):
    """Ask the LLM for a dialogue script, retrying until it is valid JSON.

    Makes up to 4 attempts; each response is checked with
    validate_response(). A provider/network failure now costs one retry
    instead of crashing the whole request (the original let the exception
    propagate out of the Gradio handler). When every attempt fails, a
    canned, valid fallback script is returned so callers can always
    json.loads() the result.

    Args:
        prompt: User question inserted into the prompt template.

    Returns:
        str: A JSON string — a list of single-key {"speaker": "line"} objects.
    """
    max_retries = 4
    for attempt in range(max_retries):
        # Lazy %-style args keep log formatting off the hot path.
        logger.info("Generating response for prompt: %s (attempt %d)", prompt, attempt + 1)
        try:
            response = client.chat.completions.create(
                model="llama-3.3-70b",
                messages=[{"role": "user", "content": get_task(prompt)}],
                web_search=False
            )
            response_text = response.choices[0].message.content
        except Exception:
            # Fix: g4f providers fail routinely; burn a retry, don't crash.
            logger.exception("LLM request failed on attempt %d", attempt + 1)
            continue
        logger.info("Generated response: %s", response_text)

        if validate_response(response_text):
            return response_text
        logger.warning("Invalid response format, retrying...")

    logger.error("Failed to generate valid response after %d attempts", max_retries)
    return '[{"Киселёв":"К сожалению, не удалось расслышать вопрос. Пожалуйста, попробуйте еще раз."}, {"Путин":"Мы работаем над улучшением системы. Спасибо за понимание."}]'
|
|
|
|
|
def split_text(text, max_length=800):
    """Split *text* into chunks of at most *max_length* characters.

    Splits at the last space before the limit when possible, otherwise
    hard-cuts at max_length. Leading whitespace on each remainder is
    stripped. A trailing empty chunk (which the original could produce
    when the final remainder was pure whitespace, yielding an empty
    <speak></speak> TTS call downstream) is dropped; an empty input still
    yields [''] so generate_audio() always gets at least one piece.

    Args:
        text: The string to split.
        max_length: Maximum chunk length in characters.

    Returns:
        list[str]: Ordered chunks covering the input.
    """
    chunks = []
    while len(text) > max_length:
        split_at = text.rfind(' ', 0, max_length)
        if split_at == -1:
            # No space inside the window: fall back to a hard cut.
            split_at = max_length
        chunks.append(text[:split_at])
        text = text[split_at:].lstrip()
    # Fix: skip a trailing empty remainder, but keep [''] for empty input.
    if text or not chunks:
        chunks.append(text)
    return chunks
|
|
|
|
|
def generate_audio(text, speaker_name):
    """Synthesize *text* to a temporary WAV file and return its path.

    The text is chunked to respect the TTS input limit, each chunk is
    synthesized under tts_lock (the shared Silero model is not assumed
    thread-safe), and the pieces are concatenated into one waveform.
    Note: *speaker_name* is only used for logging — every line is rendered
    with the module-level base voice and re-timbred later by tune_one().

    Args:
        text: The line to synthesize.
        speaker_name: Character name, for log messages only.

    Returns:
        str: Path of a freshly written temp_<uuid>.wav file.
    """
    logger.info(f"Generating audio for {speaker_name} ({len(text)} chars)")

    pieces = []
    for part in split_text(text):
        with tts_lock:
            # NOTE(review): chunk text goes into SSML unescaped; a '&' or '<'
            # in the script would break synthesis — confirm upstream content.
            rendered = model.apply_tts(
                ssml_text=f"<speak>{part}</speak>",
                speaker=speaker,
                sample_rate=sample_rate,
                put_accent=True,
                put_yo=True
            )
        pieces.append(rendered)

    wav_path = f"temp_{uuid.uuid4().hex}.wav"
    sf.write(wav_path, np.concatenate(pieces), sample_rate)
    return wav_path
|
|
|
|
|
def process_line(args):
    """Render one dialogue line to its final per-speaker WAV file.

    Pipeline per line: base TTS via generate_audio(), then voice
    conversion via tune_one() against the character's reference clip,
    then rename to the ordered final name.

    Args:
        args: (index, speaker, text) tuple as submitted by process_prompt().

    Returns:
        str | None: "t{n}-{speaker}.wav" on success, None on any failure.
        Intermediate files are always cleaned up in the finally block.
    """
    line_idx, who, line_text = args
    final_name = f"t{line_idx+1}-{who}.wav"
    raw_wav = None
    tuned_wav = None

    try:
        logger.info(f"Processing line {line_idx+1} for {who}")

        raw_wav = generate_audio(line_text, who)
        if not os.path.exists(raw_wav):
            logger.error(f"Base audio not generated for line {line_idx+1}")
            return None

        # Pick the voice-conversion reference clip for this character.
        reference = "kisel.mp3" if who == "Киселёв" else "putin.mp3"
        tuned_wav = f"output_{uuid.uuid4().hex[:6]}.wav"

        logger.debug(f"Tuning audio with reference: {reference}")
        tune_one(
            input_file=raw_wav,
            ref_file=reference,
            output_file=tuned_wav,
            device='cpu'
        )

        if not os.path.exists(tuned_wav):
            logger.error(f"Voice tuning failed for line {line_idx+1}")
            return None

        # Success: promote the tuned file to its final, ordered name.
        os.rename(tuned_wav, final_name)
        logger.info(f"Created final file: {final_name}")
        return final_name

    except Exception as e:
        logger.error(f"Error processing line {line_idx+1}: {str(e)}", exc_info=True)
        return None
    finally:
        # After a successful rename the tuned file no longer exists, so only
        # leftover intermediates from failed runs are deleted here.
        for leftover in (raw_wav, tuned_wav):
            if leftover and os.path.exists(leftover):
                os.remove(leftover)
|
|
|
|
|
def process_prompt(prompt):
    """End-to-end pipeline: prompt -> script -> per-line audio -> zip.

    Generates the dialogue script with generate_text(), synthesizes each
    line in parallel (4 worker threads), zips the successful results,
    removes the loose WAV files, and returns the zip filename for the
    Gradio File output.

    Args:
        prompt: The viewer's question.

    Returns:
        str: Name of the zip archive containing the generated WAV files.
    """
    logger.info(f"Starting processing for prompt: {prompt}")

    # generate_text() guarantees a parseable JSON list (it has a fallback).
    script_data = json.loads(generate_text(prompt))

    # Flatten [{speaker: text}, ...] into indexed (idx, speaker, text) items.
    tasks = [
        (idx, who, line)
        for idx, entry in enumerate(script_data)
        for who, line in entry.items()
    ]

    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(process_line, task) for task in tasks]
        # Collect in submission order so files stay aligned with the script;
        # failed lines come back as None and are filtered out.
        audio_files = [name for name in (f.result() for f in futures) if name]

    zip_filename = "output_audio_files.zip"
    with zipfile.ZipFile(zip_filename, 'w') as archive:
        for wav in audio_files:
            archive.write(wav)

    # The zip is the only artifact we keep; drop the loose WAVs.
    for wav in audio_files:
        os.remove(wav)

    return zip_filename
|
|
|
|
|
|
|
|
# Canned example prompts shown beneath the input box.
examples = [
    "Почему такие высокие налоги?",
    "Какие цели СВО?",
    "Когда развалится Америка?"
]

# Gradio UI: one textbox in, one downloadable zip out.
with gr.Blocks() as demo:
    gr.Markdown("# Kisel TV")

    with gr.Row():
        prompt_input = gr.Textbox(
            label="Input Prompt",
            placeholder="Enter your text here...",
            lines=3
        )

    generate_btn = gr.Button("Generate", variant="primary")
    output = gr.File(label="Generated Audio Files")

    # Clickable examples run the full pipeline; not cached because the
    # output is a zip of freshly generated files.
    gr.Examples(
        examples=examples,
        inputs=prompt_input,
        outputs=output,
        fn=process_prompt,
        cache_examples=False
    )

    # Main entry: button click runs the whole prompt -> zip pipeline.
    generate_btn.click(
        fn=process_prompt,
        inputs=prompt_input,
        outputs=output
    )

if __name__ == "__main__":
    demo.launch()