Spaces:

englissi
/

bgtts

Build error

App Files Files Community

bgtts / app.py

englissi

Update app.py

9ed0b14 verified 2 months ago

raw

history blame contribute delete

3.16 kB

	import gradio as gr
	import PyPDF2
	from transformers import AutoProcessor, AutoModel
	import torch
	import numpy as np
	import nltk

	# Fetch the NLTK data packages used for sentence splitting.
	for _resource in ("punkt", "punkt_tab"):
	    nltk.download(_resource)

	# 1. Load the Hugging Face Bark processor and model.
	model_id = "suno/bark-small"
	processor = AutoProcessor.from_pretrained(model_id)
	model = AutoModel.from_pretrained(model_id)

	# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"
	model.to(device)

	def extract_text_from_pdf(pdf_path):
	    """Extract the text of every page of a PDF.

	    Args:
	        pdf_path: Value handed to ``PyPDF2.PdfReader``; ``None`` means no
	            file was supplied. (The original note says recent Gradio passes
	            the temporary file path as a string -- confirm against the
	            installed Gradio version.)

	    Returns:
	        ``""`` when ``pdf_path`` is ``None``. Otherwise the text of each page
	        that yielded any, each followed by a newline. If anything raises
	        while reading, the exception is not propagated; a string starting
	        with ``"PDF 읽기 오류: "`` and carrying the exception message is
	        returned instead.
	    """
	    if pdf_path is None:
	        return ""

	    try:
	        page_texts = [
	            page.extract_text() for page in PyPDF2.PdfReader(pdf_path).pages
	        ]
	        # Pages with no extractable text (empty/None) are skipped entirely.
	        collected = "".join(chunk + "\n" for chunk in page_texts if chunk)
	    except Exception as err:
	        return f"PDF 읽기 오류: {str(err)}"
	    return collected

	def synthesize_speech(text, gender):
	    """Convert text to speech, one sentence at a time.

	    Args:
	        text: Text to read aloud; it is split with ``nltk.sent_tokenize``.
	        gender: The exact label ``"남성 (Male)"`` selects speaker preset 1;
	            any other value selects speaker preset 0.

	    Returns:
	        ``None`` when no non-blank sentence was produced, otherwise a
	        ``(sample_rate, audio)`` tuple where ``audio`` is the NumPy
	        concatenation of the per-sentence outputs and ``sample_rate`` comes
	        from ``model.generation_config``.
	    """
	    # Bulgarian (bg) presets: per the original author, 0 is closer to a
	    # female tone and 1 to a male tone.
	    # NOTE(review): confirm that "v2/bg_speaker_*" presets actually exist for
	    # this Bark checkpoint; verify against the model's speaker library.
	    if gender == "남성 (Male)":
	        voice_preset = "v2/bg_speaker_1"
	    else:
	        voice_preset = "v2/bg_speaker_0"

	    waveforms = []
	    for piece in nltk.sent_tokenize(text):
	        if piece.strip():
	            encoded = processor(
	                piece, voice_preset=voice_preset, return_tensors="pt"
	            ).to(device)
	            # Inference only: no gradients are needed for generation.
	            with torch.no_grad():
	                generated = model.generate(**encoded)
	            # Keep the first item of the output, moved to the CPU as NumPy.
	            waveforms.append(generated[0].cpu().numpy())

	    if len(waveforms) == 0:
	        return None

	    merged = np.concatenate(waveforms)
	    return (model.generation_config.sample_rate, merged)

	def process_input(text, pdf_file, gender):
	    """Main controller: pick the input text, cap its length, synthesize it.

	    Text extracted from an uploaded PDF takes precedence over ``text`` when
	    the PDF yields any non-blank content.

	    Args:
	        text: Text typed by the user; ``None`` is treated as empty.
	        pdf_file: Uploaded PDF passed on to ``extract_text_from_pdf``, or
	            ``None`` when nothing was uploaded.
	        gender: Voice label forwarded unchanged to ``synthesize_speech``.

	    Returns:
	        A ``(message_or_text, audio)`` tuple. ``audio`` is ``None`` when there
	        is nothing to read or when the PDF could not be read; in those cases
	        the first element is the user-facing message instead of the text.
	    """
	    # Guard against a None textbox value so .strip() below cannot raise.
	    text = text or ""

	    if pdf_file is not None:
	        extracted = extract_text_from_pdf(pdf_file)
	        # extract_text_from_pdf reports failures as a string with this
	        # prefix; surface it as-is instead of reading the error aloud.
	        if extracted.startswith("PDF 읽기 오류:"):
	            return extracted, None
	        if extracted.strip():
	            text = extracted

	    if not text.strip():
	        return "텍스트를 직접 입력하거나 PDF 파일을 업로드해주세요.", None

	    # Character cap to keep the server from being overloaded.
	    limited_text = text[:1500]

	    audio = synthesize_speech(limited_text, gender)
	    return limited_text, audio

	# 2. Gradio UI 구성 (최신 Blocks 문법)
	with gr.Blocks(theme=gr.themes.Soft()) as app:
	gr.Markdown("# 🇧🇬 불가리아어 TTS 리더 (신문/PDF)")
	gr.Markdown("불가리아어 텍스트나 PDF 기사를 입력하면 지정한 성별의 음성으로 읽어줍니다.")

	with gr.Row():
	with gr.Column():
	pdf_input = gr.File(label="PDF 파일 업로드 (선택)", file_types=[".pdf"])
	text_input = gr.Textbox(label="불가리아어 텍스트 직접 입력", lines=8, placeholder="여기에 불가리아어 기사 내용을 입력하세요...")
	gender_input = gr.Radio(["남성 (Male)", "여성 (Female)"], label="목소