Spaces:
Build error
Build error
| import gradio as gr | |
| import PyPDF2 | |
| from transformers import AutoProcessor, AutoModel | |
| import torch | |
| import numpy as np | |
| import nltk | |
# Download the NLTK tokenizer data used for sentence splitting.
nltk.download('punkt')
nltk.download('punkt_tab')

# 1. Load the Hugging Face Bark processor and model.
model_id = "suno/bark-small"
# Use the GPU when one is visible to torch; otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained(model_id)
# Module.to() moves the weights and returns the same model object.
model = AutoModel.from_pretrained(model_id).to(device)
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Value handed to ``PyPDF2.PdfReader``. The original author
            notes that recent Gradio versions pass the uploaded file's
            temporary path as a string. ``None`` means no file was given.

    Returns:
        The text of each page that yielded any, each followed by a newline.
        An empty string when ``pdf_path`` is ``None``. If reading fails, the
        error is not raised; a message string describing it is returned
        instead.
    """
    if pdf_path is None:
        return ""

    try:
        pdf_pages = PyPDF2.PdfReader(pdf_path).pages
        page_texts = [page.extract_text() for page in pdf_pages]
        # Pages with no extractable text (empty/None) are skipped.
        return "".join(chunk + "\n" for chunk in page_texts if chunk)
    except Exception as exc:
        # Best-effort contract: callers receive the failure as plain text.
        return f"PDF ์ฝ๊ธฐ ์ค๋ฅ: {exc}"
def synthesize_speech(text, gender):
    """Convert text to speech, one sentence at a time.

    Args:
        text: Text to read aloud; it is split with ``nltk.sent_tokenize``.
        gender: Voice choice label. The male label selects Bark preset
            ``v2/bg_speaker_1``; any other value selects ``v2/bg_speaker_0``.

    Returns:
        ``(sample_rate, audio)`` where ``audio`` is the concatenation of the
        per-sentence outputs, or ``None`` when no non-blank sentence was
        found.
    """
    # Bulgarian (bg) presets; per the original author's note, speaker 0
    # sounds closer to a female voice and speaker 1 closer to a male voice.
    if gender == "๋จ์ฑ (Male)":
        voice_preset = "v2/bg_speaker_1"
    else:
        voice_preset = "v2/bg_speaker_0"

    waveforms = []
    for sentence in nltk.sent_tokenize(text):
        if sentence.strip():
            encoded = processor(
                sentence, voice_preset=voice_preset, return_tensors="pt"
            )
            encoded = encoded.to(device)
            # Inference only, so gradient tracking is switched off.
            with torch.no_grad():
                generated = model.generate(**encoded)
            # Keep the first item of the generated batch as a NumPy array.
            waveforms.append(generated[0].cpu().numpy())

    if len(waveforms) == 0:
        return None

    final_audio = np.concatenate(waveforms)
    sample_rate = model.generation_config.sample_rate
    return (sample_rate, final_audio)
def process_input(text, pdf_file, gender):
    """Main controller: pick the input text, cap its length, synthesize it.

    Args:
        text: Text typed by the user. ``None`` is treated as empty.
        pdf_file: Uploaded PDF handed to ``extract_text_from_pdf``, or
            ``None`` when no file was uploaded. Non-blank extracted text
            takes precedence over ``text``.
        gender: Voice choice label forwarded to ``synthesize_speech``.

    Returns:
        ``(shown_text, audio)``. When there is nothing to read, ``shown_text``
        is a prompt asking for text or a PDF and ``audio`` is ``None``.
        Otherwise ``shown_text`` is the (possibly truncated) text that was
        synthesized and ``audio`` is the result of ``synthesize_speech``.
    """
    # A missing textbox value would otherwise crash on ``.strip()`` below.
    if text is None:
        text = ""

    if pdf_file is not None:
        extracted = extract_text_from_pdf(pdf_file)
        # NOTE(review): extract_text_from_pdf returns its error message as
        # a string, so a failed PDF read also replaces ``text`` here and the
        # message is what gets synthesized -- confirm this is intended.
        if extracted.strip():
            text = extracted

    if not text.strip():
        return "ํ ์คํธ๋ฅผ ์ง์ ์ ๋ ฅํ๊ฑฐ๋ PDF ํ์ผ์ ์ ๋ก๋ํด์ฃผ์ธ์.", None

    # Character cap to keep the server from being overloaded.
    limited_text = text[:1500]
    audio = synthesize_speech(limited_text, gender)
    return limited_text, audio
| # 2. Gradio UI layout (latest Blocks syntax) | |
| with gr.Blocks(theme=gr.themes.Soft()) as app: | |
| gr.Markdown("# ๐ง๐ฌ ๋ถ๊ฐ๋ฆฌ์์ด TTS ๋ฆฌ๋ (์ ๋ฌธ/PDF)") | |
| gr.Markdown("๋ถ๊ฐ๋ฆฌ์์ด ํ ์คํธ๋ PDF ๊ธฐ์ฌ๋ฅผ ์ ๋ ฅํ๋ฉด ์ง์ ํ ์ฑ๋ณ์ ์์ฑ์ผ๋ก ์ฝ์ด์ค๋๋ค.") | |
| with gr.Row(): | |
| with gr.Column(): | |
| pdf_input = gr.File(label="PDF ํ์ผ ์ ๋ก๋ (์ ํ)", file_types=[".pdf"]) | |
| text_input = gr.Textbox(label="๋ถ๊ฐ๋ฆฌ์์ด ํ ์คํธ ์ง์ ์ ๋ ฅ", lines=8, placeholder="์ฌ๊ธฐ์ ๋ถ๊ฐ๋ฆฌ์์ด ๊ธฐ์ฌ ๋ด์ฉ์ ์ ๋ ฅํ์ธ์...") | |
| gender_input = gr.Radio(["๋จ์ฑ (Male)", "์ฌ์ฑ (Female)"], label="๋ชฉ์ |