| import gradio as gr | |
| from transformers import pipeline | |
| import PyPDF2 | |
| import ebooklib | |
| from ebooklib import epub | |
| import re | |
| import tempfile | |
| import os | |
| from pydub import AudioSegment | |
def read_pdf(file_path):
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The extracted text of all pages in document order, each page
        followed by a newline.
    """
    with open(file_path, 'rb') as pdf_stream:
        pages = PyPDF2.PdfReader(pdf_stream).pages
        # Extraction must happen while the stream is still open.
        return "".join(page.extract_text() + "\n" for page in pages)
def read_epub(file_path):
    """Extract the readable text of every document item in an EPUB file.

    The markup of each document item is reduced to plain text: embedded
    ``<script>``/``<style>`` elements are dropped together with their
    contents, the remaining tags are stripped, and HTML entities such as
    ``&amp;`` are decoded so they are not spoken literally by the TTS step.

    Args:
        file_path: Path of the EPUB file to read.

    Returns:
        The plain text of all document items in the order the book yields
        them, each item followed by a newline.
    """
    import html  # function-scope import: only needed for entity decoding

    book = epub.read_epub(file_path)
    parts = []
    for doc in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        markup = doc.get_content().decode()
        # Stripping only the tags would leave CSS/JS source in the text.
        markup = re.sub(r'(?is)<(script|style)\b.*?</\1\s*>', '', markup)
        plain = re.sub('<[^<]+?>', '', markup)
        # Decode entities after tag removal so an escaped "&lt;b&gt;" in the
        # book stays literal text instead of being mistaken for a tag.
        parts.append(html.unescape(plain) + "\n")
    return "".join(parts)
def split_text_into_chunks(text, max_tokens=500):
    """Group the sentences of *text* into chunks of at most *max_tokens* words.

    Sentences are split after ``.``, ``!`` or ``?`` followed by one or more
    spaces. "Tokens" are whitespace-separated words, not model tokens.
    Sentences are never broken up, so a single sentence longer than
    *max_tokens* words becomes a chunk of its own that exceeds the limit.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk.

    Returns:
        A list of non-empty chunk strings, stripped of surrounding
        whitespace. Empty or whitespace-only input yields an empty list.
    """
    sentences = re.split(r'(?<=[.!?]) +', text)
    chunks = []
    pending = []  # sentences collected for the chunk being built
    pending_words = 0

    def flush():
        # Skip empty chunks: when the very first sentence is already over
        # the limit there is nothing collected yet to emit.
        chunk = " ".join(pending).strip()
        if chunk:
            chunks.append(chunk)

    for sentence in sentences:
        sentence_words = len(sentence.split())
        if pending_words + sentence_words > max_tokens:
            flush()
            pending = [sentence]
            pending_words = sentence_words
        else:
            pending.append(sentence)
            pending_words += sentence_words
    flush()
    return chunks
def _waveform_to_segment(waveform, sampling_rate):
    """Convert a raw waveform array into a 16-bit PCM ``AudioSegment``."""
    import numpy as np  # function-scope import: the file does not import numpy

    samples = np.squeeze(np.asarray(waveform))
    if samples.ndim > 1:
        # assumes a (channels, samples) layout as documented for the
        # transformers text-to-audio pipeline -- TODO confirm for this model
        channels = samples.shape[0]
        samples = samples.T.reshape(-1)  # interleave channels frame by frame
    else:
        channels = 1
        samples = samples.reshape(-1)
    if not np.issubdtype(samples.dtype, np.integer):
        # presumably float audio in [-1.0, 1.0]; clip so scaling cannot wrap
        samples = np.clip(samples, -1.0, 1.0) * 32767.0
    pcm = samples.astype(np.int16)
    return AudioSegment(
        data=pcm.tobytes(),
        sample_width=2,
        frame_rate=int(sampling_rate),
        channels=channels,
    )


def tts_orpheus(text_chunks, token):
    """Synthesize speech for each text chunk and join the audio into one MP3.

    Args:
        text_chunks: Iterable of text strings, synthesized in order.
        token: Hugging Face access token passed to the model loader.

    Returns:
        Path of a temporary ``.mp3`` file holding the concatenated audio.
        The file is not deleted automatically.
    """
    # NOTE(review): recent transformers releases deprecate `use_auth_token`
    # in favour of `token`; kept unchanged here -- confirm against the
    # installed version.
    pipe = pipeline("text-to-speech", model="SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1", use_auth_token=token)
    combined_audio = AudioSegment.silent(duration=0)
    for chunk in text_chunks:
        output = pipe(chunk, forward_params={"speaker_id": 0})
        # The pipeline returns the waveform itself plus its sample rate, not
        # a path to a WAV file, so the segment is built from the raw samples.
        combined_audio += _waveform_to_segment(output["audio"], output["sampling_rate"])
    # Reserve the temp file name and close the handle before exporting:
    # writing to a file that is still open elsewhere fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        mp3_path = tmp_file.name
    combined_audio.export(mp3_path, format="mp3")
    return mp3_path
def process_file(file, token, max_tokens):
    """Turn an uploaded EPUB or PDF into an MP3 and report the status.

    Args:
        file: The upload handed over by the UI: either a path string or an
            object exposing the path as ``.name``; ``None`` when nothing was
            uploaded.
        token: Hugging Face access token forwarded to the TTS step.
        max_tokens: Maximum number of words per text chunk.

    Returns:
        A ``(status_message, audio_path)`` tuple. ``audio_path`` is ``None``
        whenever no audio was produced (no upload, unsupported extension,
        or no text could be extracted).
    """
    if file is None:
        return "Bitte zuerst eine Datei hochladen.", None
    # Depending on the Gradio version/configuration the upload arrives as a
    # plain path string or as a temp-file wrapper with a `.name` attribute.
    path = file if isinstance(file, str) else file.name
    ext = os.path.splitext(path)[-1].lower()
    if ext == ".pdf":
        text = read_pdf(path)
    elif ext == ".epub":
        text = read_epub(path)
    else:
        return "Ungültiges Dateiformat", None
    # Drop empty chunks so the TTS model is never asked to speak nothing.
    chunks = [c for c in split_text_into_chunks(text, max_tokens=max_tokens) if c]
    if not chunks:
        # Avoid loading the model just to export an empty recording.
        return "Kein Text in der Datei gefunden.", None
    audio_path = tts_orpheus(chunks, token)
    return "Fertig!", audio_path
# Input components are created up front and placed into the layout below.
token_input = gr.Textbox(label="Hugging Face Token", type="password")
file_input = gr.File(label="EPUB oder PDF hochladen")
max_tokens_input = gr.Slider(100, 500, value=500, step=50, label="Maximale Tokens pro Chunk")

with gr.Blocks() as demo:
    gr.Markdown("## Kartoffel Orpheus TTS - EPUB/PDF zu Audio")
    with gr.Row():
        with gr.Column():
            # Components instantiated outside a Blocks context are not part
            # of the layout until render() is called; merely assigning them
            # to a new name here would leave them out of the page.
            token_input.render()
            file_input.render()
            max_tokens_input.render()
            token_box = token_input
            file_box = file_input
            token_limit_box = max_tokens_input
            start_btn = gr.Button("Starten")
        with gr.Column():
            status = gr.Textbox(label="Status")
            audio_out = gr.Audio(label="Ergebnis MP3", type="filepath")
    # Input order must match the parameter order of process_file.
    start_btn.click(fn=process_file, inputs=[file_box, token_box, token_limit_box], outputs=[status, audio_out])

if __name__ == "__main__":
    demo.launch()