Spaces:

bori0824
/

Text-To-Audio

Build error

App Files Files Community

bori0824 committed on Nov 20, 2024

Commit

a91b0e0

verified ·

1 Parent(s): b81e07f

Create app.py

Browse files

Files changed (1) hide show

app.py +95 -0

app.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import gradio as gr
+from TTS.api import TTS
+import fitz  # PyMuPDF for PDF text extraction
+from docx import Document
+from pydub import AudioSegment
+import tempfile
+import os
+# Lookup table from the "Accent - Gender" label offered in the UI dropdown to
+# the model identifier handed to TTS(model_name=...).
+# NOTE(review): "American - Male" and "American - Female" point at the same
+# model string, so the gender choice has no effect for that accent.
+# NOTE(review): the en-gb / en-au / en-ca identifiers should be checked against
+# the model list of the installed TTS package — TODO confirm they exist.
+TTS_MODELS = {
+    "American - Male": "tts_models/en/ljspeech/tacotron2-DDC",
+    "American - Female": "tts_models/en/ljspeech/tacotron2-DDC",
+    "British - Male": "tts_models/en-gb/apopeye/tacotron2-DDC",
+    "British - Female": "tts_models/en-gb/marlowe/tacotron2-DDC",
+    "Australian - Male": "tts_models/en-au/cameron/tacotron2-DDC",
+    "Australian - Female": "tts_models/en-au/aussie/tacotron2-DDC",
+    "Canadian - Male": "tts_models/en-ca/maple/tacotron2-DDC",
+    "Canadian - Female": "tts_models/en-ca/snow/tacotron2-DDC"
+}
+def extract_text_from_file(file_path, file_extension):
+    """Extract text from a .txt, .pdf, or .docx file."""
+    if file_extension == "txt":
+        with open(file_path, 'r', encoding='utf-8') as f:
+            return f.read()
+    elif file_extension == "pdf":
+        pdf_document = fitz.open(file_path)
+        text = ""
+        for page in pdf_document:
+            text += page.get_text()
+        return text
+    elif file_extension == "docx":
+        doc = Document(file_path)
+        return "\n".join(paragraph.text for paragraph in doc.paragraphs)
+    else:
+        raise ValueError("Unsupported file type. Please upload a .txt, .pdf, or .docx file.")
+def generate_audio(text, accent_gender, speed):
+    """Generate audio from text using selected accent, gender, and speed."""
+    model_name = TTS_MODELS[accent_gender]
+    tts = TTS(model_name=model_name)
+    temp_audio_path = os.path.join(tempfile.gettempdir(), "output.wav")
+    tts.tts_to_file(text=text, file_path=temp_audio_path)
+    # Adjust speed
+    audio = AudioSegment.from_file(temp_audio_path)
+    audio = audio.speedup(playback_speed=speed)
+    # Save as MP3
+    output_mp3_path = os.path.join(tempfile.gettempdir(), "output.mp3")
+    audio.export(output_mp3_path, format="mp3")
+    return output_mp3_path
+def process_input(input_text, uploaded_file, accent_gender, speed):
+    """Process input (text or file) and generate audio."""
+    if not input_text and not uploaded_file:
+        return "Please provide input text or upload a file.", None
+    # Extract text from file if uploaded
+    if uploaded_file:
+        file_extension = uploaded_file.name.split('.')[-1].lower()
+        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+            temp_file.write(uploaded_file.read())
+            temp_file_path = temp_file.name
+        text = extract_text_from_file(temp_file_path, file_extension)
+        os.remove(temp_file_path)
+    else:
+        text = input_text
+    # Generate audio
+    try:
+        mp3_path = generate_audio(text, accent_gender, float(speed))
+        return "Audio generated successfully!", mp3_path
+    except Exception as e:
+        return f"Error: {str(e)}", None
+# Gradio interface
+interface = gr.Interface(
+    fn=process_input,
+    inputs=[
+        gr.Textbox(label="Enter Text", placeholder="Type or paste text here...", lines=5),
+        gr.File(label="Upload File (.txt, .pdf, .docx)", file_types=[".txt", ".pdf", ".docx"]),
+        gr.Dropdown(label="Accent & Gender", choices=list(TTS_MODELS.keys()), value="American - Male"),
+        gr.Slider(label="Speed (e.g., 1.0 = Normal, 0.75 = Slower, 1.25 = Faster)", minimum=0.5, maximum=2.0, value=1.0, step=0.1),
+    ],
+    outputs=[
+        gr.Textbox(label="Result"),
+        gr.Audio(label="Generated Audio"),
+    ],
+    title="Text-to-Speech (TTS) Application",
+    description="Upload a text, PDF, or Word file or enter text directly. Customize accent, gender, and speed. Download the generated audio as MP3."
+)
+# Launch the app
+interface.launch()