bori0824 committed on
Commit
a91b0e0
·
verified ·
1 Parent(s): b81e07f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from TTS.api import TTS
3
+ import fitz # PyMuPDF for PDF text extraction
4
+ from docx import Document
5
+ from pydub import AudioSegment
6
+ import tempfile
7
+ import os
8
+
9
# Lookup table from the "<Accent> - <Gender>" label shown in the UI dropdown
# to the TTS model identifier that is loaded for that choice.
# NOTE(review): both "American" labels point at the same model identifier, so
# the gender choice makes no difference for that accent.
# NOTE(review): confirm that every identifier below exists in the installed
# TTS model catalogue before relying on it.
TTS_MODELS = {
    # American
    "American - Male": "tts_models/en/ljspeech/tacotron2-DDC",
    "American - Female": "tts_models/en/ljspeech/tacotron2-DDC",
    # British
    "British - Male": "tts_models/en-gb/apopeye/tacotron2-DDC",
    "British - Female": "tts_models/en-gb/marlowe/tacotron2-DDC",
    # Australian
    "Australian - Male": "tts_models/en-au/cameron/tacotron2-DDC",
    "Australian - Female": "tts_models/en-au/aussie/tacotron2-DDC",
    # Canadian
    "Canadian - Male": "tts_models/en-ca/maple/tacotron2-DDC",
    "Canadian - Female": "tts_models/en-ca/snow/tacotron2-DDC",
}
20
+
21
def extract_text_from_file(file_path, file_extension):
    """Extract text from a .txt, .pdf, or .docx file.

    Args:
        file_path: Path of the file to read.
        file_extension: Extension naming the file format. It is matched
            case-insensitively and may be given with or without a leading
            dot ("pdf", ".pdf" and "PDF" are all accepted).

    Returns:
        The extracted text as a single string. For .docx files the
        paragraphs are joined with newlines.

    Raises:
        ValueError: If the extension is not txt, pdf, or docx.
    """
    # Normalise so callers do not have to pre-clean the extension.
    extension = (file_extension or "").lower().lstrip(".")

    if extension == "txt":
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

    if extension == "pdf":
        # The context manager closes the document even if reading a page
        # fails, so the underlying file handle is never leaked.
        with fitz.open(file_path) as pdf_document:
            return "".join(page.get_text() for page in pdf_document)

    if extension == "docx":
        doc = Document(file_path)
        return "\n".join(paragraph.text for paragraph in doc.paragraphs)

    raise ValueError("Unsupported file type. Please upload a .txt, .pdf, or .docx file.")
37
+
38
def generate_audio(text, accent_gender, speed):
    """Generate an MP3 from text using the selected accent, gender, and speed.

    Args:
        text: The text to synthesise. Must contain non-whitespace characters.
        accent_gender: A key of ``TTS_MODELS`` selecting the voice model.
        speed: Playback speed factor; 1.0 leaves the audio unchanged, larger
            values are faster, smaller values are slower.

    Returns:
        Path of a newly created MP3 file in the system temp directory. The
        file is not deleted here; the caller owns it.

    Raises:
        KeyError: If ``accent_gender`` is not a key of ``TTS_MODELS``.
        ValueError: If ``text`` is empty or ``speed`` is not positive.
    """
    if not text or not text.strip():
        raise ValueError("No text to synthesize.")
    speed = float(speed)
    if speed <= 0:
        raise ValueError("Speed must be a positive number.")

    model_name = TTS_MODELS[accent_gender]
    tts = TTS(model_name=model_name)

    # Use a unique intermediate file per call so that concurrent requests do
    # not overwrite each other's audio, and always remove it afterwards.
    wav_fd, temp_audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(wav_fd)
    try:
        tts.tts_to_file(text=text, file_path=temp_audio_path)
        audio = AudioSegment.from_file(temp_audio_path)
    finally:
        os.remove(temp_audio_path)

    # Adjust speed. Speeds within 1% of normal are treated as "no change":
    # speedup() drops slices of audio and is only meaningful for factors
    # clearly above 1.0.
    if speed > 1.01:
        audio = audio.speedup(playback_speed=speed)
    elif speed < 0.99:
        # Slow down by reinterpreting the samples at a lower frame rate and
        # then resampling back to the original rate. This also lowers the
        # pitch.
        # NOTE(review): relies on pydub's `_spawn` helper - confirm against
        # the installed pydub version.
        slowed = audio._spawn(
            audio.raw_data,
            overrides={"frame_rate": max(1, int(audio.frame_rate * speed))},
        )
        audio = slowed.set_frame_rate(audio.frame_rate)

    # Save as MP3 under a unique name for the same concurrency reason.
    mp3_fd, output_mp3_path = tempfile.mkstemp(suffix=".mp3")
    os.close(mp3_fd)
    audio.export(output_mp3_path, format="mp3")
    return output_mp3_path
53
+
54
def process_input(input_text, uploaded_file, accent_gender, speed):
    """Process input (text or file) and generate audio.

    An uploaded file takes precedence over typed text when both are given.

    Args:
        input_text: Text typed by the user; may be empty or None.
        uploaded_file: The uploaded file, either as a path string or as an
            object exposing the path through a ``name`` attribute; may be
            None.
        accent_gender: Voice label forwarded to ``generate_audio``.
        speed: Speed factor; converted to float before use.

    Returns:
        A ``(message, mp3_path)`` tuple. ``mp3_path`` is None whenever no
        audio was produced, and ``message`` then explains why.
    """
    has_text = bool(input_text and input_text.strip())
    if not has_text and not uploaded_file:
        return "Please provide input text or upload a file.", None

    # Extraction and synthesis share one try block so that any failure
    # (unsupported type, unreadable file, TTS error) is reported to the user
    # instead of escaping as an unhandled exception.
    try:
        if uploaded_file:
            # Read the upload directly from its path; this works whether the
            # value is a plain path string or a file-like wrapper, and avoids
            # making a temporary copy that would need cleaning up.
            file_path = str(getattr(uploaded_file, "name", uploaded_file))
            file_extension = os.path.splitext(file_path)[1].lstrip(".").lower()
            text = extract_text_from_file(file_path, file_extension)
        else:
            text = input_text

        if not text or not text.strip():
            return "Error: No text could be extracted from the input.", None

        mp3_path = generate_audio(text, accent_gender, float(speed))
    except Exception as e:
        return f"Error: {str(e)}", None

    return "Audio generated successfully!", mp3_path
76
+
77
# Gradio interface: four inputs (text, file, voice, speed) feed
# process_input, whose (message, audio path) result fills the two outputs.
_input_components = [
    gr.Textbox(
        label="Enter Text",
        placeholder="Type or paste text here...",
        lines=5,
    ),
    gr.File(
        label="Upload File (.txt, .pdf, .docx)",
        file_types=[".txt", ".pdf", ".docx"],
    ),
    gr.Dropdown(
        label="Accent & Gender",
        choices=list(TTS_MODELS.keys()),
        value="American - Male",
    ),
    gr.Slider(
        label="Speed (e.g., 1.0 = Normal, 0.75 = Slower, 1.25 = Faster)",
        minimum=0.5,
        maximum=2.0,
        value=1.0,
        step=0.1,
    ),
]

_output_components = [
    gr.Textbox(label="Result"),
    gr.Audio(label="Generated Audio"),
]

interface = gr.Interface(
    fn=process_input,
    inputs=_input_components,
    outputs=_output_components,
    title="Text-to-Speech (TTS) Application",
    description="Upload a text, PDF, or Word file or enter text directly. Customize accent, gender, and speed. Download the generated audio as MP3.",
)
93
+
94
# Launch the app only when this file is executed as a script, so that
# importing the module (for example from a test) does not start a web server.
if __name__ == "__main__":
    interface.launch()