hivecorp committed on
Commit
42fcedf
·
verified ·
1 Parent(s): 2894da7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -8
app.py CHANGED
@@ -1,25 +1,100 @@
1
  from fastapi import FastAPI
2
  import edge_tts
3
  import asyncio
4
- from fastapi.responses import FileResponse
5
- import uvicorn
 
 
 
 
6
 
7
  app = FastAPI()
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  @app.get("/")
10
  def home():
11
  return {"message": "EdgeTTS FastAPI is running!"}
12
 
13
  @app.get("/tts")
14
  async def tts(text: str, voice: str = "en-US-AriaNeural"):
15
- output_file = "output.mp3"
 
16
 
17
- # Generate speech
18
- communicate = edge_tts.Communicate(text, voice)
19
- await communicate.save(output_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- return FileResponse(output_file, media_type="audio/mpeg", filename="speech.mp3")
22
 
23
- # Ensure the app starts when running in Hugging Face Spaces
24
  if __name__ == "__main__":
 
25
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  from fastapi import FastAPI
2
  import edge_tts
3
  import asyncio
4
+ import os
5
+ import time
6
+ import io
7
+ from fastapi.responses import StreamingResponse
8
+ from tempfile import TemporaryDirectory
9
+ from pydub import AudioSegment
10
 
11
  app = FastAPI()
12
 
13
def split_text(text, max_chunk_size=500):
    """Split text into sentence-aligned chunks for separate synthesis.

    Text no longer than ``max_chunk_size`` is returned unchanged as a
    single-element list. Longer text is cut after sentence terminators
    (``.``, ``?``, ``!``) and the sentences are greedily packed into chunks
    whose joined length stays within ``max_chunk_size``.

    Args:
        text: The input text.
        max_chunk_size: Maximum number of characters per chunk.

    Returns:
        A list of chunk strings. A single sentence that is itself longer
        than ``max_chunk_size`` is kept whole and becomes its own chunk.
    """
    import re

    if len(text) <= max_chunk_size:
        return [text]  # No need to split if it's within the limit

    # Map the Devanagari danda and Arabic question mark onto ASCII
    # terminators so one pattern handles all of them.
    normalised = text.replace('।', '.').replace('؟', '?')

    # Each match is a run of non-terminator characters plus the terminators
    # that follow it, so the original punctuation is preserved and no
    # empty "sentence" is produced after the final terminator.
    fragments = re.findall(r'[^.?!]+[.?!]*', normalised)

    chunks = []
    current_chunk = []
    current_length = 0

    for fragment in fragments:
        sentence = fragment.strip()
        if not sentence:
            continue  # whitespace between or after sentences

        # Account for the space that ' '.join() inserts between sentences.
        added = len(sentence) + (1 if current_chunk else 0)

        if current_chunk and current_length + added > max_chunk_size:
            chunks.append(' '.join(current_chunk))
            current_chunk = []
            current_length = 0
            added = len(sentence)

        current_chunk.append(sentence)
        current_length += added

    if current_chunk:
        chunks.append(' '.join(current_chunk))

    return chunks
39
+
40
async def process_chunk(text, voice, temp_dir, chunk_index):
    """Synthesize one chunk of text to an MP3 file and return its path.

    The file is written inside ``temp_dir``; its name combines the chunk
    index, the current Unix time in seconds and four random bytes in hex.
    """
    timestamp = int(time.time())
    nonce = os.urandom(4).hex()
    filename = f"chunk_{chunk_index}_{timestamp}_{nonce}.mp3"
    destination = os.path.join(temp_dir, filename)

    await edge_tts.Communicate(text, voice).save(destination)
    return destination
46
+
47
async def combine_audio_files(chunk_files):
    """Concatenate MP3 files, in order, into one in-memory MP3.

    Args:
        chunk_files: Paths of the MP3 files to join, in playback order.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 containing the combined MP3.

    The input files are deleted afterwards, whether or not combining
    succeeded. Deletion is best-effort: a file that cannot be removed is
    skipped.
    """
    try:
        combined = AudioSegment.empty()
        for chunk_path in chunk_files:
            combined += AudioSegment.from_mp3(chunk_path)

        output = io.BytesIO()
        combined.export(output, format="mp3")
        output.seek(0)
        return output
    finally:
        # Clean up temp files even when decoding or export raised.
        for chunk_path in chunk_files:
            try:
                os.remove(chunk_path)
            except OSError:
                # Only filesystem errors are ignored; anything else
                # (e.g. cancellation) still propagates.
                pass
67
+
68
  @app.get("/")
69
  def home():
70
  return {"message": "EdgeTTS FastAPI is running!"}
71
 
72
  @app.get("/tts")
73
  async def tts(text: str, voice: str = "en-US-AriaNeural"):
74
+ if not text.strip():
75
+ return {"error": "Text cannot be empty."}
76
 
77
+ text_chunks = split_text(text) # Split only if necessary
78
+
79
+ if len(text_chunks) == 1:
80
+ # Process the entire text as a single request if it's within limit
81
+ output_audio = io.BytesIO()
82
+ communicate = edge_tts.Communicate(text_chunks[0], voice)
83
+ await communicate.save(output_audio)
84
+ output_audio.seek(0)
85
+
86
+ return StreamingResponse(output_audio, media_type="audio/mpeg", headers={"Content-Disposition": "attachment; filename=speech.mp3"})
87
+
88
+ # If text is split into chunks, process them individually
89
+ with TemporaryDirectory() as temp_dir:
90
+ chunk_files = await asyncio.gather(*[
91
+ process_chunk(chunk, voice, temp_dir, i) for i, chunk in enumerate(text_chunks)
92
+ ])
93
+
94
+ output_audio = await combine_audio_files(chunk_files)
95
 
96
+ return StreamingResponse(output_audio, media_type="audio/mpeg", headers={"Content-Disposition": "attachment; filename=speech.mp3"})
97
 
 
98
  if __name__ == "__main__":
99
+ import uvicorn
100
  uvicorn.run(app, host="0.0.0.0", port=7860)