piyazon committed on
Commit
c07a39c
·
1 Parent(s): 2427e4e
Files changed (3) hide show
  1. Dockerfile +0 -13
  2. app.py +11 -35
  3. requirements.txt +1 -2
Dockerfile CHANGED
@@ -1,9 +1,5 @@
1
  FROM python:3.10-slim
2
 
3
- # Install git as root
4
- USER root
5
- RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
6
-
7
  RUN useradd -m -u 1000 user
8
  USER user
9
  ENV PATH="/home/user/.local/bin:$PATH"
@@ -13,15 +9,6 @@ WORKDIR /app
13
  COPY --chown=user ./requirements.txt requirements.txt
14
  RUN pip install --no-cache-dir -r requirements.txt
15
 
16
- # Clone MeloTTS and install in editable mode
17
- RUN git clone https://github.com/myshell-ai/MeloTTS.git /home/user/MeloTTS
18
- WORKDIR /home/user/MeloTTS
19
- RUN pip install --no-cache-dir -e .
20
- WORKDIR /app
21
-
22
- # Download unidic dictionary
23
- RUN python -m unidic download
24
-
25
  COPY --chown=user . /app
26
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
27
 
 
1
  FROM python:3.10-slim
2
 
 
 
 
 
3
  RUN useradd -m -u 1000 user
4
  USER user
5
  ENV PATH="/home/user/.local/bin:$PATH"
 
9
  COPY --chown=user ./requirements.txt requirements.txt
10
  RUN pip install --no-cache-dir -r requirements.txt
11
 
 
 
 
 
 
 
 
 
 
12
  COPY --chown=user . /app
13
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
14
 
app.py CHANGED
@@ -31,45 +31,20 @@ curl -X POST https://piyazon-tts-piyazon.hf.space/generate-tts \
31
  -d '{"text": "Hello, world!"}' \
32
  --output output.wav
33
  """
34
- # @app.post("/generate-tts")
35
- # async def generate_tts(input: TextInput):
36
- # try:
37
- # # Tokenize input text
38
- # inputs = tokenizer(input.text, return_tensors="pt")
39
-
40
- # # Generate waveform
41
- # with torch.no_grad():
42
- # waveform = model(**inputs).waveform
43
-
44
- # # Convert waveform to audio file (WAV format)
45
- # waveform = waveform.squeeze().numpy() # Convert tensor to numpy array
46
- # buffer = io.BytesIO()
47
- # sf.write(buffer, waveform, samplerate=model.config.sampling_rate, format="WAV")
48
- # buffer.seek(0)
49
-
50
- # # Return audio as streaming response
51
- # return StreamingResponse(
52
- # buffer,
53
- # media_type="audio/wav",
54
- # headers={"Content-Disposition": 'attachment; filename="output.wav"'}
55
- # )
56
- # except Exception as e:
57
- # raise HTTPException(status_code=500, detail=f"Error generating audio: {str(e)}")
58
-
59
- from melo.api import TTS
60
-
61
- # Initialize TTS model for Chinese
62
- speed = 1.0
63
- device = 'cpu'
64
- model_melo = TTS(language='ZH', device=device)
65
- speaker_ids = model_melo.hps.data.spk2id
66
-
67
  @app.post("/generate-tts")
68
  async def generate_tts(input: TextInput):
69
  try:
70
- # Generate audio to a temporary buffer
 
 
 
 
 
 
 
 
71
  buffer = io.BytesIO()
72
- model_melo.tts_to_file(input.text, speaker_ids['ZH'], buffer, speed=speed, format='wav')
73
  buffer.seek(0)
74
 
75
  # Return audio as streaming response
@@ -81,6 +56,7 @@ async def generate_tts(input: TextInput):
81
  except Exception as e:
82
  raise HTTPException(status_code=500, detail=f"Error generating audio: {str(e)}")
83
 
 
84
 
85
  @app.get("/")
86
  def greet_json():
 
31
  -d '{"text": "Hello, world!"}' \
32
  --output output.wav
33
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  @app.post("/generate-tts")
35
  async def generate_tts(input: TextInput):
36
  try:
37
+ # Tokenize input text
38
+ inputs = tokenizer(input.text, return_tensors="pt")
39
+
40
+ # Generate waveform
41
+ with torch.no_grad():
42
+ waveform = model(**inputs).waveform
43
+
44
+ # Convert waveform to audio file (WAV format)
45
+ waveform = waveform.squeeze().numpy() # Convert tensor to numpy array
46
  buffer = io.BytesIO()
47
+ sf.write(buffer, waveform, samplerate=model.config.sampling_rate, format="WAV")
48
  buffer.seek(0)
49
 
50
  # Return audio as streaming response
 
56
  except Exception as e:
57
  raise HTTPException(status_code=500, detail=f"Error generating audio: {str(e)}")
58
 
59
+
60
 
61
  @app.get("/")
62
  def greet_json():
requirements.txt CHANGED
@@ -13,5 +13,4 @@ torchcodec
13
  flask
14
  flask-cors
15
  pydantic
16
- soundfile
17
- unidic
 
13
  flask
14
  flask-cors
15
  pydantic
16
+ soundfile