WRX020510 committed on
Commit
2ba3da6
·
verified ·
1 Parent(s): 5f79f2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -18
app.py CHANGED
@@ -1,8 +1,12 @@
1
  #Import part
2
  from transformers import pipeline
3
  import streamlit as st
4
- from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
5
  import torch
 
 
 
 
 
6
 
7
  # Use function for the implementation
8
 
@@ -31,31 +35,35 @@ def text2story(text):
31
  # text2audio
32
  def text2audio(story_text):
33
 
34
- # tts_pipeline = pipeline("text-to-speech", model="suno/bark-small")
35
 
36
- # audio_data = tts_pipeline(story_text)
37
 
38
- # audio_buffer = io.BytesIO()
39
- # wavfile.write(audio_buffer, rate=audio_data["sampling_rate"], data=audio_data["audio"])
40
- # audio_buffer.seek(0)
41
 
42
- # return {
43
- # 'audio': audio_buffer.getvalue(),
44
- # 'sampling_rate': audio_data["sampling_rate"]
45
- # }
46
 
47
 
48
- inputs = processor(text=story_text, return_tensors="pt")
49
- with torch.no_grad():
50
- speech = model.generate(**inputs)
51
 
52
- audio_data = speech.cpu().numpy().squeeze()
 
53
 
54
- audio_buffer = io.BytesIO()
55
- wavfile.write(audio_buffer, rate=16000, data=audio_data) # 16kHz 采样率
56
- audio_buffer.seek(0)
 
 
 
 
 
 
57
 
58
- return {'audio': audio_buffer.getvalue(), 'sampling_rate': 16000}
59
 
60
 
61
  # program main part
 
1
  #Import part
2
  from transformers import pipeline
3
  import streamlit as st
 
4
  import torch
5
+ import io
6
+ import scipy.io.wavfile as wavfile
7
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
8
+
9
+
10
 
11
  # Use function for the implementation
12
 
 
35
  # text2audio
36
  def text2audio(story_text):
37
 
38
+ tts_pipeline = pipeline("text-to-speech", model="suno/bark-small")
39
 
40
+ audio_data = tts_pipeline(story_text)
41
 
42
+ audio_buffer = io.BytesIO()
43
+ wavfile.write(audio_buffer, rate=audio_data["sampling_rate"], data=audio_data["audio"])
44
+ audio_buffer.seek(0)
45
 
46
+ return {
47
+ 'audio': audio_buffer.getvalue(),
48
+ 'sampling_rate': audio_data["sampling_rate"]
49
+ }
50
 
51
 
 
 
 
52
 
53
+ # processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
54
+ # model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
55
 
56
+ # inputs = processor(text=story_text, return_tensors="pt")
57
+ # with torch.no_grad():
58
+ # speech = model.generate(**inputs)
59
+
60
+ # audio_data = speech.cpu().numpy().squeeze()
61
+
62
+ # audio_buffer = io.BytesIO()
63
+ # wavfile.write(audio_buffer, rate=16000, data=audio_data) # 16kHz 采样率
64
+ # audio_buffer.seek(0)
65
 
66
+ # return {'audio': audio_buffer.getvalue(), 'sampling_rate': 16000}
67
 
68
 
69
  # program main part