szili2011 committed on
Commit
85d2702
·
verified ·
1 Parent(s): 09a58b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -23
app.py CHANGED
@@ -14,10 +14,6 @@ model = tf.keras.models.load_model('audio_model.h5')
14
 
15
  # Preprocess input text
16
  def preprocess_text(text):
17
- """
18
- Process the input text to prepare it for the model.
19
- This includes tokenization and phoneme extraction.
20
- """
21
  d = cmudict.dict()
22
  words = text.lower().split()
23
  phonemes = []
@@ -30,7 +26,7 @@ def preprocess_text(text):
30
 
31
  flattened_phonemes = [p for sublist in phonemes for p in sublist]
32
 
33
- # Create dummy 13-feature vectors for each phoneme (you need to implement your own feature extraction)
34
  num_features = 13
35
  sequence_length = len(flattened_phonemes)
36
  input_data = np.random.rand(sequence_length, num_features)
@@ -42,32 +38,18 @@ def preprocess_text(text):
42
 
43
  # Convert model output to an audio file
44
  def convert_to_audio(model_output, filename="output.wav", sample_rate=22050):
45
- """
46
- Convert the model output into a .wav file.
47
- """
48
- # Normalize the audio output
49
  normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
50
-
51
- # Write the audio data to a file
52
- write(filename, sample_rate, normalized_output.astype(np.float32)) # Ensure the output is of type float32
53
-
54
  return filename
55
 
56
  # Define function to generate sound effect
57
  def generate_sfx(text, duration=30):
58
- """
59
- Takes input text, preprocesses it, runs it through the model,
60
- and generates a downloadable audio file for the specified duration.
61
- """
62
  input_data = preprocess_text(text)
63
-
64
- # Generate prediction
65
  prediction = model.predict(input_data)
66
 
67
- # Generate a longer output by repeating or padding
68
- audio_data = np.tile(prediction.flatten(), (duration * 22050 // len(prediction.flatten()) + 1))[:duration * 22050]
69
 
70
- # Convert the prediction to an audio file
71
  audio_file = convert_to_audio(audio_data, filename="output.wav")
72
 
73
  return audio_file
@@ -77,7 +59,7 @@ interface = gr.Interface(
77
  fn=generate_sfx,
78
  inputs=[
79
  gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
80
- gr.Slider(minimum=2, maximum=20, default=30, label="Duration (seconds)")
81
  ],
82
  outputs=gr.Audio(label="Generated SFX", type="filepath"),
83
  title="SFX Generator from Text",
 
14
 
15
  # Preprocess input text
16
  def preprocess_text(text):
 
 
 
 
17
  d = cmudict.dict()
18
  words = text.lower().split()
19
  phonemes = []
 
26
 
27
  flattened_phonemes = [p for sublist in phonemes for p in sublist]
28
 
29
+ # Create dummy 13-feature vectors for each phoneme (implement your own feature extraction)
30
  num_features = 13
31
  sequence_length = len(flattened_phonemes)
32
  input_data = np.random.rand(sequence_length, num_features)
 
38
 
39
  # Convert model output to an audio file
40
  def convert_to_audio(model_output, filename="output.wav", sample_rate=22050):
 
 
 
 
41
  normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
42
+ write(filename, sample_rate, normalized_output.astype(np.float32))
 
 
 
43
  return filename
44
 
45
  # Define function to generate sound effect
46
  def generate_sfx(text, duration=30):
 
 
 
 
47
  input_data = preprocess_text(text)
 
 
48
  prediction = model.predict(input_data)
49
 
50
+ # Generate longer output by repeating or padding
51
+ audio_data = np.tile(prediction.flatten(), (duration * sample_rate // len(prediction.flatten()) + 1))[:duration * sample_rate]
52
 
 
53
  audio_file = convert_to_audio(audio_data, filename="output.wav")
54
 
55
  return audio_file
 
59
  fn=generate_sfx,
60
  inputs=[
61
  gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
62
+ gr.Slider(minimum=2, maximum=20, label="Duration (seconds)", value=30)
63
  ],
64
  outputs=gr.Audio(label="Generated SFX", type="filepath"),
65
  title="SFX Generator from Text",