Spaces:

szili2011
/

FNaF-Audio-Generation

Runtime error

szili2011 committed on Sep 24, 2024

Commit

26107f3

verified ·

1 Parent(s): 38b530f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,6 +16,7 @@ def preprocess_text(text):
     """
     Process the input text to prepare it for the model.
     This could include tokenization, phoneme extraction, etc.
     """
     d = cmudict.dict()
     words = text.lower().split()
@@ -30,10 +31,17 @@ def preprocess_text(text):
     # Flatten the list of phonemes
     flattened_phonemes = [p for sublist in phonemes for p in sublist]
-    # Convert phonemes to numeric format for the model (customize this based on your model's input requirements)
-    numeric_input = np.array([hash(p) % 1000 for p in flattened_phonemes])
-    return numeric_input
 # Define function to generate sound
 def generate_sfx(text):
@@ -43,9 +51,6 @@ def generate_sfx(text):
     """
     input_data = preprocess_text(text)
-    # Add batch dimension
-    input_data = np.expand_dims(input_data, axis=0)
     # Generate prediction
     prediction = model.predict(input_data)

     """
     Process the input text to prepare it for the model.
     This could include tokenization, phoneme extraction, etc.
+    The model expects input of shape (batch_size, sequence_length, 13).
     """
     d = cmudict.dict()
     words = text.lower().split()
     # Flatten the list of phonemes
     flattened_phonemes = [p for sublist in phonemes for p in sublist]
+    # Create dummy 13-feature vectors for each phoneme (you need to implement your own feature extraction)
+    # Here we create a placeholder with 13 features for each phoneme.
+    num_features = 13
+    sequence_length = len(flattened_phonemes)
+    input_data = np.random.rand(sequence_length, num_features)  # Placeholder, replace with actual feature extraction
+    # Add batch dimension
+    input_data = np.expand_dims(input_data, axis=0)  # Shape (1, sequence_length, 13)
+    return input_data
 # Define function to generate sound
 def generate_sfx(text):
     """
     input_data = preprocess_text(text)
     # Generate prediction
     prediction = model.predict(input_data)