Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -145,8 +145,8 @@ def response_gen(user_review):
|
|
| 145 |
inputs = tokenizer(prompt, return_tensors="pt") # Tokenize the input prompt
|
| 146 |
outputs = model.generate(
|
| 147 |
**inputs,
|
| 148 |
-
max_new_tokens=
|
| 149 |
-
min_length=
|
| 150 |
no_repeat_ngram_size=2, # Avoid repetitive phrases
|
| 151 |
temperature=0.7 # Add randomness for more natural responses
|
| 152 |
)
|
|
@@ -168,9 +168,19 @@ def sound_gen(response):
|
|
| 168 |
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation") # Load speaker embeddings
|
| 169 |
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0) # Use a default embedding
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
with torch.no_grad():
|
| 176 |
speech = vocoder(spectrogram) # Convert spectrogram to waveform
|
|
|
|
| 145 |
inputs = tokenizer(prompt, return_tensors="pt") # Tokenize the input prompt
|
| 146 |
outputs = model.generate(
|
| 147 |
**inputs,
|
| 148 |
+
max_new_tokens=200,
|
| 149 |
+
min_length=50, # Ensure concise and complete responses
|
| 150 |
no_repeat_ngram_size=2, # Avoid repetitive phrases
|
| 151 |
temperature=0.7 # Add randomness for more natural responses
|
| 152 |
)
|
|
|
|
| 168 |
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation") # Load speaker embeddings
|
| 169 |
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0) # Use a default embedding
|
| 170 |
|
| 171 |
+
# Ensure the response is not too long for the model's capacity
|
| 172 |
+
max_tokens = 300 # Limit the input text tokens to a maximum of 300
|
| 173 |
+
truncated_response = response[:max_tokens] # Truncate the response to fit within the limit
|
| 174 |
+
|
| 175 |
+
# Process the truncated text for spectrogram generation
|
| 176 |
+
inputs = processor(text=truncated_response, return_tensors="pt")# Process text for spectrogram generation
|
| 177 |
+
inputs_embeds_size = inputs["input_ids"].size(1)
|
| 178 |
+
|
| 179 |
+
# Ensure tensor dimensions align between input IDs and speaker embeddings
|
| 180 |
+
speaker_embeddings = speaker_embeddings[:, :inputs_embeds_size] # Match dimensions with input IDs
|
| 181 |
+
|
| 182 |
+
# Generate the spectrogram using the SpeechT5 model
|
| 183 |
+
spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings) # Generate the spectrogram
|
| 184 |
|
| 185 |
with torch.no_grad():
|
| 186 |
speech = vocoder(spectrogram) # Convert spectrogram to waveform
|