Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,6 +15,7 @@ import torch # For tensor operations
|
|
| 15 |
import soundfile as sf # For saving audio as .wav files
|
| 16 |
import sentencepiece # Required by SpeechT5Processor for tokenization
|
| 17 |
|
|
|
|
| 18 |
##########################################
|
| 19 |
# Streamlit application title and input
|
| 20 |
##########################################
|
|
@@ -58,15 +59,13 @@ def analyze_dominant_emotion(user_review):
|
|
| 58 |
##########################################
|
| 59 |
# Step 2: Response Generation Function
|
| 60 |
##########################################
|
| 61 |
-
|
| 62 |
-
|
| 63 |
def response_gen(user_review):
|
| 64 |
"""
|
| 65 |
Generate a concise and logical response based on the sentiment of the user's comment.
|
| 66 |
"""
|
| 67 |
-
dominant_emotion = analyze_dominant_emotion(user_review) #
|
| 68 |
emotion_label = dominant_emotion['label'].lower() # Extract the emotion label in lowercase format
|
| 69 |
-
|
| 70 |
# Define response templates for each emotion
|
| 71 |
emotion_prompts = {
|
| 72 |
"anger": (
|
|
@@ -139,14 +138,14 @@ def response_gen(user_review):
|
|
| 139 |
) # Default to neutral if emotion is not found
|
| 140 |
|
| 141 |
# Load the tokenizer and language model for response generation
|
| 142 |
-
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B") # Load tokenizer for
|
| 143 |
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B") # Load language model for response generation
|
| 144 |
|
| 145 |
inputs = tokenizer(prompt, return_tensors="pt") # Tokenize the input prompt
|
| 146 |
outputs = model.generate(
|
| 147 |
**inputs,
|
| 148 |
-
max_new_tokens=
|
| 149 |
-
min_length=50, # Ensure
|
| 150 |
no_repeat_ngram_size=2, # Avoid repetitive phrases
|
| 151 |
temperature=0.7 # Add randomness for more natural responses
|
| 152 |
)
|
|
@@ -168,19 +167,12 @@ def sound_gen(response):
|
|
| 168 |
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation") # Load speaker embeddings
|
| 169 |
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0) # Use a default embedding
|
| 170 |
|
| 171 |
-
#
|
| 172 |
-
max_tokens =
|
| 173 |
-
truncated_response = response[:max_tokens]
|
| 174 |
-
|
| 175 |
-
# Process the truncated text for spectrogram generation
|
| 176 |
-
inputs = processor(text=truncated_response, return_tensors="pt")# Process text for spectrogram generation
|
| 177 |
-
inputs_embeds_size = inputs["input_ids"].size(1)
|
| 178 |
-
|
| 179 |
-
# Ensure tensor dimensions align between input IDs and speaker embeddings
|
| 180 |
-
speaker_embeddings = speaker_embeddings[:, :inputs_embeds_size] # Match dimensions with input IDs
|
| 181 |
|
| 182 |
-
|
| 183 |
-
spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
|
| 184 |
|
| 185 |
with torch.no_grad():
|
| 186 |
speech = vocoder(spectrogram) # Convert spectrogram to waveform
|
|
|
|
| 15 |
import soundfile as sf # For saving audio as .wav files
|
| 16 |
import sentencepiece # Required by SpeechT5Processor for tokenization
|
| 17 |
|
| 18 |
+
|
| 19 |
##########################################
|
| 20 |
# Streamlit application title and input
|
| 21 |
##########################################
|
|
|
|
| 59 |
##########################################
|
| 60 |
# Step 2: Response Generation Function
|
| 61 |
##########################################
|
|
|
|
|
|
|
| 62 |
def response_gen(user_review):
|
| 63 |
"""
|
| 64 |
Generate a concise and logical response based on the sentiment of the user's comment.
|
| 65 |
"""
|
| 66 |
+
dominant_emotion = analyze_dominant_emotion(user_review) # Determine the dominant emotion from the user's comment
|
| 67 |
emotion_label = dominant_emotion['label'].lower() # Extract the emotion label in lowercase format
|
| 68 |
+
|
| 69 |
# Define response templates for each emotion
|
| 70 |
emotion_prompts = {
|
| 71 |
"anger": (
|
|
|
|
| 138 |
) # Default to neutral if emotion is not found
|
| 139 |
|
| 140 |
# Load the tokenizer and language model for response generation
|
| 141 |
+
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B") # Load tokenizer for text processing
|
| 142 |
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B") # Load language model for response generation
|
| 143 |
|
| 144 |
inputs = tokenizer(prompt, return_tensors="pt") # Tokenize the input prompt
|
| 145 |
outputs = model.generate(
|
| 146 |
**inputs,
|
| 147 |
+
max_new_tokens=150, # Limit generated tokens to ensure concise responses
|
| 148 |
+
min_length=50, # Ensure the generated response is logical and complete
|
| 149 |
no_repeat_ngram_size=2, # Avoid repetitive phrases
|
| 150 |
temperature=0.7 # Add randomness for more natural responses
|
| 151 |
)
|
|
|
|
| 167 |
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation") # Load speaker embeddings
|
| 168 |
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0) # Use a default embedding
|
| 169 |
|
| 170 |
+
# Limit the text length (a character count applied before tokenization, not a token count) to stay within the model's capacity
|
| 171 |
+
max_tokens = 200 # Limit the input text length to avoid tensor mismatch
|
| 172 |
+
truncated_response = response[:max_tokens]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
+
inputs = processor(text=truncated_response, return_tensors="pt") # Process text for spectrogram generation
|
| 175 |
+
spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings) # Generate the spectrogram
|
| 176 |
|
| 177 |
with torch.no_grad():
|
| 178 |
speech = vocoder(spectrogram) # Convert spectrogram to waveform
|