# Source: Hugging Face Space file view (Space status at capture time: Sleeping).
# File size: 9,280 bytes. Revisions shown in the blame gutter: 292a85f, 5faa186.
# The page's line-number gutter (1-219) was extraction residue and is omitted here.
import gradio as gr
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
import librosa
from gtts import gTTS
import numpy as np
import tempfile
import os
# Pick the torch device once at import time: CUDA when torch reports it is
# available, otherwise the CPU. The choice is printed for visibility in logs.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Using device: {DEVICE}")
class EmotionAwareTranscriber:
    """Transcribe audio, classify the emotion of the text, and reply in a style.

    The pipeline is: Whisper speech-to-text -> text emotion classifier (with
    keyword overrides) -> canned response chosen by (emotion, style) ->
    gTTS speech synthesis of that response.
    """

    # The emotion classifier's labels do not all match the keys used in the
    # response templates; translate the ones that differ so that e.g. "joy"
    # selects the 'happy' templates instead of silently falling back to
    # 'neutral'.
    _LABEL_ALIASES = {'joy': 'happy', 'sadness': 'sad', 'anger': 'angry'}

    # Keyword overrides. Matching is by substring on the lower-cased text, so
    # "retired" also matches 'tired' and "engrossed" matches 'gross'.
    _DISGUST_KEYWORDS = ('disgusting', 'gross', 'revolting')
    _TIRED_KEYWORDS = ('exhausted', 'tired', 'sleepy')

    def __init__(self, model_size="base"):
        """Load the Whisper model/processor and the emotion classifier.

        Args:
            model_size: Suffix of the Whisper checkpoint name; the checkpoint
                loaded is ``openai/whisper-{model_size}``.
        """
        print("Initializing models...")
        # Initialize Whisper
        checkpoint = f"openai/whisper-{model_size}"
        self.processor = WhisperProcessor.from_pretrained(checkpoint)
        self.model = WhisperForConditionalGeneration.from_pretrained(checkpoint).to(DEVICE)
        # Initialize emotion classifier
        self.emotion_classifier = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            top_k=1
        )
        # Response templates (a fresh dict per instance)
        self.response_templates = self._build_response_templates()

    @staticmethod
    def _build_response_templates():
        """Return a new ``{emotion: {style: [responses]}}`` mapping."""
        return {
            'happy': {
                'motivational': ["Your joy is absolutely contagious! This kind of positive energy is what makes life worth living..."],
                'calm': ["I can feel the warmth of your happiness radiating through your words..."],
                'energetic': ["OH MY GOODNESS THIS IS INCREDIBLE NEWS!!! LET'S CELEBRATE!!!"],
                'angry': ["How can you be happy when there's so much suffering in the world?"]
            },
            'sad': {
                'motivational': ["I hear the sadness in your voice, and I want you to know that this feeling won't last forever..."],
                'calm': ["I sense your heavy heart, and I'm here with you in this moment..."],
                'energetic': ["HEY! I know you're feeling DOWN right now, but LISTEN UP! YOU'VE GOT THIS!!"],
                'angry': ["Stop wallowing and do something productive!"]
            },
            'angry': {
                'motivational': ["That passion can fuel positive change! Let's channel this energy constructively..."],
                'calm': ["I sense your anger. Let's take a deep breath and find solutions..."],
                'energetic': ["YEAH! I FEEL THAT TOO! NOW LET'S DO SOMETHING ABOUT IT!!"],
                'angry': ["You think YOU'RE angry? The whole system is broken!"]
            },
            'disgust': {
                'motivational': ["I can tell you're feeling repulsed. Sometimes disgust protects us from harmful things..."],
                'calm': ["I sense your strong distaste. Let's remove ourselves from this situation..."],
                'energetic': ["EW EW EW!!! GROSS ALERT! LET'S GET AWAY FROM THAT RIGHT NOW!!"],
                'angry': ["This is ABSOLUTELY DISGUSTING and UNACCEPTABLE!"]
            },
            'fear': {
                'motivational': ["It's understandable to feel scared. Remember you've survived tough times before..."],
                'calm': ["Fear is a natural response. Let's assess the situation calmly..."],
                'energetic': ["DANGER ALERT! BUT WAIT - LET'S MAKE A SAFETY PLAN!!"],
                'angry': ["Stop being such a coward!"]
            },
            'neutral': {
                'motivational': ["I appreciate you sharing this with me. Every day brings new opportunities..."],
                'calm': ["Thank you for expressing yourself. I'm here to listen..."],
                'energetic': ["LET'S FIND SOMETHING EXCITING TO DO RIGHT NOW!!"],
                'angry': ["Is that all? How utterly boring."]
            },
            'surprise': {
                'motivational': ["Unexpected moments can be life's greatest gifts!..."],
                'calm': ["I sense your surprise. Let's observe what unfolds..."],
                'energetic': ["WHOA! NO WAY! THAT'S INCREDIBLE!!!"],
                'angry': ["Why are you surprised? You should have seen this coming!"]
            },
            'tired': {
                'motivational': ["Rest is revolutionary. Recharge and return stronger..."],
                'calm': ["Fatigue is natural. Honor your need for rest..."],
                'energetic': ["DON'T QUIT! PUSH THROUGH! YOU'RE ALMOST THERE!!"],
                'angry': ["Tired? That's pathetic! Winners never rest!"]
            }
        }

    def detect_emotion(self, text):
        """Return the emotion key for ``text``.

        Keyword overrides ('disgust', then 'tired') take precedence over the
        classifier. Otherwise the classifier's top label is lower-cased and
        translated through ``_LABEL_ALIASES`` so it lines up with the
        response-template keys.

        Args:
            text: The transcription to classify.

        Returns:
            An emotion string; 'neutral' for blank text or on any error.
        """
        try:
            if not text or not text.strip():
                return 'neutral'

            # Manual checks come first: they override the classifier anyway,
            # and doing them up front keeps them working if the model fails.
            lowered = text.lower()
            if any(kw in lowered for kw in self._DISGUST_KEYWORDS):
                return 'disgust'
            if any(kw in lowered for kw in self._TIRED_KEYWORDS):
                return 'tired'

            # The pipeline may return [[{...}]] or [{...}] depending on how
            # top_k was supplied; unwrap until the top prediction dict.
            prediction = self.emotion_classifier(text)
            while isinstance(prediction, (list, tuple)):
                prediction = prediction[0]
            label = prediction['label'].lower()
            return self._LABEL_ALIASES.get(label, label)
        except Exception as e:
            print(f"Emotion detection error: {e}")
            return 'neutral'

    def generate_response(self, text, emotion, style):
        """Pick a canned response for ``emotion`` in the requested ``style``.

        Args:
            text: The transcription. Currently unused; kept for interface
                compatibility.
            emotion: Emotion key; unknown keys fall back to 'neutral'.
            style: Style key; unknown keys fall back to 'motivational'.

        Returns:
            The chosen response as a plain ``str``. A fixed fallback sentence
            is returned if selection fails.
        """
        try:
            if emotion not in self.response_templates:
                emotion = 'neutral'
            if style not in self.response_templates[emotion]:
                style = 'motivational'
            # np.random.choice yields numpy.str_; hand callers a built-in str.
            return str(np.random.choice(self.response_templates[emotion][style]))
        except Exception as e:
            print(f"Response generation error: {e}")
            return "I appreciate you sharing this with me."

    def text_to_speech(self, text, style="motivational"):
        """Synthesize ``text`` to an MP3 file with gTTS.

        Args:
            text: The text to speak.
            style: Selects the gTTS parameters; unknown styles use plain
                English with gTTS defaults.

        Returns:
            Path of the generated ``.mp3`` temp file (the caller owns and must
            delete it), or ``None`` if synthesis failed.
        """
        # Regional accents are selected through the Google Translate host
        # (`tld`) with lang='en'; the former 'en-uk' / 'en-au' language codes
        # are deprecated in gTTS and fall back to plain 'en'.
        voice_params = {
            'motivational': {'lang': 'en', 'slow': False},
            'calm': {'lang': 'en', 'slow': True},
            'energetic': {'lang': 'en', 'tld': 'co.uk', 'slow': False},
            'angry': {'lang': 'en', 'tld': 'com.au', 'slow': False}
        }.get(style, {'lang': 'en'})

        mp3_path = None
        try:
            # Reserve a unique path, then close the handle before gTTS writes
            # to it by name.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
                mp3_path = fp.name
            tts = gTTS(text=text, **voice_params)
            tts.save(mp3_path)
            return mp3_path
        except Exception as e:
            print(f"TTS error: {e}")
            # delete=False means nobody else will remove a half-written file.
            if mp3_path is not None:
                try:
                    os.unlink(mp3_path)
                except OSError:
                    pass
            return None

    def process_audio(self, audio_path, style):
        """Run the full pipeline on one audio file.

        Args:
            audio_path: Path of the audio file to transcribe.
            style: Response style passed to ``generate_response`` and
                ``text_to_speech``.

        Returns:
            A dict with keys ``"transcription"``, ``"emotion"``,
            ``"response"`` and ``"audio"`` (path of the spoken response, or
            ``None``). If any step raises, a fixed error dict with the same
            keys is returned instead.
        """
        try:
            # Transcribe (audio is loaded/resampled at 16 kHz for Whisper)
            waveform, _ = librosa.load(audio_path, sr=16000)
            input_features = self.processor(
                waveform, sampling_rate=16000, return_tensors="pt"
            ).input_features.to(DEVICE)
            predicted_ids = self.model.generate(input_features, max_length=200)
            transcription = self.processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
            # Detect emotion
            emotion = self.detect_emotion(transcription)
            # Generate response
            response = self.generate_response(transcription, emotion, style)
            # Convert to speech
            audio_output = self.text_to_speech(response, style)
            return {
                "transcription": transcription,
                "emotion": emotion,
                "response": response,
                "audio": audio_output
            }
        except Exception as e:
            print(f"Processing error: {e}")
            return {
                "transcription": "Error processing audio",
                "emotion": "neutral",
                "response": "Sorry, something went wrong",
                "audio": None
            }
# Build the single shared transcriber at import time. Its constructor loads
# the Whisper checkpoint and the emotion-classification pipeline.
transcriber = EmotionAwareTranscriber()
# Path of the most recent spoken-response file, or None when there is none.
# process_audio_wrapper reads and reassigns this so it can delete the
# previous file on the next request.
last_audio_file = None
# Defined after `transcriber` and `last_audio_file`, which it uses as globals.
def process_audio_wrapper(audio_path, style):
    """Adapt ``transcriber.process_audio`` to the Gradio click handler.

    Runs the pipeline, deletes the spoken-response file produced by the
    previous call, remembers the new one, and returns the values in the order
    the UI outputs expect.

    Args:
        audio_path: Path of the uploaded audio file.
        style: Selected response style.

    Returns:
        A 4-tuple ``(transcription, EMOTION_UPPERCASED, response, audio)``
        where ``audio`` is the response file path or ``None``.
    """
    global last_audio_file
    result = transcriber.process_audio(audio_path, style)

    # Clean up the previous audio file. Try the unlink directly instead of
    # checking existence first, so a file that vanished in between is not an
    # error; any other OS-level failure is reported but never fatal.
    if last_audio_file:
        try:
            os.unlink(last_audio_file)
        except FileNotFoundError:
            pass
        except OSError as e:
            print(f"Error cleaning up audio file: {e}")
    last_audio_file = result["audio"]

    return (
        result["transcription"],
        result["emotion"].upper(),
        result["response"],
        result["audio"] or None,
    )
# Gradio interface
# NOTE(review): the original indentation was lost, so the exact nesting of the
# Row/Column containers below is a reconstruction -- confirm against the
# deployed layout. The click handler must stay inside the Blocks context.
with gr.Blocks(title="Emotion-Aware Audio Transcriber") as demo:
    gr.Markdown("# 🎤 Emotion-Aware Audio Transcriber")
    gr.Markdown("Upload an audio file to get a transcription with emotional analysis and response")

    # Inputs: the audio file (passed to the handler as a path) and the style.
    with gr.Row():
        audio_input = gr.Audio(label="Upload Audio", type="filepath")
        style_selector = gr.Radio(
            ["motivational", "calm", "energetic", "angry"],
            label="Response Style",
            value="motivational",
        )

    submit_btn = gr.Button("Process", variant="primary")

    # Outputs, in the same order as process_audio_wrapper's return tuple.
    with gr.Column():
        transcription_output = gr.Textbox(label="Transcription")
        emotion_output = gr.Textbox(label="Detected Emotion")
        response_output = gr.Textbox(label="Generated Response")
        audio_output = gr.Audio(label="Spoken Response")

    submit_btn.click(
        fn=process_audio_wrapper,
        inputs=[audio_input, style_selector],
        outputs=[transcription_output, emotion_output, response_output, audio_output],
    )
# Launch the app when run as a script; when imported, expose the Blocks
# object under the name `app` instead.
if __name__ == "__main__":
    demo.launch()
else:
    # This part is crucial for HuggingFace Spaces deployment
    app = demo