KavyaBansal committed on
Commit
292a85f
·
verified ·
1 Parent(s): a465989

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +241 -0
app.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
4
+ import librosa
5
+ from gtts import gTTS
6
+ import numpy as np
7
+ import tempfile
8
+ import os
9
+
10
# Device configuration: run inference on GPU when CUDA is available,
# otherwise fall back to CPU. Used by model .to(...) calls below.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")
13
+
14
class EmotionAwareTranscriber:
    """Speech-to-text pipeline with emotion detection and styled spoken replies.

    Combines three stages:
      1. OpenAI Whisper (via transformers) transcribes the input audio.
      2. A DistilRoBERTa text classifier labels the transcript's emotion.
      3. A canned template, chosen by (emotion, response style), is rendered
         to speech with gTTS.
    """

    def __init__(self, model_size="base"):
        """Load the Whisper model/processor and the emotion classifier.

        model_size: Whisper checkpoint suffix ("tiny", "base", "small", ...);
        downloads weights on first use, so construction can be slow.
        """
        print("Initializing models...")

        # Initialize Whisper (processor handles feature extraction + decoding).
        self.processor = WhisperProcessor.from_pretrained(f"openai/whisper-{model_size}")
        self.model = WhisperForConditionalGeneration.from_pretrained(f"openai/whisper-{model_size}").to(DEVICE)

        # Initialize emotion classifier. top_k=1 makes the pipeline return a
        # ranked list per input, hence the [0][0] indexing in detect_emotion.
        self.emotion_classifier = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            top_k=1
        )

        # Response templates, keyed by emotion then by response style. Each
        # leaf is a list (np.random.choice picks one) even when it has a
        # single entry, so more variants can be added without code changes.
        # NOTE(review): 'tired' is not a label this classifier emits; it is
        # only reachable via the keyword check in detect_emotion.
        self.response_templates = {
            'happy': {
                'motivational': ["Your joy is absolutely contagious! This kind of positive energy is what makes life worth living..."],
                'calm': ["I can feel the warmth of your happiness radiating through your words..."],
                'energetic': ["OH MY GOODNESS THIS IS INCREDIBLE NEWS!!! LET'S CELEBRATE!!!"],
                'angry': ["How can you be happy when there's so much suffering in the world?"]
            },
            'sad': {
                'motivational': ["I hear the sadness in your voice, and I want you to know that this feeling won't last forever..."],
                'calm': ["I sense your heavy heart, and I'm here with you in this moment..."],
                'energetic': ["HEY! I know you're feeling DOWN right now, but LISTEN UP! YOU'VE GOT THIS!!"],
                'angry': ["Stop wallowing and do something productive!"]
            },
            'angry': {
                'motivational': ["That passion can fuel positive change! Let's channel this energy constructively..."],
                'calm': ["I sense your anger. Let's take a deep breath and find solutions..."],
                'energetic': ["YEAH! I FEEL THAT TOO! NOW LET'S DO SOMETHING ABOUT IT!!"],
                'angry': ["You think YOU'RE angry? The whole system is broken!"]
            },
            'disgust': {
                'motivational': ["I can tell you're feeling repulsed. Sometimes disgust protects us from harmful things..."],
                'calm': ["I sense your strong distaste. Let's remove ourselves from this situation..."],
                'energetic': ["EW EW EW!!! GROSS ALERT! LET'S GET AWAY FROM THAT RIGHT NOW!!"],
                'angry': ["This is ABSOLUTELY DISGUSTING and UNACCEPTABLE!"]
            },
            'fear': {
                'motivational': ["It's understandable to feel scared. Remember you've survived tough times before..."],
                'calm': ["Fear is a natural response. Let's assess the situation calmly..."],
                'energetic': ["DANGER ALERT! BUT WAIT - LET'S MAKE A SAFETY PLAN!!"],
                'angry': ["Stop being such a coward!"]
            },
            'neutral': {
                'motivational': ["I appreciate you sharing this with me. Every day brings new opportunities..."],
                'calm': ["Thank you for expressing yourself. I'm here to listen..."],
                'energetic': ["LET'S FIND SOMETHING EXCITING TO DO RIGHT NOW!!"],
                'angry': ["Is that all? How utterly boring."]
            },
            'surprise': {
                'motivational': ["Unexpected moments can be life's greatest gifts!..."],
                'calm': ["I sense your surprise. Let's observe what unfolds..."],
                'energetic': ["WHOA! NO WAY! THAT'S INCREDIBLE!!!"],
                'angry': ["Why are you surprised? You should have seen this coming!"]
            },
            'tired': {
                'motivational': ["Rest is revolutionary. Recharge and return stronger..."],
                'calm': ["Fatigue is natural. Honor your need for rest..."],
                'energetic': ["DON'T QUIT! PUSH THROUGH! YOU'RE ALMOST THERE!!"],
                'angry': ["Tired? That's pathetic! Winners never rest!"]
            }
        }

    def detect_emotion(self, text):
        """Return a lowercase emotion label for *text*.

        Keyword overrides for 'disgust' and 'tired' take precedence over the
        classifier's top label; any failure falls back to 'neutral'.
        """
        try:
            # top_k=1 yields a list-of-lists; take the single best prediction.
            result = self.emotion_classifier(text)[0][0]
            emotion = result['label'].lower()

            # Manual checks: the classifier under-reports disgust and has no
            # 'tired' class at all, so simple keyword matches override it.
            disgust_keywords = ['disgusting', 'gross', 'revolting']
            if any(kw in text.lower() for kw in disgust_keywords):
                return 'disgust'

            tired_keywords = ['exhausted', 'tired', 'sleepy']
            if any(kw in text.lower() for kw in tired_keywords):
                return 'tired'

            return emotion
        except Exception as e:
            # Emotion detection is best-effort; never let it break the app.
            print(f"Emotion detection error: {e}")
            return 'neutral'

    def generate_response(self, text, emotion, style):
        """Pick a canned reply for (emotion, style), with safe fallbacks.

        Unknown emotions degrade to 'neutral', unknown styles to
        'motivational'; any unexpected failure returns a generic reply.
        """
        try:
            if emotion not in self.response_templates:
                emotion = 'neutral'
            if style not in self.response_templates[emotion]:
                style = 'motivational'
            # Random pick so multi-entry template lists produce variety.
            return np.random.choice(self.response_templates[emotion][style])
        except Exception as e:
            print(f"Response generation error: {e}")
            return "I appreciate you sharing this with me."

    def text_to_speech(self, text, style="motivational"):
        """Render *text* to an mp3 via gTTS; return the temp file path.

        The style maps to gTTS voice parameters (accent/speed). Returns None
        on failure. The file is created with delete=False, so the caller is
        responsible for removing it (see process_audio_wrapper).
        """
        try:
            voice_params = {
                'motivational': {'lang': 'en', 'slow': False},
                'calm': {'lang': 'en', 'slow': True},
                'energetic': {'lang': 'en-uk', 'slow': False},
                'angry': {'lang': 'en-au', 'slow': False}
            }.get(style, {'lang': 'en'})

            # delete=False: the path must outlive this scope so Gradio can
            # stream the audio back to the client.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
                tts = gTTS(text=text, **voice_params)
                tts.save(fp.name)
                return fp.name
        except Exception as e:
            # gTTS needs network access; degrade to text-only on failure.
            print(f"TTS error: {e}")
            return None

    def process_audio(self, audio_path, style):
        """Full pipeline: audio file -> transcript, emotion, reply, speech.

        Returns a dict with keys "transcription", "emotion", "response",
        "audio" (path to an mp3 or None). On any error a placeholder dict
        with the same keys is returned instead of raising.
        """
        try:
            # Transcribe: Whisper expects 16 kHz mono input.
            waveform, _ = librosa.load(audio_path, sr=16000)
            input_features = self.processor(waveform, sampling_rate=16000, return_tensors="pt").input_features.to(DEVICE)
            predicted_ids = self.model.generate(input_features, max_length=200)
            transcription = self.processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

            # Detect emotion from the transcript text.
            emotion = self.detect_emotion(transcription)

            # Generate a styled text response.
            response = self.generate_response(transcription, emotion, style)

            # Convert the response to speech (may be None on TTS failure).
            audio_output = self.text_to_speech(response, style)

            return {
                "transcription": transcription,
                "emotion": emotion,
                "response": response,
                "audio": audio_output
            }
        except Exception as e:
            # Catch-all keeps the UI responsive even when a stage blows up.
            print(f"Processing error: {e}")
            return {
                "transcription": "Error processing audio",
                "emotion": "neutral",
                "response": "Sorry, something went wrong",
                "audio": None
            }
159
+
160
# Add installation code for Google Colab
def install_dependencies():
    """Install the pip packages and the ffmpeg binary the app needs.

    Intended for Google Colab (or any Debian-based environment where
    apt-get is available). Best-effort: failures are left to surface
    later when the missing dependency is actually used.
    """
    import shutil
    import subprocess
    import sys

    print("Installing required packages...")

    # Use the running interpreter's pip; a bare "pip" on PATH can belong
    # to a different Python environment than the one executing this app.
    subprocess.run(
        [sys.executable, "-m", "pip", "install",
         "gradio", "torch", "transformers", "librosa", "gtts", "numpy"]
    )

    # The audio stack needs the ffmpeg *binary*. The original probed
    # `import ffmpeg`, which tests for the unrelated ffmpeg-python
    # package, not the executable — check the PATH instead.
    if shutil.which("ffmpeg") is None:
        print("Installing ffmpeg...")
        subprocess.run(["apt-get", "update", "-qq"])
        subprocess.run(["apt-get", "install", "-y", "-qq", "ffmpeg"])

    print("Dependencies installed successfully.")
177
+
178
def process_audio_wrapper(audio_path, style):
    """Gradio callback: run the pipeline and unpack its result dict.

    Returns (transcription, EMOTION, response, audio_path_or_None) in the
    order the Gradio outputs are wired. Relies on the module-level
    ``transcriber`` created in the __main__ block.
    """
    result = transcriber.process_audio(audio_path, style)

    # Delete the previous response's temp mp3 so files don't accumulate.
    if process_audio_wrapper.last_audio:
        try:
            os.unlink(process_audio_wrapper.last_audio)
        except OSError:
            # Best effort: the file may already be gone or still in use.
            pass
    process_audio_wrapper.last_audio = result["audio"]

    return (
        result["transcription"],
        result["emotion"].upper(),
        result["response"],
        result["audio"] if result["audio"] else None
    )

# Track the last generated audio file on the function object itself.
# This must run *after* the def: the original assigned the attribute
# before the function existed, raising NameError at import time.
process_audio_wrapper.last_audio = None
198
+
199
# Main execution: install deps (Colab only), build the Gradio UI, launch.
if __name__ == "__main__":
    # Detect Google Colab by probing for its bootstrap module. Catch only
    # ImportError — a bare except would also swallow KeyboardInterrupt.
    try:
        import google.colab  # noqa: F401
        IN_COLAB = True
    except ImportError:
        IN_COLAB = False

    if IN_COLAB:
        install_dependencies()

    # Initialize transcriber after dependencies are installed (downloads
    # model weights on first run).
    transcriber = EmotionAwareTranscriber()

    # Gradio interface
    with gr.Blocks(title="Emotion-Aware Audio Transcriber") as demo:
        gr.Markdown("# 🎤 Emotion-Aware Audio Transcriber")
        gr.Markdown("Upload an audio file to get a transcription with emotional analysis and response")

        with gr.Row():
            audio_input = gr.Audio(label="Upload Audio", type="filepath")
            style_selector = gr.Radio(
                ["motivational", "calm", "energetic", "angry"],
                label="Response Style",
                value="motivational"
            )
        submit_btn = gr.Button("Process", variant="primary")

        with gr.Column():
            transcription_output = gr.Textbox(label="Transcription")
            emotion_output = gr.Textbox(label="Detected Emotion")
            response_output = gr.Textbox(label="Generated Response")
            audio_output = gr.Audio(label="Spoken Response")

        submit_btn.click(
            fn=process_audio_wrapper,
            inputs=[audio_input, style_selector],
            outputs=[transcription_output, emotion_output, response_output, audio_output]
        )

    # Launch with share=True to generate a public URL (required in Colab,
    # where localhost is not reachable from the browser).
    demo.launch(debug=True, share=True)