Create app.py
app.py ADDED
@@ -0,0 +1,291 @@
import gradio as gr
import librosa
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
from huggingface_hub import login
import tempfile
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware

# === CONFIGURATION ===
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
MODEL_NAME = "google/gemma-2b-it"

# Login to Hugging Face (Gemma is a gated model, so a valid token is needed)
try:
    if HF_TOKEN and HF_TOKEN != "your_hf_token_here":
        login(token=HF_TOKEN)
        print("✅ Authenticated with Hugging Face Hub")
    else:
        print("⚠️ No HF_TOKEN provided, using fallback method")
except Exception as e:
    print(f"⚠️ Authentication warning: {e}")

class GemmaAudioEmotionAnalyzer:
    def __init__(self, model_name: str = MODEL_NAME):
        self.model_name = model_name
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"🚀 Using device: {self.device}")

        try:
            print("📥 Loading Gemma tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                token=HF_TOKEN if HF_TOKEN != "your_hf_token_here" else None,
                trust_remote_code=True
            )

            print("📥 Loading Gemma model...")
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                token=HF_TOKEN if HF_TOKEN != "your_hf_token_here" else None,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto" if self.device == "cuda" else None,
                trust_remote_code=True
            )

            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            print("✅ Gemma model loaded successfully!")

        except Exception as e:
            print(f"❌ Failed to load Gemma: {e}")
            print("🔧 Using fallback rule-based analyzer")
            self.model = None
            self.tokenizer = None

    def extract_fast_features(self, audio_path: str) -> dict:
        """Extract a minimal feature set quickly (first 3 seconds at 16 kHz)."""
        try:
            y, sr = librosa.load(audio_path, sr=16000, duration=3)

            # Run pitch tracking once and guard against silent/unvoiced clips,
            # which would otherwise produce NaN from a median over an empty array
            pitches, _ = librosa.piptrack(y=y, sr=sr)
            voiced = pitches[pitches > 0]
            pitch = float(np.median(voiced)) if voiced.size > 0 else 150.0

            features = {
                'energy': float(np.mean(librosa.feature.rms(y=y))),
                'brightness': float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))),
                'pitch': pitch,
                'tempo': float(librosa.beat.tempo(y=y, sr=sr)[0]),
                'speech_rate': float(np.mean(librosa.feature.zero_crossing_rate(y)))
            }
            return features
        except Exception as e:
            print(f"❌ Feature extraction error: {e}")
            return {'energy': 0.05, 'brightness': 1500, 'pitch': 200, 'tempo': 100, 'speech_rate': 0.1}

    def create_gemma_prompt(self, features: dict) -> str:
        """Create an optimized prompt for Gemma (thresholds are rough heuristics)."""
        prompt = f"""Analyze the emotional content from these audio features:

Audio Characteristics:
- Energy Level: {"High" if features['energy'] > 0.08 else "Low" if features['energy'] < 0.03 else "Medium"}
- Brightness: {"Bright" if features['brightness'] > 2000 else "Dark" if features['brightness'] < 1000 else "Neutral"}
- Average Pitch: {"High" if features['pitch'] > 250 else "Low" if features['pitch'] < 150 else "Medium"}
- Tempo: {"Fast" if features['tempo'] > 140 else "Slow" if features['tempo'] < 90 else "Moderate"}
- Speech Rate: {"Rapid" if features['speech_rate'] > 0.15 else "Slow" if features['speech_rate'] < 0.08 else "Normal"}

Based on these acoustic properties, identify the primary emotion. Choose ONE from: happy, sad, angry, fearful, neutral, excited, calm.

Respond in this exact format:
Emotion: [emotion]
Confidence: [high/medium/low]
Reason: [brief reason based on features]

Analysis:"""
        return prompt

    def generate_with_gemma(self, prompt: str) -> str:
        """Generate a response using Gemma with lightweight sampling settings."""
        if self.model is None:
            return "Emotion: neutral\nConfidence: medium\nReason: Using fallback analysis"

        try:
            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                max_length=512,
                truncation=True
            ).to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=100,
                    temperature=0.7,
                    do_sample=True,
                    top_p=0.9,
                    pad_token_id=self.tokenizer.eos_token_id,
                    repetition_penalty=1.1
                )

            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Drop the echoed prompt so only the completion remains
            return response[len(prompt):].strip()

        except Exception as e:
            print(f"❌ Gemma generation error: {e}")
            return "Emotion: neutral\nConfidence: low\nReason: Analysis unavailable"

    def parse_gemma_response(self, response: str) -> dict:
        """Parse Gemma's Emotion/Confidence/Reason lines into a dict."""
        lines = response.split('\n')
        result = {
            'emotion': 'neutral',
            'confidence': 'medium',
            'reason': 'No analysis provided',
            'raw_response': response
        }

        for line in lines:
            line = line.strip()
            if line.startswith('Emotion:'):
                result['emotion'] = line.split(':', 1)[1].strip().lower()
            elif line.startswith('Confidence:'):
                result['confidence'] = line.split(':', 1)[1].strip().lower()
            elif line.startswith('Reason:'):
                result['reason'] = line.split(':', 1)[1].strip()

        return result

    def analyze_emotion(self, audio_path: str) -> dict:
        """Main analysis function: features -> prompt -> Gemma -> parsed result."""
        print(f"🎵 Analyzing: {os.path.basename(audio_path)}")

        features = self.extract_fast_features(audio_path)
        prompt = self.create_gemma_prompt(features)

        print("🤖 Querying Gemma...")
        gemma_response = self.generate_with_gemma(prompt)

        result = self.parse_gemma_response(gemma_response)
        result['features'] = features

        print(f"✅ Gemma result: {result['emotion']}")
        return result

# Initialize the analyzer once at startup so the model is shared by all requests
print("🚀 Initializing Gemma Audio Analyzer...")
analyzer = GemmaAudioEmotionAnalyzer()

def process_audio(audio_path: str) -> str:
    """Gradio interface function"""
    if not audio_path:
        return "❌ Please provide an audio file"

    try:
        result = analyzer.analyze_emotion(audio_path)

        emotion_icons = {
            'happy': '😊', 'sad': '😢', 'angry': '😠',
            'fearful': '😨', 'neutral': '😐', 'excited': '🤩', 'calm': '😌'
        }

        icon = emotion_icons.get(result['emotion'], '😐')

        output = f"""
{icon} **Emotion**: {result['emotion'].title()}
📊 **Confidence**: {result['confidence'].title()}
📝 **Reason**: {result['reason']}

🔬 **Audio Analysis**:
• Energy: {result['features']['energy']:.3f}
• Brightness: {result['features']['brightness']:.0f} Hz
• Pitch: {result['features']['pitch']:.0f} Hz
• Tempo: {result['features']['tempo']:.0f} BPM

🤖 **Powered by Google Gemma**
"""
        return output

    except Exception as e:
        return f"❌ Error: {str(e)}"

# ============ NEW: FastAPI Integration ============
app = FastAPI(title="Echo Emotion Detection API")

# Enable CORS so browser clients on other origins can call the API
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/")
async def root():
    """API info"""
    return {
        "service": "Echo Emotion Detection API",
        "status": "online",
        "version": "1.0.0",
        "endpoints": {
            "analyze": "POST /api/analyze",
            "health": "GET /health"
        }
    }

@app.get("/health")
async def health_check():
    """Health check endpoint"""
    return {
        "status": "healthy",
        "model_loaded": analyzer.model is not None
    }

@app.post("/api/analyze")
async def api_analyze(audio: UploadFile = File(...)):
    """
    API endpoint for emotion detection

    Example usage:
    curl -X POST "https://your-space.hf.space/api/analyze" \
         -F "audio=@voice.mp3"
    """
    tmp_path = None
    try:
        # Save the upload to a temporary file librosa can read from disk
        suffix = os.path.splitext(audio.filename or "")[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            content = await audio.read()
            tmp_file.write(content)
            tmp_path = tmp_file.name

        # Analyze emotion using the shared analyzer instance
        result = analyzer.analyze_emotion(tmp_path)

        # Return structured JSON response
        return {
            "success": True,
            "emotion": result['emotion'],
            "confidence": result['confidence'],
            "reason": result['reason'],
            "features": result['features']
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing audio: {str(e)}")
    finally:
        # Clean up the temp file even when analysis fails
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)

# Create Gradio interface
demo = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(
        sources=["upload"],
        type="filepath",
        label="Upload Audio File",
        max_length=10
    ),
    outputs=gr.Markdown(label="Gemma Emotion Analysis"),
    title="🎵 Audio Emotion Analysis with Google Gemma",
    description="Upload audio to analyze emotions using Google's Gemma model",
    examples=[],
    allow_flagging="never"
)

# Mount Gradio on FastAPI at the root path so one server exposes both UI and API
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    print("🚀 Starting Echo API Server...")
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
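
For reference, a minimal Python client for the JSON endpoint might look like the sketch below. It mirrors the curl example in the api_analyze docstring; the Space URL is the same placeholder used there, "voice.mp3" is a hypothetical input file, and the requests package is assumed to be installed.

# Client sketch for POST /api/analyze (assumptions: placeholder URL, local voice.mp3)
import requests

API_URL = "https://your-space.hf.space/api/analyze"  # replace with the real Space URL

with open("voice.mp3", "rb") as f:
    # Field name "audio" must match the UploadFile parameter in api_analyze
    resp = requests.post(API_URL, files={"audio": ("voice.mp3", f, "audio/mpeg")})

resp.raise_for_status()
data = resp.json()
print(data["emotion"], data["confidence"])  # e.g. "happy high"

A non-2xx status raises via raise_for_status(), which covers the HTTP 500 path the endpoint uses for processing errors.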