Spaces:

Komal133
/

Emotion_Triggered_Alarm_System

Sleeping

App Files Files Community

Komal133 committed on Jun 21, 2025

Commit

b453cec

verified ·

1 Parent(s): f76d519

Update app.py

Browse files

Files changed (1) hide show

app.py +113 -91

app.py CHANGED Viewed

@@ -1,94 +1,116 @@
-import torch
-import librosa
 import numpy as np
-from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
-from fastapi import FastAPI, UploadFile, File
-from pydantic import BaseModel
-from datetime import datetime
-import requests
-# Initialize FastAPI application
-app = FastAPI()
-# Hugging Face Model
-model_name = "facebook/wav2vec2-large-960h"
-processor = Wav2Vec2Processor.from_pretrained(model_name)
-# Initialize model for sequence classification (you will need a custom fine-tuned model for panic/scream detection)
-model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name, num_labels=2)  # Assuming 2 classes: 'not panic' and 'panic'
-# Salesforce API Configuration (You should configure Salesforce API integration here)
-SF_URL = "https://your-salesforce-instance.com"
-SF_API_KEY = "your-api-key"  # Placeholder, use secure methods for handling API keys
-# Helper functions
-def process_audio(file_path):
-    """
-    Function to process and predict the class of the audio file.
-    Fine-tuning may be required for accurate panic/scream detection.
-    """
-    # Load audio file using librosa (resample to 16 kHz)
-    audio_input, _ = librosa.load(file_path, sr=16000)
-    # Pre-process audio using Hugging Face's Wav2Vec2Processor
-    inputs = processor(audio_input, return_tensors="pt", padding=True)
-    # Predict emotions (this requires fine-tuning for scream/panic detection)
-    with torch.no_grad():
-        logits = model(**inputs).logits
-    # Get predicted label (0: no panic, 1: panic/scream)
-    predicted_class = torch.argmax(logits, dim=-1).item()
-    return predicted_class
-def send_salesforce_alert(alert_data):
-    """
-    Sends alert data to the Salesforce system (via REST API)
-    """
-    headers = {'Authorization': f'Bearer {SF_API_KEY}'}
-    response = requests.post(f"{SF_URL}/alerts", json=alert_data, headers=headers)
-    return response.json()
-def log_alert_to_salesforce(audio_metadata, alert_type):
-    """
-    Logs the detected alert in Salesforce with the metadata and alert type.
-    """
-    alert_data = {
-        "timestamp": datetime.now().isoformat(),
-        "audio_metadata": audio_metadata,
-        "alert_type": alert_type
-    }
-    return send_salesforce_alert(alert_data)
-# API Endpoint for uploading audio file and processing it
-@app.post("/upload-audio/")
-async def upload_audio(file: UploadFile = File(...)):
-    """
-    Handles audio file upload, processes the audio for panic detection,
-    and triggers an alert if necessary.
-    """
-    # Save the uploaded file temporarily
-    file_location = f"./temp_audio/{file.filename}"
-    with open(file_location, "wb") as audio_file:
-        audio_file.write(file.file.read())
-    # Process audio to detect panic/scream
-    detection_result = process_audio(file_location)
-    # Set alert type based on confidence
-    alert_type = "High-Risk" if detection_result == 1 else "Medium-Risk"
-    # Log detection and send alert to Salesforce
-    audio_metadata = {"filename": file.filename, "file_size": len(file.file.read())}
-    log_alert_to_salesforce(audio_metadata, alert_type)
-    return {"message": f"Alert triggered: {alert_type}", "alert_type": alert_type}
-# API Endpoint to start/stop detection system (you can implement start/stop functionality if needed)
-@app.get("/toggle-detection/{status}")
-async def toggle_detection(status: str):
-    """
-    Start or stop the detection system.
-    """
-    return {"message": f"Detection system {status}"}

+import gradio as gr
+import sounddevice as sd
 import numpy as np
+import librosa
+import torch
+from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
+import logging
+# Logging: INFO-level root configuration plus a module-scoped logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Model: pick the compute device first, then load the feature extractor and
+# the classifier, move the classifier to that device and switch it to eval mode.
+MODEL_NAME = "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_NAME)
+model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_NAME).to(device).eval()
+logger.info(f"Loaded model {MODEL_NAME} on {device}")
+# Audio capture settings.
+SAMPLE_RATE = 16000  # Hz; sampling rate passed to the feature extractor and the input stream
+DURATION = 5  # seconds of audio per capture block (blocksize = SAMPLE_RATE * DURATION)
+# Shared capture state, mutated by start_recording() / stop_recording().
+recording = None  # list of captured chunks once a recording has been started
+is_recording = False  # polled by the capture loop; set to False to stop it
+# Function to process audio and detect screams
+def process_audio(audio_data, sample_rate=SAMPLE_RATE, high_risk_threshold=80):
+    """Classify one audio clip and decide whether it sounds like a scream.
+
+    Args:
+        audio_data: Waveform samples handed to the feature extractor
+            (assumed to be a single mono clip - TODO confirm with callers).
+        sample_rate: Sampling rate of ``audio_data`` in Hz.
+        high_risk_threshold: Confidence (in percent) above which a detected
+            scream is rated "High-Risk". Defaults to 80.
+
+    Returns:
+        A tuple ``(scream_detected, confidence, label, risk_level)``:
+        ``confidence`` is the top softmax probability in percent, ``label``
+        is the model's name for the top class, and ``risk_level`` is
+        "High-Risk", "Medium-Risk" or ``None``. If inference fails for any
+        reason the error is logged and ``(False, 0, "error", None)`` is
+        returned.
+    """
+    # NOTE(review): the published label set of this checkpoint appears to use
+    # the adjective forms ("fearful", "surprised"); matching only "fear" /
+    # "surprise" would never fire. Accept both spellings - confirm against
+    # model.config.id2label.
+    scream_labels = {"fear", "fearful", "surprise", "surprised"}
+    try:
+        inputs = feature_extractor(audio_data, sampling_rate=sample_rate, return_tensors="pt", padding=True)
+        inputs = {key: val.to(device) for key, val in inputs.items()}
+        with torch.no_grad():
+            logits = model(**inputs).logits
+        probabilities = torch.nn.functional.softmax(logits, dim=-1)
+        confidence, predicted_label = torch.max(probabilities, dim=-1)
+        confidence = confidence.item() * 100
+        label = model.config.id2label[predicted_label.item()]
+    except Exception as e:
+        # Best-effort boundary: callers render the result as text, so report
+        # a sentinel tuple instead of propagating.
+        logger.error(f"Error processing audio: {e}")
+        return False, 0, "error", None
+    # Compare case-insensitively; the returned label keeps the model's spelling.
+    scream_detected = str(label).lower() in scream_labels
+    risk_level = None
+    if scream_detected:
+        if confidence > high_risk_threshold:
+            risk_level = "High-Risk"
+        elif 50 <= confidence <= high_risk_threshold:
+            risk_level = "Medium-Risk"
+    return scream_detected, confidence, label, risk_level
+# Real-time audio capture
+def start_recording():
+    """Begin capturing microphone audio in the background.
+
+    Opens a mono input stream at SAMPLE_RATE and collects the delivered
+    blocks into the module-level ``recording`` list until ``is_recording``
+    is set to False (see stop_recording). The capture loop runs on a daemon
+    thread so this handler returns immediately instead of blocking until
+    recording stops.
+
+    Returns:
+        "Recording started" on success, or "Recording already in progress"
+        when a capture is already running.
+
+    Raises:
+        Whatever ``sd.InputStream`` raises when the stream cannot be opened
+        (for example when no input device is available); shared state is
+        left untouched in that case.
+    """
+    import threading  # local import: only this function needs it
+
+    global recording, is_recording
+    if is_recording:
+        # A second stream would duplicate audio and discard the first buffer.
+        return "Recording already in progress"
+    chunks = []
+
+    def callback(indata, frames, time, status):
+        if status:
+            logger.error(f"Recording error: {status}")
+        # Copy: the buffer passed to the callback is only valid during the call.
+        chunks.append(indata.copy())
+
+    # Construct the stream here so open failures surface to the caller
+    # before any shared state has been changed.
+    stream = sd.InputStream(samplerate=SAMPLE_RATE, channels=1, callback=callback, blocksize=int(SAMPLE_RATE * DURATION))
+    recording = chunks
+    is_recording = True
+
+    def capture():
+        # Entering the context starts the stream; leaving it stops and closes it.
+        with stream:
+            # Also stop when the shared buffer has been replaced, so a quick
+            # stop/start cannot leave this older stream running.
+            while is_recording and recording is chunks:
+                sd.sleep(1000)
+
+    logger.info("Starting real-time audio capture")
+    threading.Thread(target=capture, name="audio-capture", daemon=True).start()
+    return "Recording started"
+def stop_recording():
+    """Stop the capture loop and analyse whatever audio was collected.
+
+    Clears ``is_recording`` so the capture loop exits, takes ownership of
+    the collected chunks, and runs them through process_audio.
+
+    Returns:
+        A one-line summary of the detection result, or "No audio recorded"
+        when no chunks were captured.
+    """
+    global is_recording, recording
+    is_recording = False
+    # Swap the buffer out before processing: a late stream callback then
+    # appends to the fresh list instead of the one being concatenated, and a
+    # repeated Stop click does not re-analyse stale audio.
+    chunks, recording = recording, []
+    if not chunks:
+        return "No audio recorded"
+    audio_data = np.concatenate(chunks, axis=0).flatten()
+    scream_detected, confidence, label, risk_level = process_audio(audio_data)
+    return f"Detection: {scream_detected}, Confidence: {confidence:.2f}%, Label: {label}, Risk: {risk_level}"
+# Process uploaded audio file
+def process_uploaded_audio(audio_file):
+    """Load an uploaded audio file and report the scream-detection result.
+
+    Args:
+        audio_file: Path of the uploaded file, or a falsy value (``None`` /
+            empty string) when no file is present.
+
+    Returns:
+        A one-line summary of the detection result, "No audio file provided"
+        when there is nothing to analyse, or "Error: ..." if loading or
+        processing raised.
+    """
+    # The change event this is bound to can also fire with no file (for
+    # example when the widget is cleared); skip loading instead of reporting
+    # a confusing loader error.
+    if not audio_file:
+        return "No audio file provided"
+    try:
+        # librosa resamples to SAMPLE_RATE on load.
+        audio_data, sr = librosa.load(audio_file, sr=SAMPLE_RATE)
+        scream_detected, confidence, label, risk_level = process_audio(audio_data, sr)
+        return f"Detection: {scream_detected}, Confidence: {confidence:.2f}%, Label: {label}, Risk: {risk_level}"
+    except Exception as e:
+        logger.error(f"Error processing uploaded audio: {e}")
+        return f"Error: {e}"
+# Gradio interface
+def create_interface():
+    """Build the Gradio Blocks UI for the scream detection demo.
+
+    Layout: start/stop recording buttons, an audio upload widget, a results
+    textbox and a settings accordion. The buttons and the upload widget all
+    write their result into the same textbox.
+
+    Returns:
+        The constructed ``gr.Blocks`` application (not yet launched).
+    """
+    with gr.Blocks() as demo:
+        gr.Markdown("# Scream Detection System")
+        with gr.Row():
+            start_btn = gr.Button("Start Recording")
+            stop_btn = gr.Button("Stop Recording")
+        # Newer Gradio releases take sources=[...] and reject the older
+        # source=... keyword; try the new form first and fall back so the
+        # app builds under either API.
+        try:
+            upload = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")
+        except TypeError:
+            upload = gr.Audio(source="upload", type="filepath", label="Upload Audio File")
+        output = gr.Textbox(label="Detection Results")
+        with gr.Accordion("Settings"):
+            # NOTE(review): this slider is displayed but not passed to any
+            # handler, so moving it currently has no effect on detection.
+            confidence_threshold = gr.Slider(50, 100, value=80, label="High-Risk Confidence Threshold")
+        start_btn.click(start_recording, outputs=output)
+        stop_btn.click(stop_recording, outputs=output)
+        upload.change(process_uploaded_audio, inputs=upload, outputs=output)
+    return demo
+# Launch the interface
+# Only when executed as a script: build the UI and start the Gradio server.
+if __name__ == "__main__":
+    # Bound at module level under the name `demo`.
+    demo = create_interface()
+    demo.launch()