Spaces:

prashant-garg
/

gender-detection

Sleeping

App Files Files Community

prashant-garg committed on Feb 16

Commit

6a3ed8c

1 Parent(s): ccdfda0

working locally

Browse files

Files changed (3) hide show

app.py +101 -3
packages.txt +1 -0
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -1,9 +1,21 @@
 import streamlit as st
 import numpy as np
 import torch
 from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
-from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
-import av
 # Load Model and Feature Extractor
 @st.cache_resource
@@ -20,4 +32,90 @@ def load_model():
 placeholder = st.empty()
 placeholder.text("Loading model...")
 feature_extractor, model = load_model()
-placeholder.text("Model loaded!")

+import warnings
+warnings.filterwarnings("ignore")
 import streamlit as st
 import numpy as np
 import torch
 from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
+import pyaudio
+import logging
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Define audio stream parameters
+# These are passed to PyAudio when the input stream is opened and are reused
+# when the captured bytes are decoded and handed to the feature extractor.
+FORMAT = pyaudio.paInt16  # 16-bit resolution (samples are later divided by 32768.0 to normalize)
+CHANNELS = 1              # Mono audio
+RATE = 16000              # 16kHz sampling rate (also passed to the feature extractor as sampling_rate)
+CHUNK = 1024              # Number of frames per buffer (int(RATE / CHUNK * 1.5) chunks form one analysis window)
 # Load Model and Feature Extractor
 @st.cache_resource
 placeholder = st.empty()
 placeholder.text("Loading model...")
 feature_extractor, model = load_model()
+# Briefly report that the model is ready, render the page header, then clear
+# the temporary status placeholder.
+placeholder.text("Model loaded!")
+st.title("Real-Time Gender Detection from Voice :microphone:")
+st.write("Click 'Start' to detect gender in real-time.")
+placeholder.empty()
+# Initialize session state
+# 'listening' is the flag polled by the capture loop in start_listening();
+# 'prediction' holds the most recent label or status message.
+if 'listening' not in st.session_state:
+    st.session_state['listening'] = False
+if 'prediction' not in st.session_state:
+    st.session_state['prediction'] = ""
+# Function to stop listening
+def stop_listening():
+    """Stop the audio stream and update session state to stop listening."""
+    if 'stream' in st.session_state:
+        logging.info("Stopping stream")
+        st.session_state['stream'].stop_stream()
+        st.session_state['stream'].close()
+    if 'audio' in st.session_state:
+        logging.info("Stopping audio")
+        st.session_state['audio'].terminate()
+    st.session_state['listening'] = False
+    st.session_state['prediction'] = "Stopped listening, click 'Start Listening' to start again."
+    st.rerun()
+def start_listening():
+    """Start the audio stream and continuously process audio for gender detection."""
+    try:
+        placeholder = st.empty()
+        audio = pyaudio.PyAudio()
+        stream = audio.open(format=FORMAT,
+                            channels=CHANNELS,
+                            rate=RATE,
+                            input=True,
+                            frames_per_buffer=CHUNK)
+        st.session_state['stream'] = stream
+        st.session_state['audio'] = audio
+        st.session_state['listening'] = True
+        st.session_state['prediction'] = "Listening........................"
+        placeholder.write("Listening for audio...")
+        while st.session_state['listening']:
+            audio_data = np.array([], dtype=np.float32)
+            for _ in range(int(RATE / CHUNK * 1.5)):
+                # Read audio chunk from the stream
+                data = stream.read(CHUNK, exception_on_overflow=False)
+                # Convert byte data to numpy array and normalize
+                chunk_data = np.frombuffer(data, dtype=np.int16).astype(np.float32) / 32768.0
+                audio_data = np.concatenate((audio_data, chunk_data))
+            # Check if there is significant sound
+            if np.max(np.abs(audio_data)) > 0.05:  # Threshold for detecting sound
+                # Process the audio data
+                inputs = feature_extractor(audio_data, sampling_rate=RATE, return_tensors="pt", padding=True)
+                # Perform inference
+                with torch.no_grad():
+                    logits = model(**inputs).logits
+                    predicted_ids = torch.argmax(logits, dim=-1)
+                    # Map predicted IDs to labels
+                    predicted_label = model.config.id2label[predicted_ids.item()]
+                    if predicted_label != st.session_state['prediction']:
+                        st.session_state['prediction'] = predicted_label
+                        # st.write(f"Detected Gender: {predicted_label}")
+                        placeholder.write(f"Detected Gender: {predicted_label}")
+            else:
+                st.session_state['prediction'] = "---- No significant sound detected, skipping prediction. ----"
+                placeholder.empty()
+        placeholder.empty()
+    except Exception as e:
+        logging.error(f"An error occurred: {e}")
+        st.error(f"An error occurred: {e}")
+        stop_listening()
+col1, col2 = st.columns(2)
+    with col1:
+        if st.button("Start"):
+            start_listening()
+    with col2:
+        if st.button("Stop"):
+            stop_listening()

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ portaudio19-dev

requirements.txt CHANGED Viewed

@@ -34,6 +34,7 @@ pandas==2.2.3
 pillow==11.1.0
 protobuf==5.29.3
 pyarrow==19.0.0
 pycparser==2.22
 pydeck==0.9.1
 pyee==12.1.1
@@ -52,7 +53,6 @@ safetensors==0.5.2
 six==1.17.0
 smmap==5.0.2
 streamlit==1.42.0
-streamlit-webrtc==0.47.9
 sympy==1.13.1
 tenacity==9.0.0
 tokenizers==0.21.0

 pillow==11.1.0
 protobuf==5.29.3
 pyarrow==19.0.0
+PyAudio==0.2.14
 pycparser==2.22
 pydeck==0.9.1
 pyee==12.1.1
 six==1.17.0
 smmap==5.0.2
 streamlit==1.42.0
 sympy==1.13.1
 tenacity==9.0.0
 tokenizers==0.21.0