Komal133 committed on
Commit
22d3f74
·
verified ·
1 Parent(s): 07f29fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -45
app.py CHANGED
@@ -1,21 +1,15 @@
1
- import os
2
  import subprocess
3
- import soundfile as sf
4
  import librosa
5
  from transformers import pipeline
6
- from datetime import datetime
7
 
8
- # Initialize Hugging Face pipeline
9
  classifier = pipeline(
10
  "audio-classification",
11
- model="padmalcom/wav2vec2-large-nonverbalvocalization-classification",
12
  )
13
 
14
  def convert_audio(input_path, output_path="input.wav"):
15
- """Convert audio to 16kHz mono WAV using ffmpeg."""
16
- if not os.path.isfile(input_path):
17
- raise FileNotFoundError(f"File not found: {input_path}")
18
-
19
  cmd = [
20
  "ffmpeg", "-i", input_path,
21
  "-acodec", "pcm_s16le",
@@ -24,23 +18,11 @@ def convert_audio(input_path, output_path="input.wav"):
24
  output_path,
25
  "-y"
26
  ]
 
 
27
 
28
- try:
29
- subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
30
- return output_path
31
- except subprocess.CalledProcessError as e:
32
- raise RuntimeError("FFmpeg conversion failed: " + e.stderr.decode())
33
-
34
- def detect_scream(audio_path: str):
35
- """Run scream detection on a WAV file."""
36
- if not os.path.isfile(audio_path):
37
- raise FileNotFoundError(f"Audio file not found: {audio_path}")
38
-
39
- audio, sr = sf.read(audio_path)
40
- # Resample if needed
41
- if sr != classifier.feature_extractor.sampling_rate:
42
- audio = librosa.resample(audio, orig_sr=sr, target_sr=classifier.feature_extractor.sampling_rate)
43
-
44
  results = classifier(audio)
45
  top = results[0]
46
  label = top["label"]
@@ -53,27 +35,20 @@ def detect_scream(audio_path: str):
53
  else:
54
  alert = "None"
55
 
56
- return label, score, alert
57
-
58
- def log_to_salesforce(audio_url, label, score, alert):
59
- """Placeholder for Salesforce integration."""
60
- print("Logging to Salesforce...")
61
- print(f"Audio URL: {audio_url}")
62
- print(f"Result: {label}, Score: {score:.1f}%, Alert Level: {alert}")
63
- # Integration with Salesforce via simple-salesforce or REST API goes here
64
 
65
- def main():
66
- # Input file path (any audio format)
67
- raw_input_path = "my_audio.mp3" # Change to your input file
68
- audio_url = "https://yourstorage.com/path/to/audio" # Simulated URL
69
 
70
- try:
71
- wav_path = convert_audio(raw_input_path)
72
- label, score, alert = detect_scream(wav_path)
73
- print(f"Detected: {label} ({score:.1f}%) — Alert Level: {alert}")
74
- log_to_salesforce(audio_url, label, score, alert)
75
- except Exception as e:
76
- print("Error:", str(e))
77
 
78
  if __name__ == "__main__":
79
- main()
 
1
+ import gradio as gr
2
  import subprocess
3
+ import os
4
  import librosa
5
  from transformers import pipeline
 
6
 
 
7
  classifier = pipeline(
8
  "audio-classification",
9
+ model="padmalcom/wav2vec2-large-nonverbalvocalization-classification"
10
  )
11
 
12
  def convert_audio(input_path, output_path="input.wav"):
 
 
 
 
13
  cmd = [
14
  "ffmpeg", "-i", input_path,
15
  "-acodec", "pcm_s16le",
 
18
  output_path,
19
  "-y"
20
  ]
21
+ subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
22
+ return output_path
23
 
24
+ def detect_scream(audio_path):
25
+ audio, sr = librosa.load(audio_path, sr=16000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  results = classifier(audio)
27
  top = results[0]
28
  label = top["label"]
 
35
  else:
36
  alert = "None"
37
 
38
+ return f"Detected: {label} ({score:.1f}%) — Alert Level: {alert}"
 
 
 
 
 
 
 
39
 
40
def process_uploaded(audio_file):
    """Convert an uploaded clip to WAV and run scream detection on it.

    Args:
        audio_file: The value handed over by the Gradio input component.
            The interface in this file declares ``gr.Audio(type="filepath")``,
            so this is expected to be a plain ``str`` path; an object that
            exposes its path as ``.name`` is accepted as well.

    Returns:
        The text produced by ``detect_scream`` for the converted file, or a
        short notice when no audio was supplied.
    """
    # NOTE(review): Gradio is expected to pass None when the audio input is
    # empty; confirm against the installed Gradio version.
    if audio_file is None:
        return "No audio provided."

    # A filepath input is a str and has no ``.name`` attribute, so calling
    # ``audio_file.name`` unconditionally raised AttributeError. Fall back to
    # the value itself when the attribute is missing.
    source_path = getattr(audio_file, "name", audio_file)
    wav_path = convert_audio(source_path)
    return detect_scream(wav_path)
44
 
45
+ iface = gr.Interface(
46
+ fn=process_uploaded,
47
+ inputs=gr.Audio(type="filepath"),
48
+ outputs="text",
49
+ title="Scream Detection",
50
+ description="Upload an audio clip, and this app detects if it contains a scream."
51
+ )
52
 
53
  if __name__ == "__main__":
54
+ iface.launch()