vetsin committed on
Commit
86c2562
·
1 Parent(s): b087b2f

fix: works

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. S5065-MSK.wav +3 -0
  3. app.py +5 -12
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .gradio
S5065-MSK.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30569be92c11907721469fa876588f196bd3e557d9031706d66b56a6c2da6101
3
+ size 1817772
app.py CHANGED
@@ -1,33 +1,26 @@
1
- # !pip install gradio transformers torch torchaudio librosa soundfile datasets accelerate
2
  import gradio as gr
3
  import json
4
  from transformers import pipeline
5
  import warnings
6
-
7
 
8
  warnings.filterwarnings("ignore")
9
- audio_classifier = pipeline("audio-classification", model="ATDI-Group/AST_finetuned_SIGIDwiki")
 
 
10
 
11
- # 4. DEFINE THE CORE ANALYSIS FUNCTION
12
  def analyze_signal(audio_path):
13
- """
14
- Takes an audio file path and returns a holistic analysis dictionary.
15
- """
16
  if audio_path is None:
17
  return {"Error": "No audio file provided. Please upload a file."}
18
 
19
  print(f"Analysing audio file: {audio_path}...")
20
  try:
21
  acoustic_results = audio_classifier(audio_path, top_k=3)
22
- print(repr(acoustic_results))
23
  detected_sounds = {item['label']: round(item['score'], 2) for item in acoustic_results}
24
 
25
- # Fuse the results
26
  holistic_understanding = {
27
  "Detected Sounds": detected_sounds,
28
- #"Insight": f"The model detected speech saying '{transcription}' in an environment with sounds like: {', '.join(detected_sounds.keys())}."
29
  }
30
-
31
  return holistic_understanding
32
  except Exception as e:
33
  return {"Error": f"Could not process the audio file. Details: {str(e)}"}
@@ -40,7 +33,7 @@ iface = gr.Interface(
40
  description="""
41
  https://huggingface.co/ATDI-Group/AST_finetuned_SIGIDwiki
42
  """,
43
- examples=[["example.wav"]]
44
  )
45
 
46
  # This will create the web UI
 
 
# Third-party dependencies.
import warnings

import gradio as gr
import json
from transformers import ASTFeatureExtractor, ASTForAudioClassification, pipeline

# Keep the hosted demo's logs readable.
warnings.filterwarnings("ignore")

# The fine-tuned AST checkpoint lives in a subfolder of this Hub repository,
# so both the model and its feature extractor must be loaded explicitly
# before handing them to the pipeline.
_REPO_ID = "ATDI-Group/AST_finetuned_SIGIDwiki"
_SUBFOLDER = "AST_finetuned_SIGIDwiki"

model = ASTForAudioClassification.from_pretrained(_REPO_ID, subfolder=_SUBFOLDER)
feature_extractor = ASTFeatureExtractor.from_pretrained(_REPO_ID, subfolder=_SUBFOLDER)

# Module-level pipeline, built once and reused by every request.
audio_classifier = pipeline("audio-classification", model=model, feature_extractor=feature_extractor)
 
 
12
def analyze_signal(audio_path):
    """Classify an uploaded audio clip and report the top detected signal types.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path handed over by Gradio, or ``None`` when the user
        submitted without uploading a file.

    Returns
    -------
    dict
        ``{"Detected Sounds": {label: score, ...}}`` on success, otherwise
        ``{"Error": <message>}`` (missing file or any classification failure).
    """
    # Guard clause: Gradio passes None for an empty submission.
    if audio_path is None:
        return {"Error": "No audio file provided. Please upload a file."}

    print(f"Analysing audio file: {audio_path}...")
    try:
        # Top-3 predictions from the module-level audio-classification pipeline.
        top_predictions = audio_classifier(audio_path, top_k=3)
        scores_by_label = {}
        for entry in top_predictions:
            scores_by_label[entry['label']] = round(entry['score'], 2)
        return {"Detected Sounds": scores_by_label}
    except Exception as e:
        # Surface any failure (unreadable file, bad codec, ...) in the UI
        # instead of crashing the demo.
        return {"Error": f"Could not process the audio file. Details: {str(e)}"}
 
33
  description="""
34
  https://huggingface.co/ATDI-Group/AST_finetuned_SIGIDwiki
35
  """,
36
+ examples=[["S5065-MSK.wav"]]
37
  )
38
 
39
  # This will create the web UI