sohei1l committed on
Commit
938c18f
·
1 Parent(s): 94b5018

Improve candidate labels and clean up console output

Browse files
assets/index-DTilw9T1.js ADDED
The diff for this file is too large to render. See raw diff
 
assets/vite-DcBtz0py.svg ADDED
index.html CHANGED
@@ -2,10 +2,10 @@
2
  <html lang="en">
3
  <head>
4
  <meta charset="UTF-8" />
5
- <link rel="icon" type="image/svg+xml" href="./vite.svg" />
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
  <title>clip-tagger</title>
8
- <script type="module" crossorigin src="./assets/index-kltcWzb5.js"></script>
9
  <link rel="stylesheet" crossorigin href="./assets/index-F_aFpJd-.css">
10
  </head>
11
  <body>
 
2
  <html lang="en">
3
  <head>
4
  <meta charset="UTF-8" />
5
+ <link rel="icon" type="image/svg+xml" href="./assets/vite-DcBtz0py.svg" />
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
  <title>clip-tagger</title>
8
+ <script type="module" crossorigin src="./assets/index-DTilw9T1.js"></script>
9
  <link rel="stylesheet" crossorigin href="./assets/index-F_aFpJd-.css">
10
  </head>
11
  <body>
src/clapProcessor.js CHANGED
@@ -5,16 +5,17 @@ class CLAPProcessor {
5
  this.classifier = null;
6
  this.isLoaded = false;
7
  this.candidateLabels = [
8
- 'speech', 'music', 'singing', 'guitar', 'piano', 'drums', 'violin',
9
- 'trumpet', 'saxophone', 'flute', 'classical music', 'rock music',
10
- 'pop music', 'jazz', 'electronic music', 'ambient', 'nature sounds',
11
- 'rain', 'wind', 'ocean waves', 'birds chirping', 'dog barking',
12
- 'cat meowing', 'car engine', 'traffic', 'footsteps', 'door closing',
13
- 'applause', 'laughter', 'crying', 'coughing', 'sneezing',
14
- 'telephone ringing', 'alarm clock', 'typing', 'water running',
15
- 'fire crackling', 'thunder', 'helicopter', 'airplane', 'train',
16
- 'motorcycle', 'bell ringing', 'whistle', 'horn', 'siren',
17
- 'explosion', 'gunshot', 'silence', 'noise', 'distortion'
 
18
  ];
19
  }
20
 
@@ -22,7 +23,7 @@ class CLAPProcessor {
22
  if (this.isLoaded) return;
23
 
24
  try {
25
- console.log('🔄 Loading CLAP pipeline...');
26
 
27
  this.classifier = await pipeline(
28
  'zero-shot-audio-classification',
@@ -30,7 +31,7 @@ class CLAPProcessor {
30
  );
31
 
32
  this.isLoaded = true;
33
- console.log('✅ CLAP pipeline ready!');
34
  } catch (error) {
35
  console.error('❌ CLAP initialization failed:', error);
36
  throw new Error(`CLAP loading failed: ${error.message}`);
@@ -57,12 +58,12 @@ class CLAPProcessor {
57
  // Run the classification - pass raw Float32Array and candidate labels as separate params
58
  const results = await this.classifier(rawAudio, this.candidateLabels);
59
 
60
- console.log('🎯 Classification results:', results);
61
 
62
  // Format results
63
  const formattedTags = this.formatResults(results);
64
 
65
- console.log('πŸ“ Final tags:', formattedTags);
66
  return formattedTags;
67
 
68
  } catch (error) {
@@ -124,14 +125,14 @@ class CLAPProcessor {
124
  }
125
 
126
  async fileToAudioBuffer(file) {
127
- console.log('πŸ“ Decoding file:', file.name, `(${Math.round(file.size / 1024)}KB)`);
128
 
129
  try {
130
  const arrayBuffer = await file.arrayBuffer();
131
  const audioContext = new (window.AudioContext || window.webkitAudioContext)();
132
  const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
133
 
134
- console.log('✅ File decoded successfully');
135
  return audioBuffer;
136
  } catch (error) {
137
  console.error('❌ File decoding failed:', error);
 
5
  this.classifier = null;
6
  this.isLoaded = false;
7
  this.candidateLabels = [
8
+ 'speech', 'male voice', 'female voice', 'narration', 'reading aloud', 'conversation',
9
+ 'music', 'singing', 'instrumental music', 'classical music', 'rock music', 'pop music',
10
+ 'jazz', 'electronic music', 'acoustic music', 'background music',
11
+ 'guitar', 'piano', 'drums', 'violin', 'trumpet', 'saxophone', 'flute',
12
+ 'nature sounds', 'rain', 'wind', 'ocean waves', 'birds chirping', 'water running',
13
+ 'ambient sounds', 'room tone', 'background noise', 'white noise',
14
+ 'animal sounds', 'dog barking', 'cat meowing', 'birds singing',
15
+ 'mechanical sounds', 'car engine', 'traffic', 'airplane', 'train', 'motorcycle',
16
+ 'household sounds', 'door closing', 'footsteps', 'typing', 'telephone ringing', 'alarm clock',
17
+ 'human sounds', 'applause', 'laughter', 'crying', 'coughing', 'sneezing', 'breathing',
18
+ 'silence', 'quiet', 'noise', 'distortion', 'static'
19
  ];
20
  }
21
 
 
23
  if (this.isLoaded) return;
24
 
25
  try {
26
+ console.log('Loading CLAP pipeline...');
27
 
28
  this.classifier = await pipeline(
29
  'zero-shot-audio-classification',
 
31
  );
32
 
33
  this.isLoaded = true;
34
+ console.log('CLAP pipeline ready!');
35
  } catch (error) {
36
  console.error('❌ CLAP initialization failed:', error);
37
  throw new Error(`CLAP loading failed: ${error.message}`);
 
58
  // Run the classification - pass raw Float32Array and candidate labels as separate params
59
  const results = await this.classifier(rawAudio, this.candidateLabels);
60
 
61
+ console.log('Classification results:', results);
62
 
63
  // Format results
64
  const formattedTags = this.formatResults(results);
65
 
66
+ console.log('Final tags:', formattedTags);
67
  return formattedTags;
68
 
69
  } catch (error) {
 
125
  }
126
 
127
  async fileToAudioBuffer(file) {
128
+ console.log('Decoding file:', file.name, `(${Math.round(file.size / 1024)}KB)`);
129
 
130
  try {
131
  const arrayBuffer = await file.arrayBuffer();
132
  const audioContext = new (window.AudioContext || window.webkitAudioContext)();
133
  const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
134
 
135
+ console.log('File decoded successfully');
136
  return audioBuffer;
137
  } catch (error) {
138
  console.error('❌ File decoding failed:', error);