Heinrich Dinkel committed on
Commit
e393f34
·
1 Parent(s): 63ada3a

Updated notebook

Browse files
Files changed (1) hide show
  1. notebook.ipynb +7 -50
notebook.ipynb CHANGED
@@ -55,15 +55,8 @@
55
  " audio_tensor = torch.tensor(audio).float()\n",
56
  " label_tensor = torch.tensor(label).long()\n",
57
  " \n",
58
- " return audio_tensor, label_tensor"
59
- ]
60
- },
61
- {
62
- "cell_type": "code",
63
- "execution_count": null,
64
- "metadata": {},
65
- "outputs": [],
66
- "source": [
67
  "def download_esc50():\n",
68
  " import urllib.request\n",
69
  " import zipfile\n",
@@ -80,34 +73,6 @@
80
  " print(\"ESC-50 dataset downloaded and extracted\")"
81
  ]
82
  },
83
- {
84
- "cell_type": "code",
85
- "execution_count": null,
86
- "metadata": {},
87
- "outputs": [],
88
- "source": [
89
- "def get_embedding_dim(model):\n",
90
- " dummy_input = torch.randn(1, 160000)\n",
91
- " with torch.no_grad():\n",
92
- " output = model(dummy_input)\n",
93
- " if isinstance(output, dict):\n",
94
- " for key in ['last_hidden_state', 'embeddings', 'audio']:\n",
95
- " if key in output:\n",
96
- " features = output[key]\n",
97
- " break\n",
98
- " else:\n",
99
- " features = list(output.values())[0]\n",
100
- " else:\n",
101
- " features = output\n",
102
- " \n",
103
- " if features.dim() > 2:\n",
104
- " embedding_dim = features.shape[-1]\n",
105
- " else:\n",
106
- " embedding_dim = features.shape[-1]\n",
107
- " \n",
108
- " return embedding_dim"
109
- ]
110
- },
111
  {
112
  "cell_type": "code",
113
  "execution_count": null,
@@ -121,7 +86,7 @@
121
  "model = AutoModel.from_pretrained(\"mispeech/dashengtokenizer\", trust_remote_code=True)\n",
122
  "\n",
123
  "# Get embedding dimension\n",
124
- "embedding_dim = get_embedding_dim(model)\n",
125
  "print(f\"Model embedding dimension: {embedding_dim}\")\n",
126
  "\n",
127
  "# Freeze model\n",
@@ -135,15 +100,7 @@
135
  "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
136
  "model.to(device)\n",
137
  "classifier.to(device)\n",
138
- "print(f\"Using device: {device}\")"
139
- ]
140
- },
141
- {
142
- "cell_type": "code",
143
- "execution_count": null,
144
- "metadata": {},
145
- "outputs": [],
146
- "source": [
147
  "# Create datasets\n",
148
  "audio_dir = 'ESC-50/audio'\n",
149
  "metadata_path = 'ESC-50/meta/esc50.csv'\n",
@@ -259,10 +216,10 @@
259
  }
260
  ],
261
  "metadata": {
262
- "accelerator": "GPU",
263
  "colab": {
264
- "gpuType": "T4",
265
- "provenance": []
266
  },
267
  "kernelspec": {
268
  "display_name": "Python 3 (ipykernel)",
 
55
  " audio_tensor = torch.tensor(audio).float()\n",
56
  " label_tensor = torch.tensor(label).long()\n",
57
  " \n",
58
+ " return audio_tensor, label_tensor\n",
59
+ "\n",
 
 
 
 
 
 
 
60
  "def download_esc50():\n",
61
  " import urllib.request\n",
62
  " import zipfile\n",
 
73
  " print(\"ESC-50 dataset downloaded and extracted\")"
74
  ]
75
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  {
77
  "cell_type": "code",
78
  "execution_count": null,
 
86
  "model = AutoModel.from_pretrained(\"mispeech/dashengtokenizer\", trust_remote_code=True)\n",
87
  "\n",
88
  "# Get embedding dimension\n",
89
+ "embedding_dim = 1280\n",
90
  "print(f\"Model embedding dimension: {embedding_dim}\")\n",
91
  "\n",
92
  "# Freeze model\n",
 
100
  "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
101
  "model.to(device)\n",
102
  "classifier.to(device)\n",
103
+ "print(f\"Using device: {device}\")\n",
 
 
 
 
 
 
 
 
104
  "# Create datasets\n",
105
  "audio_dir = 'ESC-50/audio'\n",
106
  "metadata_path = 'ESC-50/meta/esc50.csv'\n",
 
216
  }
217
  ],
218
  "metadata": {
219
+ "accelerator": "GPU",
220
  "colab": {
221
+ "gpuType": "T4",
222
+ "provenance": []
223
  },
224
  "kernelspec": {
225
  "display_name": "Python 3 (ipykernel)",