lynn-twinkl committed on
Commit
fd62f9c
·
1 Parent(s): 2e31929

fix: imports now reflect new module names

Browse files
Files changed (1) hide show
  1. notebooks/app_pipeline.ipynb +31 -35
notebooks/app_pipeline.ipynb CHANGED
@@ -239,33 +239,29 @@
239
  },
240
  {
241
  "cell_type": "code",
242
- "execution_count": 9,
243
  "id": "5c54b4dd-46ec-4bba-9531-18ae03e8af33",
244
  "metadata": {},
245
  "outputs": [
 
 
 
 
 
 
 
 
246
  {
247
  "name": "stdout",
248
  "output_type": "stream",
249
  "text": [
250
- "Model 'en_core_web_md' not found. Downloading now...\n",
251
- "Collecting en-core-web-md==3.8.0\n",
252
- " Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.8.0/en_core_web_md-3.8.0-py3-none-any.whl (33.5 MB)\n",
253
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m33.5/33.5 MB\u001b[0m \u001b[31m27.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
254
- "\u001b[?25hInstalling collected packages: en-core-web-md\n",
255
- "Successfully installed en-core-web-md-3.8.0\n",
256
- "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
257
- "You can now load the package via spacy.load('en_core_web_md')\n",
258
- "\u001b[38;5;3m⚠ Restart to reload dependencies\u001b[0m\n",
259
- "If you are in a Jupyter or Colab notebook, you may need to restart Python in\n",
260
- "order to load all the package's dependencies. You can do this by selecting the\n",
261
- "'Restart kernel' or 'Restart runtime' option.\n",
262
  "Using device: mps\n"
263
  ]
264
  }
265
  ],
266
  "source": [
267
  "import importlib\n",
268
- "import src.models.topicModeling_contentRequests as tmc\n",
269
  "\n",
270
  "importlib.reload(tmc)\n",
271
  "\n",
@@ -275,7 +271,7 @@
275
  },
276
  {
277
  "cell_type": "code",
278
- "execution_count": 10,
279
  "id": "e0be5874-f2f3-41ab-9ccf-9be3a7a7559f",
280
  "metadata": {},
281
  "outputs": [],
@@ -293,7 +289,7 @@
293
  },
294
  {
295
  "cell_type": "code",
296
- "execution_count": 11,
297
  "id": "3a9ba097-546f-4ee6-89b0-bcf2d6ffdb4e",
298
  "metadata": {},
299
  "outputs": [],
@@ -311,7 +307,7 @@
311
  },
312
  {
313
  "cell_type": "code",
314
- "execution_count": 12,
315
  "id": "ac588c90-1daf-4b9b-8ab7-efd5fe358c53",
316
  "metadata": {},
317
  "outputs": [],
@@ -324,7 +320,7 @@
324
  },
325
  {
326
  "cell_type": "code",
327
- "execution_count": 13,
328
  "id": "a79358ea-5d0c-4ee6-be7e-88c0eeea1f0c",
329
  "metadata": {},
330
  "outputs": [],
@@ -340,7 +336,7 @@
340
  },
341
  {
342
  "cell_type": "code",
343
- "execution_count": 14,
344
  "id": "ed93e33a-bf63-411e-ba4d-2b01d91c6b10",
345
  "metadata": {},
346
  "outputs": [
@@ -360,7 +356,7 @@
360
  },
361
  {
362
  "cell_type": "code",
363
- "execution_count": 15,
364
  "id": "7a74aeb4-24f3-458b-abd4-51b6293e8946",
365
  "metadata": {},
366
  "outputs": [
@@ -368,7 +364,7 @@
368
  "name": "stderr",
369
  "output_type": "stream",
370
  "text": [
371
- "Encoding Batches: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 169/169 [00:04<00:00, 34.05it/s]\n"
372
  ]
373
  }
374
  ],
@@ -386,7 +382,7 @@
386
  },
387
  {
388
  "cell_type": "code",
389
- "execution_count": 16,
390
  "id": "2c83e408-785d-45ac-9522-386b091ac6f3",
391
  "metadata": {},
392
  "outputs": [],
@@ -400,7 +396,7 @@
400
  },
401
  {
402
  "cell_type": "code",
403
- "execution_count": 17,
404
  "id": "690fbbcf-a75e-4dd3-9dcf-8c35e4341592",
405
  "metadata": {},
406
  "outputs": [
@@ -408,16 +404,16 @@
408
  "name": "stderr",
409
  "output_type": "stream",
410
  "text": [
411
- "2025-05-23 13:37:42,741 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm\n",
412
- "2025-05-23 13:37:50,840 - BERTopic - Dimensionality - Completed ✓\n",
413
- "2025-05-23 13:37:50,840 - BERTopic - Cluster - Start clustering the reduced embeddings\n",
414
- "2025-05-23 13:37:50,890 - BERTopic - Cluster - Completed ✓\n",
415
- "2025-05-23 13:37:50,890 - BERTopic - Representation - Extracting topics using c-TF-IDF for topic reduction.\n",
416
- "2025-05-23 13:37:50,958 - BERTopic - Representation - Completed ✓\n",
417
- "2025-05-23 13:37:50,958 - BERTopic - Topic reduction - Reducing number of topics\n",
418
- "2025-05-23 13:37:50,964 - BERTopic - Representation - Fine-tuning topics using representation models.\n",
419
- "2025-05-23 13:37:52,325 - BERTopic - Representation - Completed ✓\n",
420
- "2025-05-23 13:37:52,326 - BERTopic - Topic reduction - Reduced number of topics from 72 to 33\n"
421
  ]
422
  }
423
  ],
@@ -457,7 +453,7 @@
457
  },
458
  {
459
  "cell_type": "code",
460
- "execution_count": 18,
461
  "id": "8b4fe78a-684b-47f6-8162-c186dafc61b3",
462
  "metadata": {
463
  "scrolled": true
@@ -902,7 +898,7 @@
902
  "32 [At present, we have over 90 children benefiting from our programs, and we are eager to provide them with additional outdoor equipment that will contribute to their physical, social, and emotional development., This would be a great help to purchase some new forest school equipment to expand and stretch our children's learning as we are creating a forest school area., We are a thriving school with over 450 pupils who benefit from our large open space, but we currently have limited equipment ... "
903
  ]
904
  },
905
- "execution_count": 18,
906
  "metadata": {},
907
  "output_type": "execute_result"
908
  }
 
239
  },
240
  {
241
  "cell_type": "code",
242
+ "execution_count": 8,
243
  "id": "5c54b4dd-46ec-4bba-9531-18ae03e8af33",
244
  "metadata": {},
245
  "outputs": [
246
+ {
247
+ "name": "stderr",
248
+ "output_type": "stream",
249
+ "text": [
250
+ "/Users/lynn/Documents/Twinkl/grant-applications-app/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
251
+ " from .autonotebook import tqdm as notebook_tqdm\n"
252
+ ]
253
+ },
254
  {
255
  "name": "stdout",
256
  "output_type": "stream",
257
  "text": [
 
 
 
 
 
 
 
 
 
 
 
 
258
  "Using device: mps\n"
259
  ]
260
  }
261
  ],
262
  "source": [
263
  "import importlib\n",
264
+ "import src.models.topic_modeling_pipeline as tmc\n",
265
  "\n",
266
  "importlib.reload(tmc)\n",
267
  "\n",
 
271
  },
272
  {
273
  "cell_type": "code",
274
+ "execution_count": 9,
275
  "id": "e0be5874-f2f3-41ab-9ccf-9be3a7a7559f",
276
  "metadata": {},
277
  "outputs": [],
 
289
  },
290
  {
291
  "cell_type": "code",
292
+ "execution_count": 10,
293
  "id": "3a9ba097-546f-4ee6-89b0-bcf2d6ffdb4e",
294
  "metadata": {},
295
  "outputs": [],
 
307
  },
308
  {
309
  "cell_type": "code",
310
+ "execution_count": 11,
311
  "id": "ac588c90-1daf-4b9b-8ab7-efd5fe358c53",
312
  "metadata": {},
313
  "outputs": [],
 
320
  },
321
  {
322
  "cell_type": "code",
323
+ "execution_count": 12,
324
  "id": "a79358ea-5d0c-4ee6-be7e-88c0eeea1f0c",
325
  "metadata": {},
326
  "outputs": [],
 
336
  },
337
  {
338
  "cell_type": "code",
339
+ "execution_count": 13,
340
  "id": "ed93e33a-bf63-411e-ba4d-2b01d91c6b10",
341
  "metadata": {},
342
  "outputs": [
 
356
  },
357
  {
358
  "cell_type": "code",
359
+ "execution_count": 14,
360
  "id": "7a74aeb4-24f3-458b-abd4-51b6293e8946",
361
  "metadata": {},
362
  "outputs": [
 
364
  "name": "stderr",
365
  "output_type": "stream",
366
  "text": [
367
+ "Encoding Batches: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 169/169 [00:07<00:00, 22.11it/s]\n"
368
  ]
369
  }
370
  ],
 
382
  },
383
  {
384
  "cell_type": "code",
385
+ "execution_count": 15,
386
  "id": "2c83e408-785d-45ac-9522-386b091ac6f3",
387
  "metadata": {},
388
  "outputs": [],
 
396
  },
397
  {
398
  "cell_type": "code",
399
+ "execution_count": 16,
400
  "id": "690fbbcf-a75e-4dd3-9dcf-8c35e4341592",
401
  "metadata": {},
402
  "outputs": [
 
404
  "name": "stderr",
405
  "output_type": "stream",
406
  "text": [
407
+ "2025-05-23 19:05:30,115 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm\n",
408
+ "2025-05-23 19:05:38,209 - BERTopic - Dimensionality - Completed ✓\n",
409
+ "2025-05-23 19:05:38,209 - BERTopic - Cluster - Start clustering the reduced embeddings\n",
410
+ "2025-05-23 19:05:38,259 - BERTopic - Cluster - Completed ✓\n",
411
+ "2025-05-23 19:05:38,259 - BERTopic - Representation - Extracting topics using c-TF-IDF for topic reduction.\n",
412
+ "2025-05-23 19:05:38,327 - BERTopic - Representation - Completed ✓\n",
413
+ "2025-05-23 19:05:38,327 - BERTopic - Topic reduction - Reducing number of topics\n",
414
+ "2025-05-23 19:05:38,333 - BERTopic - Representation - Fine-tuning topics using representation models.\n",
415
+ "2025-05-23 19:05:41,321 - BERTopic - Representation - Completed ✓\n",
416
+ "2025-05-23 19:05:41,322 - BERTopic - Topic reduction - Reduced number of topics from 72 to 33\n"
417
  ]
418
  }
419
  ],
 
453
  },
454
  {
455
  "cell_type": "code",
456
+ "execution_count": 17,
457
  "id": "8b4fe78a-684b-47f6-8162-c186dafc61b3",
458
  "metadata": {
459
  "scrolled": true
 
898
  "32 [At present, we have over 90 children benefiting from our programs, and we are eager to provide them with additional outdoor equipment that will contribute to their physical, social, and emotional development., This would be a great help to purchase some new forest school equipment to expand and stretch our children's learning as we are creating a forest school area., We are a thriving school with over 450 pupils who benefit from our large open space, but we currently have limited equipment ... "
899
  ]
900
  },
901
+ "execution_count": 17,
902
  "metadata": {},
903
  "output_type": "execute_result"
904
  }