johnnydevriese committed on
Commit 8938d1b (verified) · 1 Parent(s): c8a46ad

Upload 80 files


I guess GH isn't ready for all this winning.

This view is limited to 50 files because the commit contains too many changes.

Files changed (50):
  1. .gitattributes +23 -0
  2. Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf +3 -0
  3. BoostedTree.pdf +3 -0
  4. README.md +155 -3
  5. code/.DS_Store +0 -0
  6. code/README.md +74 -0
  7. code/activation_functions.md +45 -0
  8. code/all_results.json +12 -0
  9. code/classification.md +88 -0
  10. code/config.json +32 -0
  11. code/cracks.py +32 -0
  12. code/creditcard.csv +3 -0
  13. code/cs224N_pytorch_tutorial_nlp.ipynb +981 -0
  14. code/cs229-scratchpad.ipynb +812 -0
  15. code/cs229_random_gems.md +13 -0
  16. code/data/cifar-10-batches-py/batches.meta +0 -0
  17. code/data/cifar-10-batches-py/data_batch_1 +3 -0
  18. code/data/cifar-10-batches-py/data_batch_2 +3 -0
  19. code/data/cifar-10-batches-py/data_batch_3 +3 -0
  20. code/data/cifar-10-batches-py/data_batch_4 +3 -0
  21. code/data/cifar-10-batches-py/data_batch_5 +3 -0
  22. code/data/cifar-10-batches-py/readme.html +1 -0
  23. code/data/cifar-10-batches-py/test_batch +3 -0
  24. code/data/cifar-10-python.tar.gz +3 -0
  25. code/data_science_handbook.ipynb +0 -0
  26. code/diffusers/hf_diffusers.ipynb +508 -0
  27. code/eda_pandas.html +0 -0
  28. code/eda_pandas.ipynb +0 -0
  29. code/eval_results.json +8 -0
  30. code/fin_hackerrank.ipynb +115 -0
  31. code/fraud_detection.ipynb +0 -0
  32. code/gcp_ml_engineer_questions.md +226 -0
  33. code/gcp_ml_engineer_tips.md +626 -0
  34. code/gcp_sample_questions.md +64 -0
  35. code/huggingface_vit_beans/.DS_Store +0 -0
  36. code/huggingface_vit_beans/huggingface_fine_tune_vit.ipynb +0 -0
  37. code/interview_questions.md +135 -0
  38. code/leetcode/blind_75.ipynb +768 -0
  39. code/leetcode/two_sum_grind_75.ipynb +2195 -0
  40. code/lru_cache_leetcode.ipynb +286 -0
  41. code/m1_gpu_pytorch.ipynb +472 -0
  42. code/main.py +137 -0
  43. code/netflix.jpg +0 -0
  44. code/netflix.png +0 -0
  45. code/netflix_price_increase.ipynb +63 -0
  46. code/penguin_classifier.pt +3 -0
  47. code/penguins_nn.ipynb +866 -0
  48. code/preprocessor_config.json +17 -0
  49. code/ps2_submission.py +411 -0
  50. code/pytorch_m1_testing.ipynb +659 -0
.gitattributes CHANGED
@@ -33,3 +33,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf filter=lfs diff=lfs merge=lfs -text
+ BoostedTree.pdf filter=lfs diff=lfs merge=lfs -text
+ code/creditcard.csv filter=lfs diff=lfs merge=lfs -text
+ code/data/cifar-10-batches-py/data_batch_1 filter=lfs diff=lfs merge=lfs -text
+ code/data/cifar-10-batches-py/data_batch_2 filter=lfs diff=lfs merge=lfs -text
+ code/data/cifar-10-batches-py/data_batch_3 filter=lfs diff=lfs merge=lfs -text
+ code/data/cifar-10-batches-py/data_batch_4 filter=lfs diff=lfs merge=lfs -text
+ code/data/cifar-10-batches-py/data_batch_5 filter=lfs diff=lfs merge=lfs -text
+ code/data/cifar-10-batches-py/test_batch filter=lfs diff=lfs merge=lfs -text
+ cs229_linear_algebra_review.pdf filter=lfs diff=lfs merge=lfs -text
+ cs229_probability_review.pdf filter=lfs diff=lfs merge=lfs -text
+ cs229/cs229-notes-dt.pdf filter=lfs diff=lfs merge=lfs -text
+ cs229/cs229-notes-ensemble.pdf filter=lfs diff=lfs merge=lfs -text
+ cs229/cs229-notes1.pdf filter=lfs diff=lfs merge=lfs -text
+ cs229/cs229-notes2.pdf filter=lfs diff=lfs merge=lfs -text
+ cs229/cs229-notes3.pdf filter=lfs diff=lfs merge=lfs -text
+ cs229/cs229-notes4.pdf filter=lfs diff=lfs merge=lfs -text
+ cs229/cs229-notes5.pdf filter=lfs diff=lfs merge=lfs -text
+ gcp_intro_ml_slides.pdf filter=lfs diff=lfs merge=lfs -text
+ machine-learning-yearning.pdf filter=lfs diff=lfs merge=lfs -text
+ matrixcookbook.pdf filter=lfs diff=lfs merge=lfs -text
+ mml-book.pdf filter=lfs diff=lfs merge=lfs -text
+ probability_cheatsheet.pdf filter=lfs diff=lfs merge=lfs -text
Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4ee767e0a6b04fa05ba7e599e9dbb4637a94a4407ccedf0b4d316b1fd7c8ec64
size 18090775
BoostedTree.pdf ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6819dcf6b1bf49fa47c6c4f018418dcbaaa020bf90340d89850f1b6c0bfcefd3
size 1463610
README.md CHANGED
@@ -1,3 +1,155 @@
- ---
- license: mit
- ---

# Courses

<img src="https://upload.wikimedia.org/wikipedia/en/d/d6/Stanford_University.png" width="200px">

## Stanford - Machine Learning - CS229

Course description: This course provides a broad introduction to machine learning and statistical pattern recognition. Topics include: supervised learning (generative/discriminative learning, parametric/non-parametric learning, neural networks, support vector machines); unsupervised learning (clustering, dimensionality reduction, kernel methods); learning theory (bias/variance tradeoffs, practical advice); and reinforcement learning and adaptive control. The course will also discuss recent applications of machine learning, such as robotic control, data mining, autonomous navigation, bioinformatics, speech recognition, and text and web data processing.

[course website](https://cs229.stanford.edu/)

## Stanford - Natural Language Processing with Deep Learning - CS224n

Natural language processing (NLP), or computational linguistics, is one of the most important technologies of the information age. Applications of NLP are everywhere because people communicate almost everything in language: web search, advertising, email, customer service, language translation, virtual agents, medical reports, politics, etc. In the last decade, deep learning (neural network) approaches have obtained very high performance across many different NLP tasks, using single end-to-end neural models that do not require traditional, task-specific feature engineering. In this course, students gain a thorough introduction to cutting-edge research in deep learning for NLP. Through lectures, assignments, and a final project, students learn the skills needed to design, implement, and understand their own neural network models, using the PyTorch framework.

Note: the 2019 lectures by Anand Avati are better and include a review of prerequisite concepts in the first lectures.

[course website](https://web.stanford.edu/class/cs224n/)

## Stanford - Machine Learning with Graphs - CS224W

Complex data can be represented as a graph of relationships between objects. Such networks are a fundamental tool for modeling social, technological, and biological systems. This course focuses on the computational, algorithmic, and modeling challenges specific to the analysis of massive graphs. By studying the underlying graph structure and its features, students are introduced to machine learning techniques and data mining tools apt to reveal insights on a variety of networks.
Topics include: representation learning and graph neural networks; algorithms for the World Wide Web; reasoning over knowledge graphs; influence maximization; disease outbreak detection; and social network analysis.

[course website](https://snap.stanford.edu/class/cs224w-2023/index.html#content)

## Stanford - Deep Learning for Computer Vision - CS231n

Computer vision has become ubiquitous in our society, with applications in search, image understanding, apps, mapping, medicine, drones, and self-driving cars. Core to many of these applications are visual recognition tasks such as image classification, localization, and detection. Recent developments in neural network (aka "deep learning") approaches have greatly advanced the performance of these state-of-the-art visual recognition systems. This course is a deep dive into the details of deep learning architectures, with a focus on learning end-to-end models for these tasks, particularly image classification. During the 10-week course, students learn to implement and train their own neural networks and gain a detailed understanding of cutting-edge research in computer vision. Additionally, the final assignment gives them the opportunity to train and apply multi-million-parameter networks to real-world vision problems of their choice. Through multiple hands-on assignments and the final course project, students acquire the toolset for setting up deep learning tasks and practical engineering tricks for training and fine-tuning deep neural networks.

[course website](http://cs231n.stanford.edu/)

## Stanford - Introduction to Statistical Learning - STAT216

Overview of supervised learning, with a focus on regression and classification methods. The syllabus includes: linear and polynomial regression, logistic regression, and linear discriminant analysis; cross-validation and the bootstrap; model selection and regularization methods (ridge and lasso); nonlinear models, splines, and generalized additive models; tree-based methods, random forests, and boosting; support vector machines; and some unsupervised learning: principal components and clustering (k-means and hierarchical). Computing is done in R, through tutorial sessions and homework assignments. This math-light course is offered via video segments (MOOC style) and in-class problem-solving sessions. Prerequisites: first courses in statistics, linear algebra, and computing.

[Tibshirani course website - 2018](https://tibshirani.su.domains/stats216.html)
[awesome ISLR course videos](https://www.dataschool.io/15-hours-of-expert-machine-learning-videos/)

## Stanford - Machine Learning Systems Design - CS 329S

This course aims to provide an iterative framework for developing real-world machine learning systems that are deployable, reliable, and scalable.

It starts by considering all stakeholders of each machine learning project and their objectives. Different objectives require different design choices, and this course discusses the tradeoffs of those choices.

Students learn about data management, data engineering, feature engineering, approaches to model selection, training, scaling, and how to continually monitor and deploy changes to ML systems, as well as the human side of ML projects, such as team structure and business metrics. In the process, students learn about important issues including privacy, fairness, and security.

[course website](https://stanford-cs329s.github.io/syllabus.html)

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/d/d0/Princeton_seal.svg/320px-Princeton_seal.svg.png?1663207635322" width="200px">

[Overview of AI/ML courses](https://aiml.cs.princeton.edu/course.html)

## Princeton - Computer Vision - COS 429

An introduction to the concepts of 2D and 3D computer vision. Topics include: low-level image processing methods such as filtering and edge detection; segmentation and clustering; optical flow and tracking; recognition; shape reconstruction from stereo, motion, texture, and shading; and recent developments in deep learning. Throughout the course, we also look at aspects of human vision and perception that guide and inspire computer vision techniques.

[course website](https://www.cs.princeton.edu/courses/archive/fall19/cos429/)

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/Seal_of_the_University_of_Michigan.svg/320px-Seal_of_the_University_of_Michigan.svg.png?1663207811796" width="200px">

## University of Michigan - Deep Learning for Computer Vision - EECS 498.008 / 598.008

Computer vision has become ubiquitous in our society, with applications in search, image understanding, apps, mapping, medicine, drones, and self-driving cars. Core to many of these applications are visual recognition tasks such as image classification and object detection. Recent developments in neural network approaches have greatly advanced the performance of these state-of-the-art visual recognition systems. This course is a deep dive into the details of neural-network-based deep learning methods for computer vision. During this course, students learn to implement, train, and debug their own neural networks and gain a detailed understanding of cutting-edge research in computer vision. We cover learning algorithms, neural network architectures, and practical engineering tricks for training and fine-tuning networks for visual recognition tasks.

Excellent course, and essentially an updated version of Stanford CS231n.

[YouTube playlist](https://www.youtube.com/playlist?list=PL5-TkQAfAZFbzxjBHtzdVCWE0Zbhomg7r)

[course website](https://web.eecs.umich.edu/~justincj/teaching/eecs498/WI2022/)

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/a/aa/University_of_Wisconsin_seal.svg/640px-University_of_Wisconsin_seal.svg.png?1663207698837" width="200px">

## UW-Madison - Introduction to Machine Learning and Statistical Pattern Classification - STAT 451

Introduction to machine learning for pattern classification, regression analysis, clustering, and dimensionality reduction. For each category, fundamental algorithms as well as a selection of contemporary, state-of-the-art algorithms are discussed. The evaluation of machine learning models using statistical methods is a particular focus of this course. Statistical pattern classification approaches, including maximum likelihood estimation and Bayesian decision theory, are compared and contrasted with algorithmic and nonparametric approaches. While the fundamental mathematical concepts underlying machine learning and pattern classification algorithms are taught, the practical use of machine learning algorithms using open-source libraries from the Python programming ecosystem receives equal focus in this course.

[course website](https://sebastianraschka.com/teaching/stat451-fs2021/)

[YouTube playlist](https://www.youtube.com/playlist?list=PLTKMiZHVd_2KyGirGEvKlniaWeLOHhUF3)

Sebastian also has an excellent textbook: [info](https://sebastianraschka.com/blog/2022/ml-pytorch-book.html)

<img src="https://upload.wikimedia.org/wikipedia/en/thumb/4/44/MIT_Seal.svg/1200px-MIT_Seal.svg.png" width="200px">

## MIT - Matrix Methods in Data Analysis, Signal Processing, and Machine Learning - MATH 18.065

Linear algebra concepts are key for understanding and creating machine learning algorithms, especially as applied to deep learning and neural networks. This course reviews linear algebra with applications to probability, statistics, and optimization, and above all a full explanation of deep learning.

Taught by Gil Strang! - Also 💙 the intro linear algebra course with Strang.

[OCW link](https://ocw.mit.edu/courses/18-065-matrix-methods-in-data-analysis-signal-processing-and-machine-learning-spring-2018/)

## MIT - Distributed Systems - CS 6.824

6.824 is a core 12-unit graduate subject with lectures, readings, programming labs, an optional project, a mid-term exam, and a final exam. It presents abstractions and implementation techniques for engineering distributed systems. Major topics include fault tolerance, replication, and consistency. Much of the class consists of studying and discussing case studies of distributed systems.

[course website](https://pdos.csail.mit.edu/6.824/)

## MIT - Introduction to Deep Learning - CS 6.S191

Pretty high-level view of DL.

[YouTube playlist](https://www.youtube.com/playlist?list=PLtBw6njQRU-rwp5__7C0oIVt26ZgjG9NI)

<img src="https://cdn5.euraxess.org/sites/default/files/news/eth_1.png" width="200px">

## ETH Zurich - Digital Design and Computer Architecture

The class provides a first introduction to the design of digital circuits and computer architecture. It covers the technical foundations of how a computing platform is designed from the bottom up. It introduces various execution paradigms, hardware description languages, and principles of digital design and computer architecture. The focus is on fundamental techniques employed in the design of modern microprocessors and their hardware/software interface.

[course website](https://safari.ethz.ch/digitaltechnik/spring2021/doku.php?id=start)
[YouTube playlist](https://www.youtube.com/watch?v=LbC0EZY8yw4&list=PL4YhK0pT0ZhXVMJMffEq_XqAIQM_uWSdi)

<img src="https://bair.berkeley.edu/images/BAIR_Logo_BlueType_Tag.png" width="200px">

## UC Berkeley - Advanced Robotics - CS 287

Over the past ten years, advances in optimization, in probabilistic reasoning, and in machine learning have had a large impact in robotics, with many of the current state-of-the-art algorithms heavily relying on these tools. At the same time, these three tools have wide applicability in many other fields. The current curriculum of CS287 is centered around these three tools, making it both a treatment of these tools (in the context of a specific application domain, namely robotics) and a treatment of the state of the art in (algorithmic) robotics. Problem sets are a mix of mathematical/algorithmic questions and programming problems. There is a substantial final project. NOTE: This course is about algorithms for robotics and does *not* cover hardware aspects. PREREQS: familiarity with mathematical proofs, probability, algorithms, and linear algebra; ability to implement algorithmic ideas in code.

[course website](https://people.eecs.berkeley.edu/~pabbeel/cs287-fa19/)

<img src="https://cmu-multicomp-lab.github.io/mmml-course/assets/img/cmu-logo.png" width="200px">

## Carnegie Mellon University - MultiModal Machine Learning - 11-777 • Fall 2022

Multimodal machine learning (MMML) is a vibrant multi-disciplinary research field which addresses some of the original goals of artificial intelligence by integrating and modeling multiple communicative modalities, including linguistic, acoustic, and visual messages. With the initial research on audio-visual speech recognition and, more recently, language & vision projects such as image and video captioning, this research field brings some unique challenges for multimodal researchers, given the heterogeneity of the data and the contingency often found between modalities. This course teaches fundamental mathematical concepts related to MMML, including multimodal alignment and fusion, heterogeneous representation learning, and multi-stream temporal modeling. We also review recent papers describing state-of-the-art probabilistic models and computational algorithms for MMML and discuss current and upcoming challenges.

[course website](https://cmu-multicomp-lab.github.io/mmml-course/fall2022/)

<img src="https://logos-world.net/wp-content/uploads/2022/01/University-of-Washington-Symbol.png" width="200px">

## University of Washington - Computer Vision - CSE/ECE 576 (Spring 2020)

This class is a general introduction to computer vision. It covers standard techniques in image processing like filtering, edge detection, stereo, and flow (old-school vision), as well as newer, machine-learning-based computer vision.

[course website](https://courses.cs.washington.edu/courses/cse576/20sp/)

<img src="https://ai.google/static/images/share.png" width="200px">

## Google Machine Learning Crash Course

A nice, quick, and easy overview of ML topics.

[course website](https://developers.google.com/machine-learning/crash-course/ml-intro)
code/.DS_Store ADDED
Binary file (10.2 kB).
 
code/README.md ADDED
@@ -0,0 +1,74 @@
---
license: apache-2.0
tags:
- image-classification
- generated_from_trainer
datasets:
- beans
metrics:
- accuracy
model-index:
- name: ''
  results:
  - task:
      name: Image Classification
      type: image-classification
    dataset:
      name: beans
      type: beans
      args: default
    metrics:
    - name: Accuracy
      type: accuracy
      value: 0.9774436090225563
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the beans dataset.
It achieves the following results on the evaluation set:
- Loss: 0.0666
- Accuracy: 0.9774

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 0.0002
- train_batch_size: 16
- eval_batch_size: 8
- seed: 42
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- num_epochs: 4

### Training results

| Training Loss | Epoch | Step | Validation Loss | Accuracy |
|:-------------:|:-----:|:----:|:---------------:|:--------:|
| 0.0742        | 1.54  | 100  | 0.0806          | 0.9774   |
| 0.0185        | 3.08  | 200  | 0.0666          | 0.9774   |

### Framework versions

- Transformers 4.14.1
- Pytorch 1.10.0
- Datasets 2.0.0
- Tokenizers 0.10.3
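
For reference, a minimal inference sketch against this checkpoint with the library versions above (the local path and image filename are hypothetical; the card doesn't give a hub id):

```python
from PIL import Image
from transformers import ViTFeatureExtractor, ViTForImageClassification

# hypothetical local path to this fine-tuned checkpoint
model = ViTForImageClassification.from_pretrained("./vit-base-beans")
feature_extractor = ViTFeatureExtractor.from_pretrained("./vit-base-beans")

image = Image.open("bean_leaf.jpg")  # any RGB photo of a bean leaf
inputs = feature_extractor(images=image, return_tensors="pt")
logits = model(**inputs).logits
# id2label comes from the accompanying config.json:
# angular_leaf_spot / bean_rust / healthy
print(model.config.id2label[logits.argmax(-1).item()])
```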
code/activation_functions.md ADDED
@@ -0,0 +1,45 @@
https://stats.stackexchange.com/questions/11859/what-is-the-difference-between-multiclass-and-multilabel-problem

https://machinelearningmastery.com/choose-an-activation-function-for-deep-learning/

https://web.stanford.edu/~nanbhas/blog/sigmoid-softmax/

Binary classification: one node, sigmoid activation.
Multiclass classification: one node per class, softmax activation.
Multilabel classification: one node per class, sigmoid activation.

Multi-class vs. binary-class is the question of how many classes your classifier models. In theory, a binary classifier is much simpler than a multi-class one, so it's important to make this distinction. For example, a support vector machine (SVM) can trivially learn a hyperplane to separate two classes, but three or more classes make it complex. In neural networks, we commonly use sigmoid for binary but softmax for multi-class as the last layer of the model.

Multi-label vs. single-label is the question of how many classes any object or example can belong to. In neural networks, if a single label is needed, we use a single softmax layer as the last layer, thus learning a single probability distribution that spans all classes. If multi-label classification is needed, we use multiple sigmoids on the last layer, thus learning a separate distribution for each class.
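
A minimal PyTorch sketch of these three output heads (the 16-dimensional features and 3 classes are made-up numbers for illustration):

```python
import torch
import torch.nn as nn

features = torch.randn(4, 16)  # a batch of 4 examples with 16 features each

# binary: one node + sigmoid -> P(positive class)
binary_head = nn.Sequential(nn.Linear(16, 1), nn.Sigmoid())

# multiclass: one node per class + softmax -> a single distribution over classes
multiclass_head = nn.Sequential(nn.Linear(16, 3), nn.Softmax(dim=-1))

# multilabel: one node per class + sigmoid -> an independent probability per class
multilabel_head = nn.Sequential(nn.Linear(16, 3), nn.Sigmoid())

print(binary_head(features).shape)            # torch.Size([4, 1])
print(multiclass_head(features).sum(dim=-1))  # each row sums to 1
print(multilabel_head(features).shape)        # torch.Size([4, 3]); rows need not sum to 1
```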
15
+
16
+ # Cross-Entropy or Log Likelihood in Output Layer (StackExchange)
17
+
18
+ *negative log likelihood* is also known as multi class cross-entropy
19
+
20
+
21
+ ### all of the normal loss functions and their applications in PyTorch
22
+
23
+ https://neptune.ai/blog/pytorch-loss-functions
24
+
25
+
26
+ https://medium.com/deeplearningmadeeasy/negative-log-likelihood-6bd79b55d8b6
27
+
28
+ It’s a cost function that is used as loss for machine learning models, telling us how bad it’s performing, the lower the better.
29
+
30
+ I’m going to explain it word by word, hopefully that will make it. easier to understand.
31
+
32
+ Negative: obviously means multiplying by -1. What? The loss of our model. Most machine learning frameworks only have minimization optimizations, but we want to maximize the probability of choosing the correct category.
33
+
34
+ We can **maximize by minimizing the negative log likelihood,** there you have it, we want somehow to maximize by minimizing.
35
+
36
+ Also it’s much easier to reason about the loss this way, to be consistent with the rule of loss functions approaching 0 as the model gets better.
37
+
38
+
39
+ cross entropy loss is same as negative log likelihood
40
+
41
+
42
+ NLL uses a negative connotation since the probabilities (or likelihoods) vary between zero and one, and the logarithms of values in this range are negative. In the end, the loss value becomes positive.
43
+
44
+
45
+
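
The equivalence is easy to check numerically; a small sketch with random logits:

```python
import torch
import torch.nn.functional as F

logits = torch.randn(4, 3)            # raw scores: 4 examples, 3 classes
targets = torch.tensor([0, 2, 1, 2])  # true class indices

# cross-entropy on raw logits ...
ce = F.cross_entropy(logits, targets)
# ... equals negative log likelihood on log-probabilities
nll = F.nll_loss(F.log_softmax(logits, dim=-1), targets)
print(torch.allclose(ce, nll))  # True
```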
code/all_results.json ADDED
@@ -0,0 +1,12 @@
{
    "epoch": 1.0,
    "eval_accuracy": 0.9774436090225563,
    "eval_loss": 0.10330713540315628,
    "eval_runtime": 10.9904,
    "eval_samples_per_second": 12.101,
    "eval_steps_per_second": 1.547,
    "train_loss": 0.3343511177943303,
    "train_runtime": 232.7119,
    "train_samples_per_second": 4.443,
    "train_steps_per_second": 0.279
}
code/classification.md ADDED
@@ -0,0 +1,88 @@
# Logistic Regression

References:
https://www.kdnuggets.com/2020/01/guide-precision-recall-confusion-matrix.html
https://developers.google.com/machine-learning/crash-course/classification/thresholding
https://machinelearningmastery.com/roc-curves-and-precision-recall-curves-for-classification-in-python/

https://developers.google.com/machine-learning/crash-course/logistic-regression/model-training

### Thresholding

A logistic regression model that returns 0.9995 for a particular email message is predicting that it is very likely to be spam. Conversely, another email message with a prediction score of 0.0003 on that same logistic regression model is very likely not spam. However, what about an email message with a prediction score of 0.6? In order to map a logistic regression value to a binary category, you must define a classification threshold (also called the decision threshold). A value above that threshold indicates "spam"; a value below indicates "not spam." It is tempting to assume that the classification threshold should always be 0.5, but thresholds are problem-dependent and are therefore values that you must tune.
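
Mapping a score to a label is then a one-liner (toy scores from the example above; 0.5 is just a starting point, not a rule):

```python
import numpy as np

probs = np.array([0.9995, 0.0003, 0.6])  # model outputs for three emails
threshold = 0.5  # problem-dependent; tune it rather than assuming 0.5

labels = np.where(probs > threshold, "spam", "not spam")
print(labels)  # ['spam' 'not spam' 'spam']
```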

### Accuracy

$
\text{Accuracy} = \frac{\text{total correct predictions}}{\text{total predictions}}
$

Using the confusion matrix values:

$
\text{Accuracy} = \frac{TP + TN}{TP + FP + TN + FN}
$

Accuracy alone doesn't tell the full story when you're working with a **class-imbalanced data set**, like this one, where there is a significant disparity between the number of positive and negative labels.

### Precision

Precision (also called positive predictive value): the ratio of correct positive predictions to the *total predicted positives*.

$
\text{Precision} = \frac{TP}{TP + FP}
$

### Recall

Recall (also called sensitivity, probability of detection, or true positive rate): the ratio of correct positive predictions to the *total actual positives*.

$
\text{Recall} = \frac{TP}{TP + FN}
$

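A quick check of these formulas with scikit-learn (the labels below are toy values for illustration):

```python
from sklearn.metrics import (accuracy_score, confusion_matrix,
                             precision_score, recall_score)

y_true = [1, 0, 1, 1, 0, 0, 1, 0]  # toy ground-truth labels
y_pred = [1, 0, 0, 1, 0, 1, 1, 0]  # toy predictions

print(confusion_matrix(y_true, y_pred))  # [[TN, FP], [FN, TP]]
print(accuracy_score(y_true, y_pred))    # (TP + TN) / (TP + FP + TN + FN)
print(precision_score(y_true, y_pred))   # TP / (TP + FP)
print(recall_score(y_true, y_pred))      # TP / (TP + FN)
```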

### ROC & AUC

* ROC curves summarize the trade-off between the true positive rate and the false positive rate for a predictive model using different probability thresholds.

* Precision-recall curves summarize the trade-off between the true positive rate and the positive predictive value for a predictive model using different probability thresholds.

* ROC curves are appropriate when the observations are balanced between the classes, whereas precision-recall curves are appropriate for imbalanced datasets.

### sklearn functions

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_recall_curve.html
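
Both curves can be computed in a few lines (toy labels and scores for illustration):

```python
import numpy as np
from sklearn.metrics import precision_recall_curve, roc_auc_score, roc_curve

y_true = np.array([0, 0, 1, 1, 1, 0])
scores = np.array([0.1, 0.4, 0.35, 0.8, 0.7, 0.2])  # predicted P(class 1)

fpr, tpr, roc_thresholds = roc_curve(y_true, scores)  # points on the ROC curve
precision, recall, pr_thresholds = precision_recall_curve(y_true, scores)
print(roc_auc_score(y_true, scores))  # area under the ROC curve
```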

### What about using XGBoost for classification?

We can still use XGBoost, but note that logistic regression is linear and XGBoost is *not* linear.

For example, we can see here that logistic regression draws linear boundaries between classes in the iris dataset:

https://scikit-learn.org/stable/auto_examples/linear_model/plot_iris_logistic.html#sphx-glr-auto-examples-linear-model-plot-iris-logistic-py

### What about the difference between SVM and logistic regression?

### Logistic Regression loss function

**This always trips me up because some people call it *log loss*, or cross entropy, or logits, or something else!**

The loss function for linear regression is squared loss. The loss function for logistic regression is log loss, which is defined as follows:

$
\text{Log Loss} = \sum_{(x, y) \in D} -y \log(y') - (1 - y) \log(1 - y')
$

$D$ is the data set containing many labeled examples, which are $(x, y)$ pairs.
$y$ is the label in a labeled example. Since this is logistic regression, every value of $y$ must either be 0 or 1.
$y'$ is the predicted value (somewhere between 0 and 1), given the set of features in $x$.
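
scikit-learn computes the (mean) log loss directly; a small check with toy values:

```python
from sklearn.metrics import log_loss

y_true = [1, 0, 1]
y_prob = [0.9, 0.2, 0.6]  # predicted probability of class 1
print(log_loss(y_true, y_prob))  # mean log loss over the three examples
```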
code/config.json ADDED
@@ -0,0 +1,32 @@
{
  "_name_or_path": "google/vit-base-patch16-224-in21k",
  "architectures": [
    "ViTForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "angular_leaf_spot",
    "1": "bean_rust",
    "2": "healthy"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "angular_leaf_spot": "0",
    "bean_rust": "1",
    "healthy": "2"
  },
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "qkv_bias": true,
  "torch_dtype": "float32",
  "transformers_version": "4.14.1"
}
code/cracks.py ADDED
@@ -0,0 +1,32 @@
import cv2
from autodistill.detection import CaptionOntology
from autodistill.utils import plot
from autodistill_grounded_sam import GroundedSAM

# define an ontology to map class names to the base model's prompt.
# the ontology dictionary has the format {caption: class},
# where caption is the prompt sent to the base model, and class is the label
# that will be saved for that caption in the generated annotations.
# then, load the model.
classes = ["crack"]

base_model = GroundedSAM(ontology=CaptionOntology({"crack": "crack"}))

results = base_model.predict("crack.png")

image = cv2.imread("crack.png")

# print image properties
print("Image shape:", image.shape)  # (height, width, channels)
print("Image dtype:", image.dtype)  # data type of the image array
print("Image size:", image.size)    # total number of pixels * channels

plot(
    image=image,
    detections=results,
    classes=[classes[i] for i in results.class_id],
)

print(results)
code/creditcard.csv ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:76274b691b16a6c49d3f159c883398e03ccd6d1ee12d9d8ee38f4b4b98551a89
size 150828752
code/cs224N_pytorch_tutorial_nlp.ipynb ADDED
@@ -0,0 +1,981 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "import pprint\n",
    "pp = pprint.PrettyPrinter()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Tensors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1, 2, 3], [4, 5, 6]]\n"
     ]
    }
   ],
   "source": [
    "list_of_lists = [[1, 2, 3], [4, 5, 6]]\n",
    "print(list_of_lists)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[0, 1],\n",
       "        [2, 3],\n",
       "        [4, 5]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# init a tensor\n",
    "data = torch.tensor([\n",
    "    [0, 1],\n",
    "    [2, 3],\n",
    "    [4, 5]\n",
    "])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[0., 1.],\n",
       "        [2., 3.],\n",
       "        [4., 5.]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# we can cast them to different types -- usually float32 or int\n",
    "data = torch.tensor([\n",
    "    [0, 1],\n",
    "    [2, 3],\n",
    "    [4, 5],\n",
    "], dtype=torch.float32)\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([3, 2])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "data2 = torch.tensor([\n",
    "    [1, 2, 3],\n",
    "    [4, 5, 6]\n",
    "], dtype=torch.float32)\n",
    "\n",
    "# must be same type\n",
    "data3 = data @ data2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([1, 2, 3, 4, 5, 6, 7, 8, 9])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rr = torch.arange(1, 10)\n",
    "rr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])\n",
      "tensor([[ 1,  2,  3],\n",
      "        [ 4,  5,  6],\n",
      "        [ 7,  8,  9],\n",
      "        [10, 11, 12],\n",
      "        [13, 14, 15]])\n",
      "torch.Size([5, 3])\n"
     ]
    }
   ],
   "source": [
    "# we can reshape a vector into a matrix!\n",
    "# evidently this can make batch operations easier\n",
    "# e.g. take a 15-element row vector and turn it into a 5 x 3 matrix\n",
    "vec = torch.arange(1, 16)\n",
    "print(vec)\n",
    "matrix = vec.view(5, 3)\n",
    "print(matrix)\n",
    "print(matrix.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# one of the reasons we use tensors is vectorized operations:\n",
    "# operations that can be conducted in parallel over a particular dimension of a tensor\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([42., 57., 72.])"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# things we can do: sum, mean, and std dev\n",
    "\n",
    "# https://stackoverflow.com/questions/55691819/why-does-dim-1-return-row-indices-in-torch-argmax\n",
    "\n",
    "# dim=1 is ROWS\n",
    "data3.mean(dim=1)\n",
    "data3.sum(dim=1)\n",
    "\n",
    "# dim=0 is COLUMNS\n",
    "data3.mean(dim=0)\n",
    "data3.sum(dim=0)\n",
    "\n",
    "# look at the official PyTorch tutorial for a good example of all the ways you can slice a tensor\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Autograd\n",
    "\n",
    "PyTorch is known for its automatic differentiation feature. We can call the `backward()` method to ask PyTorch to calculate the gradients, which are then stored in the `grad` attribute."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n"
     ]
    }
   ],
   "source": [
    "x = torch.tensor([2], dtype=torch.float32, requires_grad=True)\n",
    "\n",
    "pp.pprint(x.grad)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'torch.Tensor'>\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "tensor([84.])"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = 3 * x * x\n",
    "# z = 3 * x * x\n",
    "print(type(y))\n",
    "y.backward()\n",
    "# if we just want to manually reset the grad instead of using an optimizer,\n",
    "# we can do it manually like this...\n",
    "# x.grad = torch.tensor([0.])\n",
    "# x.zero_grad()\n",
    "x.grad"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
+ "# Neural Network Module\n",
300
+ "\n",
301
+ "### Linear Layer \n",
302
+ "\n",
303
+ "we can use `nn.Linear(H_in, H_out)` to create a linear layer. This will take a matrix of (N, *, H_in) dimensions and ouptput a matrix of (N, *, H_out). the `*` denotes that there could be arbitrary number of dimensions in between. The linear layer performs the operation `Ax+b`., whewr e`A` and `b` are intialized randomly. If we don't wnat the linear layer to learn the bias parameters, we can intialize our layer with `bias=False`\n"
304
+ ]
305
+ },
306
+ {
307
+ "cell_type": "code",
308
+ "execution_count": 88,
309
+ "metadata": {},
310
+ "outputs": [
311
+ {
312
+ "data": {
313
+ "text/plain": [
314
+ "tensor([[[ 0.1580, -0.3979],\n",
315
+ " [ 0.1580, -0.3979],\n",
316
+ " [ 0.1580, -0.3979]],\n",
317
+ "\n",
318
+ " [[ 0.1580, -0.3979],\n",
319
+ " [ 0.1580, -0.3979],\n",
320
+ " [ 0.1580, -0.3979]]], grad_fn=<AddBackward0>)"
321
+ ]
322
+ },
323
+ "execution_count": 88,
324
+ "metadata": {},
325
+ "output_type": "execute_result"
326
+ }
327
+ ],
328
+ "source": [
329
+ "import torch.nn as nn\n",
330
+ "\n",
331
+ "# create the inputs \n",
332
+ "# two, 3x4 matrices of ones. \n",
333
+ "# 3 rows by 4 columns \n",
334
+ "inputs = torch.ones(2,3,4)\n",
335
+ "# print(inputs)\n",
336
+ "\n",
337
+ "# make a linear layers transforming N,*,H_in dimensional inputs to N,*,H_out dimensional outputs \n",
338
+ "linear = nn.Linear(4,2)\n",
339
+ "linear_output = linear(inputs)\n",
340
+ "linear_output.shape\n",
341
+ "linear_output"
342
+ ]
343
+ },
344
+ {
345
+ "cell_type": "markdown",
346
+ "metadata": {},
347
+ "source": [
348
+ "We get different values in the tensor each time we run it because `Linear()` randomly initializes the function `Ax + b` to different values each time!"
349
+ ]
350
+ },
351
+ {
352
+ "cell_type": "code",
353
+ "execution_count": 90,
354
+ "metadata": {},
355
+ "outputs": [
356
+ {
357
+ "data": {
358
+ "text/plain": [
359
+ "[Parameter containing:\n",
360
+ " tensor([[-0.2625, -0.1494, 0.0883, 0.1954],\n",
361
+ " [ 0.0474, 0.1861, 0.1699, -0.3701]], requires_grad=True),\n",
362
+ " Parameter containing:\n",
363
+ " tensor([ 0.2862, -0.4312], requires_grad=True)]"
364
+ ]
365
+ },
366
+ "execution_count": 90,
367
+ "metadata": {},
368
+ "output_type": "execute_result"
369
+ }
370
+ ],
371
+ "source": [
372
+ "list(linear.parameters())"
373
+ ]
374
+ },
375
+ {
376
+ "cell_type": "markdown",
377
+ "metadata": {},
378
+ "source": [
379
+ "# Activation Function Layer\n",
380
+ "\n",
381
+ "we can also use `nn` module to apply activations function to our tensors. Activation functions are used to add non-linearity to our network. Some examples of activations functions are `nn.ReLU()`, `nn.Sigmoid()` and `nn.LeakyReLU()`. Activation functions operate on each elementate seperatel, so sthe sahpe of the tensorwe get as an output ar ethe same as the ones we pass in."
382
+ ]
383
+ },
384
+ {
385
+ "cell_type": "code",
386
+ "execution_count": 92,
387
+ "metadata": {},
388
+ "outputs": [
389
+ {
390
+ "data": {
391
+ "text/plain": [
392
+ "tensor([[[0.5394, 0.4018],\n",
393
+ " [0.5394, 0.4018],\n",
394
+ " [0.5394, 0.4018]],\n",
395
+ "\n",
396
+ " [[0.5394, 0.4018],\n",
397
+ " [0.5394, 0.4018],\n",
398
+ " [0.5394, 0.4018]]], grad_fn=<SigmoidBackward0>)"
399
+ ]
400
+ },
401
+ "execution_count": 92,
402
+ "metadata": {},
403
+ "output_type": "execute_result"
404
+ }
405
+ ],
406
+ "source": [
407
+ "sigmoid = nn.Sigmoid()\n",
408
+ "\n",
409
+ "output = sigmoid(linear_output)\n",
410
+ "output\n"
411
+ ]
412
+ },
413
+ {
414
+ "cell_type": "markdown",
415
+ "metadata": {},
416
+ "source": [
417
+ "# Putting the Layers Together\n",
418
+ "\n",
419
+ "So far we have just created lyaers and pass the output of one as the input of the next. Instead of creating intermediate tensors and passing them around, we can use `nn.Sequeuntial` which puts layers together sequentially"
420
+ ]
421
+ },
422
+ {
423
+ "cell_type": "code",
424
+ "execution_count": 101,
425
+ "metadata": {},
426
+ "outputs": [],
427
+ "source": [
428
+ "block = nn.Sequential(\n",
429
+ " nn.Linear(4,2),\n",
430
+ " nn.Linear(2,1),\n",
431
+ " nn.Sigmoid(),\n",
432
+ ")"
433
+ ]
434
+ },
435
+ {
436
+ "cell_type": "code",
437
+ "execution_count": 102,
438
+ "metadata": {},
439
+ "outputs": [
440
+ {
441
+ "data": {
442
+ "text/plain": [
443
+ "tensor([[[0.4056],\n",
444
+ " [0.4056],\n",
445
+ " [0.4056]],\n",
446
+ "\n",
447
+ " [[0.4056],\n",
448
+ " [0.4056],\n",
449
+ " [0.4056]]], grad_fn=<SigmoidBackward0>)"
450
+ ]
451
+ },
452
+ "execution_count": 102,
453
+ "metadata": {},
454
+ "output_type": "execute_result"
455
+ }
456
+ ],
457
+ "source": [
458
+ "input = torch.ones(2,3,4)\n",
459
+ "output = block(input)\n",
460
+ "output"
461
+ ]
462
+ },
463
+ {
464
+ "cell_type": "markdown",
465
+ "metadata": {},
466
+ "source": [
467
+ "# Custom Modules \n",
468
+ "\n",
469
+ "Instead of using the predefined modules, we can also build our own by extending the `nn.Module` class. For example , we can build the `nn.Linear` on our own using te tensor introduced earlier! We can also build new, more complex modules, such a s a custom neural network. \n",
470
+ "\n",
471
+ "To create a custom module, the first thing we have to do is to extend the `nn.Module` . We can then initialize our parameters in the `__init__` function, staring with a call to the `__init__` function of the super class. Al the class attributes we define which are `nn` module objects are treated as parameters , which can then learned during the training. Tensors are not parameters , but they can be turned into parameters if they are wrapp in `nn.Parameters` class \n",
472
+ "\n",
473
+ "All classes extending the `nn.Module` are also expected to implement a `forward(x)` function, where `x` is the tensor. this is the function that is called when a parameters is passed to our modul, for example `modelFooBarBaz(x)`"
474
+ ]
475
+ },
476
+ {
477
+ "cell_type": "code",
478
+ "execution_count": 107,
479
+ "metadata": {},
480
+ "outputs": [],
481
+ "source": [
482
+ "class MultiLayerPerceptron(nn.Module):\n",
483
+ " def __init__(self, input_size, hidden_size):\n",
484
+ " # Call to the super() class \n",
485
+ " super(MultiLayerPerceptron, self).__init__()\n",
486
+ "\n",
487
+ " # saving initialization params\n",
488
+ " self.input_size = input_size \n",
489
+ " self.hidden_size = hidden_size\n",
490
+ "\n",
491
+ " #define our model \n",
492
+ " # doesn't have to be named model \n",
493
+ " self.model = nn.Sequential(\n",
494
+ " nn.Linear(self.input_size, self.hidden_size),\n",
495
+ " nn.ReLU(),\n",
496
+ " nn.Linear(self.hidden_size, self.input_size),\n",
497
+ " nn.Sigmoid()\n",
498
+ " )\n",
499
+ "\n",
500
+ " def forward(self, x):\n",
501
+ " output = self.model(x)\n",
502
+ " return output\n",
503
+ "\n",
504
+ " \n",
505
+ "\n"
506
+ ]
507
+ },
508
+ {
509
+ "cell_type": "code",
510
+ "execution_count": 108,
511
+ "metadata": {},
512
+ "outputs": [
513
+ {
514
+ "data": {
515
+ "text/plain": [
516
+ "tensor([[0.3950, 0.4821, 0.3842, 0.5562, 0.4945],\n",
517
+ " [0.4746, 0.5936, 0.3286, 0.4435, 0.3822]], grad_fn=<SigmoidBackward0>)"
518
+ ]
519
+ },
520
+ "execution_count": 108,
521
+ "metadata": {},
522
+ "output_type": "execute_result"
523
+ }
524
+ ],
525
+ "source": [
526
+ "# now lets instantiate it and see what it does. \n",
527
+ "\n",
528
+ "input = torch.randn(2,5)\n",
529
+ "\n",
530
+ "multi_layer_perceptron = MultiLayerPerceptron(5,3)\n",
531
+ "\n",
532
+ "#pass our input through our model \n",
533
+ "multi_layer_perceptron(input)\n"
534
+ ]
535
+ },
536
+ {
537
+ "cell_type": "code",
538
+ "execution_count": 109,
539
+ "metadata": {},
540
+ "outputs": [
541
+ {
542
+ "data": {
543
+ "text/plain": [
544
+ "[('model.0.weight',\n",
545
+ " Parameter containing:\n",
546
+ " tensor([[-0.3665, -0.0611, 0.4206, -0.1927, -0.2036],\n",
547
+ " [-0.3317, -0.4113, -0.1653, -0.2854, -0.1773],\n",
548
+ " [-0.4202, 0.1413, 0.0547, -0.0287, -0.4163]], requires_grad=True)),\n",
549
+ " ('model.0.bias',\n",
550
+ " Parameter containing:\n",
551
+ " tensor([ 0.4075, -0.2855, -0.0500], requires_grad=True)),\n",
552
+ " ('model.2.weight',\n",
553
+ " Parameter containing:\n",
554
+ " tensor([[ 0.1899, 0.2779, 0.1600],\n",
555
+ " [ 0.1802, 0.2193, 0.5014],\n",
556
+ " [-0.2587, 0.1474, 0.2716],\n",
557
+ " [-0.3371, 0.4262, 0.0181],\n",
558
+ " [-0.4094, 0.2103, 0.2476]], requires_grad=True)),\n",
559
+ " ('model.2.bias',\n",
560
+ " Parameter containing:\n",
561
+ " tensor([-0.4264, -0.0715, -0.4720, 0.2259, -0.0221], requires_grad=True))]"
562
+ ]
563
+ },
564
+ "execution_count": 109,
565
+ "metadata": {},
566
+ "output_type": "execute_result"
567
+ }
568
+ ],
569
+ "source": [
570
+ "# we can inspect the parameters of our model with `named_parameters()` and `parameters()`\n",
571
+ "list(multi_layer_perceptron.named_parameters())"
572
+ ]
573
+ },
574
+ {
575
+ "cell_type": "code",
576
+ "execution_count": 116,
577
+ "metadata": {},
578
+ "outputs": [
579
+ {
580
+ "data": {
581
+ "text/plain": [
582
+ "[Parameter containing:\n",
583
+ " tensor([[-0.1681, 0.3615, -0.1085, 0.2136, -0.3068],\n",
584
+ " [ 0.1105, -0.0750, -0.4361, 0.4284, 0.3173],\n",
585
+ " [ 0.1914, 0.1469, 0.4325, -0.1647, -0.3230]], requires_grad=True),\n",
586
+ " Parameter containing:\n",
587
+ " tensor([-0.0533, -0.1809, -0.3992], requires_grad=True),\n",
588
+ " Parameter containing:\n",
589
+ " tensor([[ 0.5454, 0.0090, -0.0701],\n",
590
+ " [ 0.4629, -0.2518, -0.4816],\n",
591
+ " [ 0.0641, 0.3194, 0.4202],\n",
592
+ " [ 0.5256, 0.1806, -0.4929],\n",
593
+ " [ 0.0383, 0.4789, -0.3110]], requires_grad=True),\n",
594
+ " Parameter containing:\n",
595
+ " tensor([-0.0959, 0.4256, -0.3175, -0.5408, -0.3457], requires_grad=True)]"
596
+ ]
597
+ },
598
+ "execution_count": 116,
599
+ "metadata": {},
600
+ "output_type": "execute_result"
601
+ }
602
+ ],
603
+ "source": [
604
+ "list(multi_layer_perceptron.parameters())"
605
+ ]
606
+ },
607
+ {
608
+ "cell_type": "markdown",
609
+ "metadata": {},
610
+ "source": [
611
+ "# Optimization \n",
612
+ "\n",
613
+ "We have showed how gradients are calculated with the `backward()` function. Having the gradients isn't enough for our models to learn. We also need to know how to update the parameters of our models. this is where the optimizers comes in. `torch.optim` module contains several optimizers that we can use. Some popular examples are `optim.SGD` and `optim.Adam`. When initalizing optimizers, we pass our model parameters which can be accessed with `model.parameters()`, tellin g the optimizers which values it will optimizing. Optimizers also have a learning rate `lr` parameters, which determines how big of a n update will be made in every step. Different optimizers have different hypterparameters as well. "
614
+ ]
615
+ },
616
+ {
617
+ "cell_type": "markdown",
618
+ "metadata": {},
619
+ "source": [
620
+ "after we have our optimization function, we can defien a loss that we want to optimize for. We can either define the loss ourselve, or use one of th epredefined loss function in `pyTorch`, such as `nn.BCELoss()`. Let's put everything together now! We will start by creating some dummy data"
621
+ ]
622
+ },
623
+ {
624
+ "cell_type": "code",
625
+ "execution_count": 112,
626
+ "metadata": {},
627
+ "outputs": [
628
+ {
629
+ "data": {
630
+ "text/plain": [
631
+ "tensor([[1.6229, 1.6178, 1.8535, 1.6113, 1.0269],\n",
632
+ " [1.2863, 1.9466, 1.5752, 1.0483, 1.0441],\n",
633
+ " [1.9156, 1.2305, 1.4923, 1.9213, 1.9353],\n",
634
+ " [1.1242, 1.0729, 1.3030, 1.0789, 1.0008],\n",
635
+ " [1.9523, 1.0653, 1.9383, 1.7856, 1.3598],\n",
636
+ " [1.2001, 1.4937, 1.9633, 1.7623, 1.8388],\n",
637
+ " [1.9458, 1.6999, 1.5424, 1.1136, 1.2931],\n",
638
+ " [1.3253, 1.6970, 1.1702, 1.5542, 1.9958],\n",
639
+ " [1.4209, 1.7721, 1.8824, 1.5063, 1.6478],\n",
640
+ " [1.8896, 1.3686, 1.0754, 1.1028, 1.7788]])"
641
+ ]
642
+ },
643
+ "execution_count": 112,
644
+ "metadata": {},
645
+ "output_type": "execute_result"
646
+ }
647
+ ],
648
+ "source": [
649
+ "import torch.optim as optim \n",
650
+ "\n",
651
+ "# create label/y/output data \n",
652
+ "y = torch.ones(10, 5)\n",
653
+ "\n",
654
+ "# add some noise to our y to geenrate our x \n",
655
+ "x = y + torch.rand_like(y)\n",
656
+ "x\n"
657
+ ]
658
+ },
659
+ {
660
+ "cell_type": "markdown",
661
+ "metadata": {},
662
+ "source": [
663
+ "# Define our \n",
664
+ "1. model \n",
665
+ "2. optimizer \n",
666
+ "3. loss function (to optimize)"
667
+ ]
668
+ },
669
+ {
670
+ "cell_type": "code",
671
+ "execution_count": 114,
672
+ "metadata": {},
673
+ "outputs": [
674
+ {
675
+ "name": "stdout",
676
+ "output_type": "stream",
677
+ "text": [
678
+ "<class 'torch.optim.adam.Adam'>\n"
679
+ ]
680
+ },
681
+ {
682
+ "data": {
683
+ "text/plain": [
684
+ "0.7805761098861694"
685
+ ]
686
+ },
687
+ "execution_count": 114,
688
+ "metadata": {},
689
+ "output_type": "execute_result"
690
+ }
691
+ ],
692
+ "source": [
693
+ "multi_layer_perceptron = MultiLayerPerceptron(5,3)\n",
694
+ "\n",
695
+ "# optimizer \n",
696
+ "adam = optim.Adam(multi_layer_perceptron.parameters(), lr=1e-1)\n",
697
+ "print(type(adam))\n",
698
+ "\n",
699
+ "# loss function \n",
700
+ "loss_function = nn.BCELoss()\n",
701
+ "\n",
702
+ "# calculate how our model is doing now \n",
703
+ "y_pred = multi_layer_perceptron(x)\n",
704
+ "loss_function(y_pred, y).item()"
705
+ ]
706
+ },
707
+ {
708
+ "cell_type": "markdown",
709
+ "metadata": {},
710
+ "source": [
711
+ "Let's see if we can have our model achieve a smaller loss. Now that we have everything we need, we can setup our training loop. "
712
+ ]
713
+ },
714
+ {
715
+ "cell_type": "code",
716
+ "execution_count": 117,
717
+ "metadata": {},
718
+ "outputs": [
719
+ {
720
+ "name": "stdout",
721
+ "output_type": "stream",
722
+ "text": [
723
+ "Epoch 0: training loss: 0.7805761098861694\n",
724
+ "Epoch 1: training loss: 0.465933233499527\n",
725
+ "Epoch 2: training loss: 0.21142368018627167\n",
726
+ "Epoch 3: training loss: 0.06858699023723602\n",
727
+ "Epoch 4: training loss: 0.018102366477251053\n",
728
+ "Epoch 5: training loss: 0.004522338043898344\n",
729
+ "Epoch 6: training loss: 0.0011653534602373838\n",
730
+ "Epoch 7: training loss: 0.00032139578252099454\n",
731
+ "Epoch 8: training loss: 9.614464215701446e-05\n",
732
+ "Epoch 9: training loss: 3.128984462819062e-05\n"
733
+ ]
734
+ }
735
+ ],
736
+ "source": [
737
+ "# set number of epoch which determines the number of training iterations. \n",
738
+ "n_epoch = 10 \n",
739
+ "\n",
740
+ "for epoch in range(n_epoch):\n",
741
+ " #set gradients to zero \n",
742
+ " adam.zero_grad()\n",
743
+ "\n",
744
+ " # get model predictions \n",
745
+ " y_pred = multi_layer_perceptron(x)\n",
746
+ "\n",
747
+ "\n",
748
+ " # calculate the loss \n",
749
+ " loss = loss_function(y_pred, y)\n",
750
+ "\n",
751
+ " # print stats\n",
752
+ " print(f\"Epoch {epoch}: training loss: {loss}\")\n",
753
+ "\n",
754
+ " #computer the gradients \n",
755
+ " loss.backward() \n",
756
+ "\n",
757
+ " # take a step to optimize the weights \n",
758
+ " adam.step()\n",
759
+ "\n",
760
+ " \n"
761
+ ]
762
+ },
763
+ {
764
+ "cell_type": "code",
765
+ "execution_count": 118,
766
+ "metadata": {},
767
+ "outputs": [
768
+ {
769
+ "data": {
770
+ "text/plain": [
771
+ "[Parameter containing:\n",
772
+ " tensor([[ 0.5215, 1.0621, 0.5877, 0.9016, 0.3710],\n",
773
+ " [ 0.8491, 0.6657, 0.3032, 1.1651, 1.0538],\n",
774
+ " [-0.2273, -0.2718, 0.0138, -0.5835, -0.7417]], requires_grad=True),\n",
775
+ " Parameter containing:\n",
776
+ " tensor([ 0.6412, 0.5603, -0.8180], requires_grad=True),\n",
777
+ " Parameter containing:\n",
778
+ " tensor([[ 1.2475, 0.7545, 0.3486],\n",
779
+ " [ 1.2015, 0.5252, -0.0629],\n",
780
+ " [ 0.7851, 1.0803, 0.8389],\n",
781
+ " [ 1.2163, 0.9155, -0.0741],\n",
782
+ " [ 0.7432, 1.2263, 0.1078]], requires_grad=True),\n",
783
+ " Parameter containing:\n",
784
+ " tensor([0.5621, 1.1223, 0.3672, 0.1077, 0.3128], requires_grad=True)]"
785
+ ]
786
+ },
787
+ "execution_count": 118,
788
+ "metadata": {},
789
+ "output_type": "execute_result"
790
+ }
791
+ ],
792
+ "source": [
793
+ "list(multi_layer_perceptron.parameters())"
794
+ ]
795
+ },
796
+ {
797
+ "cell_type": "markdown",
798
+ "metadata": {},
799
+ "source": [
800
+ "we can see that our loss is decreasing. Let's check the predictions of our model now and see if they are close to our original y of all ones."
801
+ ]
802
+ },
803
+ {
804
+ "cell_type": "code",
805
+ "execution_count": 120,
806
+ "metadata": {},
807
+ "outputs": [
808
+ {
809
+ "data": {
810
+ "text/plain": [
811
+ "tensor([[1.0000, 1.0000, 1.0000, 1.0000, 1.0000],\n",
812
+ " [1.0000, 1.0000, 1.0000, 1.0000, 1.0000],\n",
813
+ " [1.0000, 1.0000, 1.0000, 1.0000, 1.0000],\n",
814
+ " [0.9999, 0.9999, 0.9999, 1.0000, 0.9999],\n",
815
+ " [1.0000, 1.0000, 1.0000, 1.0000, 1.0000],\n",
816
+ " [1.0000, 1.0000, 1.0000, 1.0000, 1.0000],\n",
817
+ " [1.0000, 1.0000, 1.0000, 1.0000, 1.0000],\n",
818
+ " [1.0000, 1.0000, 1.0000, 1.0000, 1.0000],\n",
819
+ " [1.0000, 1.0000, 1.0000, 1.0000, 1.0000],\n",
820
+ " [1.0000, 1.0000, 1.0000, 1.0000, 1.0000]], grad_fn=<SigmoidBackward0>)"
821
+ ]
822
+ },
823
+ "execution_count": 120,
824
+ "metadata": {},
825
+ "output_type": "execute_result"
826
+ }
827
+ ],
828
+ "source": [
829
+ "y_pred = multi_layer_perceptron(x)\n",
830
+ "y_pred"
831
+ ]
832
+ },
833
+ {
834
+ "cell_type": "markdown",
835
+ "metadata": {},
836
+ "source": [
837
+ "Looks our model almost perfectly learned to filter out the noise from the `x` that we passed in!"
838
+ ]
839
+ },
840
+ {
841
+ "cell_type": "markdown",
842
+ "metadata": {},
843
+ "source": [
844
+ "# NLP - Word Window Classification \n",
845
+ "\n",
846
+ "In this section, our goal will be to train a model that will find the words in a sentence corresponding to a `LOCATION`, which will be always of span `1`. (Meaning tha t`San Fransisco` won't be recognized a s a `LOCATION`). our task is called `Word Window Classification` for a reason. Instead of letting our model to only take a look at one word in each forward pass, we would like tot be able to consider the context of the word in question. That i, for each word, we want our model to be aware of the surrounding words. Let's dive in! "
847
+ ]
848
+ },
849
+ {
850
+ "cell_type": "code",
851
+ "execution_count": 121,
852
+ "metadata": {},
853
+ "outputs": [],
854
+ "source": [
855
+ "# Data \n",
856
+ "\n",
857
+ "# Our raw corpus. \n",
858
+ "corpus = [\n",
859
+ " \"We always come to Paris\", \n",
860
+ " \"The processor is from Australia\", \n",
861
+ " \"I live in Stanford\",\n",
862
+ " \"He comes from Taiwan\",\n",
863
+ " \"The capital of Turkey is Ankara\",\n",
864
+ "]"
865
+ ]
866
+ },
867
+ {
868
+ "cell_type": "markdown",
869
+ "metadata": {},
870
+ "source": [
871
+ "# Preprocessing\n",
872
+ "\n",
873
+ "To make it easier for our models to learn, we usually apply a few preprocessing steps to our data. This is especially important when dealing with text data. Here are some examples of text preprocessing: \n",
874
+ "\n",
875
+ "* Tokenization: turning sentence in token representation \n",
876
+ "* Lowercasing: convert all letters to lowercase \n",
877
+ "* Noise Removal: removing special characters (such as punctuations)\n",
878
+ "* Stop words removal: removing commonly used words \n",
879
+ "\n",
880
+ "Which preprocessing steps are necessary is determind by the task at hand. For example, although it is useful to remove special characers in some tasks, for other they may be important( for example, if we are dealing with ultiple langugaes). For our task, we will lowercase our words and tokenize. "
881
+ ]
882
+ },
883
+ {
884
+ "cell_type": "code",
885
+ "execution_count": 123,
886
+ "metadata": {},
887
+ "outputs": [
888
+ {
889
+ "data": {
890
+ "text/plain": [
891
+ "[['we', 'always', 'come', 'to', 'paris'],\n",
892
+ " ['the', 'processor', 'is', 'from', 'australia'],\n",
893
+ " ['i', 'live', 'in', 'stanford'],\n",
894
+ " ['he', 'comes', 'from', 'taiwan'],\n",
895
+ " ['the', 'capital', 'of', 'turkey', 'is', 'ankara']]"
896
+ ]
897
+ },
898
+ "execution_count": 123,
899
+ "metadata": {},
900
+ "output_type": "execute_result"
901
+ }
902
+ ],
903
+ "source": [
904
+ "def preprocess_sentence(sentence):\n",
905
+ " return sentence.lower().split()\n",
906
+ "\n",
907
+ "# create our training set \n",
908
+ "train_sentences = [preprocess_sentence(sentence) for sentence in corpus]\n",
909
+ "train_sentences"
910
+ ]
911
+ },
912
+ {
913
+ "cell_type": "markdown",
914
+ "metadata": {},
915
+ "source": [
916
+ "for each training example we have, we should also have a corresponding label. Recall that the goal of our model was to determine which words correspond to a `LOCATION`. That is, we want our model to output `0` for all the words that are not `LOCATION` and `1` for the ones that are `LOCATION`s. But we need to create a data for our output "
917
+ ]
918
+ },
919
+ {
920
+ "cell_type": "code",
921
+ "execution_count": 125,
922
+ "metadata": {},
923
+ "outputs": [
924
+ {
925
+ "data": {
926
+ "text/plain": [
927
+ "[[0, 0, 0, 0, 1],\n",
928
+ " [0, 0, 0, 0, 1],\n",
929
+ " [0, 0, 0, 1],\n",
930
+ " [0, 0, 0, 1],\n",
931
+ " [0, 0, 0, 1, 0, 1]]"
932
+ ]
933
+ },
934
+ "execution_count": 125,
935
+ "metadata": {},
936
+ "output_type": "execute_result"
937
+ }
938
+ ],
939
+ "source": [
940
+ "# set of lcoations tha tappear in our corpus \n",
941
+ "locations = set([\"australia\", \"ankara\", \"paris\", \"stanford\", \"taiwan\", \"turkey\"])\n",
942
+ "\n",
943
+ "# our train labels \n",
944
+ "train_labels = [[1 if word in locations else 0 for word in sentence] for sentence in train_sentences]\n",
945
+ "train_labels"
946
+ ]
947
+ },
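+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick illustration (this sketch is an addition, not part of the original run), here is one way to build the word windows that the task name refers to; `window_size` and the `<pad>` token are assumed choices."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sketch: pad each sentence, then take a window of 2 * window_size + 1 tokens around each word \n",
+ "window_size = 1  # assumed: one word of context on each side \n",
+ "\n",
+ "def make_windows(sentence, window_size):\n",
+ "    padded = [\"<pad>\"] * window_size + sentence + [\"<pad>\"] * window_size\n",
+ "    # one window per original token, centered on that token \n",
+ "    return [padded[i : i + 2 * window_size + 1] for i in range(len(sentence))]\n",
+ "\n",
+ "make_windows(train_sentences[0], window_size)"
+ ]
+ },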
948
+ {
949
+ "cell_type": "markdown",
950
+ "metadata": {},
951
+ "source": [
952
+ "# Converting Words to Embeddings \n"
953
+ ]
954
+ }
955
+ ],
956
+ "metadata": {
957
+ "interpreter": {
958
+ "hash": "b7e818f66e33c31ac0526ee7f8556503ff93918b8b22809241939dc19e90de0b"
959
+ },
960
+ "kernelspec": {
961
+ "display_name": "Python 3.8.12 64-bit ('pytorch_m1': conda)",
962
+ "language": "python",
963
+ "name": "python3"
964
+ },
965
+ "language_info": {
966
+ "codemirror_mode": {
967
+ "name": "ipython",
968
+ "version": 3
969
+ },
970
+ "file_extension": ".py",
971
+ "mimetype": "text/x-python",
972
+ "name": "python",
973
+ "nbconvert_exporter": "python",
974
+ "pygments_lexer": "ipython3",
975
+ "version": "3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:14) \n[Clang 12.0.1 ]"
976
+ },
977
+ "orig_nbformat": 4
978
+ },
979
+ "nbformat": 4,
980
+ "nbformat_minor": 2
981
+ }
code/cs229-scratchpad.ipynb ADDED
@@ -0,0 +1,812 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "ename": "ModuleNotFoundError",
10
+ "evalue": "No module named 'numpy'",
11
+ "output_type": "error",
12
+ "traceback": [
13
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
14
+ "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
15
+ "\u001b[1;32m/Users/johnnydevriese/projects/jupyter/cs229-scratchpad.ipynb Cell 1'\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/cs229-scratchpad.ipynb#ch0000000?line=0'>1</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mnumpy\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mnp\u001b[39;00m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/cs229-scratchpad.ipynb#ch0000000?line=2'>3</a>\u001b[0m x \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39marray([\u001b[39m1\u001b[39m, \u001b[39m2\u001b[39m, \u001b[39m3\u001b[39m])\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/cs229-scratchpad.ipynb#ch0000000?line=3'>4</a>\u001b[0m y \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39marray([\u001b[39m4\u001b[39m, \u001b[39m5\u001b[39m, \u001b[39m6\u001b[39m])\n",
16
+ "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'numpy'"
17
+ ]
18
+ }
19
+ ],
20
+ "source": [
21
+ "import numpy as np\n",
22
+ "\n",
23
+ "x = np.array([1, 2, 3])\n",
24
+ "y = np.array([4, 5, 6])\n",
25
+ "\n",
26
+ "print(x.size)\n",
27
+ "k = 5\n",
28
+ "for k in range(k+1):\n",
29
+ " print(k)\n",
30
+ " foo = np.power(x[1], k)\n",
31
+ "\n",
32
+ "# todo: need to build a matrix \n",
33
+ "x[1] ** np.arange(k)\n",
34
+ "\n",
35
+ "\n",
36
+ "# polyn = np.array([1, np.power(x[0], )])"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": null,
42
+ "metadata": {},
43
+ "outputs": [],
44
+ "source": []
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 45,
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "name": "stdout",
53
+ "output_type": "stream",
54
+ "text": [
55
+ "[[8.41470985e-01 1.00000000e+00 1.00000000e+00 1.00000000e+00\n",
56
+ " 1.00000000e+00 1.00000000e+00 1.00000000e+00]\n",
57
+ " [9.09297427e-01 1.00000000e+00 2.00000000e+00 4.00000000e+00\n",
58
+ " 8.00000000e+00 1.60000000e+01 3.20000000e+01]\n",
59
+ " [1.41120008e-01 1.00000000e+00 3.00000000e+00 9.00000000e+00\n",
60
+ " 2.70000000e+01 8.10000000e+01 2.43000000e+02]]\n",
61
+ "(3, 7)\n"
62
+ ]
63
+ },
64
+ {
65
+ "data": {
66
+ "text/plain": [
67
+ "array([ 8.75909112, 15. , 32. , 78. ,\n",
68
+ " 206. , 570. , 1622. ])"
69
+ ]
70
+ },
71
+ "execution_count": 45,
72
+ "metadata": {},
73
+ "output_type": "execute_result"
74
+ }
75
+ ],
76
+ "source": [
77
+ "X = np.array([])\n",
78
+ "\n",
79
+ "# Build out our matrix X \n",
80
+ "for feature in x:\n",
81
+ " feature_vec = feature ** np.arange(k+1)\n",
82
+ " # print(feature_vec)\n",
83
+ " X = np.append(X, np.sin(feature))\n",
84
+ " X = np.append(X, feature_vec)\n",
85
+ "\n",
86
+ "X = X.reshape(3, k+2)\n",
87
+ "print(X)\n",
88
+ "print(X.shape)\n",
89
+ "\n",
90
+ "X.T @ y"
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": null,
96
+ "metadata": {},
97
+ "outputs": [],
98
+ "source": []
99
+ },
100
+ {
101
+ "cell_type": "markdown",
102
+ "metadata": {},
103
+ "source": [
104
+ "# PS1: 3 Logistic Regression: Training Stability "
105
+ ]
106
+ },
107
+ {
108
+ "cell_type": "code",
109
+ "execution_count": 32,
110
+ "metadata": {},
111
+ "outputs": [
112
+ {
113
+ "data": {
114
+ "text/plain": [
115
+ "[Text(0.5, 1.0, 'ds1_a.csv data')]"
116
+ ]
117
+ },
118
+ "execution_count": 32,
119
+ "metadata": {},
120
+ "output_type": "execute_result"
121
+ },
122
+ {
123
+ "data": {
124
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAABKMElEQVR4nO3dd3hc1bXw4d+eUe+9d0sucrflbjoEMGADIWB6bxcCuSlfCAnkhtzkhnCTCwQSSugl2BA6xvRujDs2trEtV0lW773N/v44Y0kjjWyVqZr1Po8ePHvOzKxj5LPm7LK20lojhBDCd5ncHYAQQgj3kkQghBA+ThKBEEL4OEkEQgjh4yQRCCGEj5NEIIQQPk4SgRBC+DhJBMKrKKWeVkr9t7vjcAel1AGl1KnujkOMPZIIxJiklLpVKbVBKdWulHra3fG4mlJKK6Vy3R2H8A5+7g5ACCc5DPw3cDoQ7OZYhPBockcgPJpSaqZSapNSqlEptQII6vNcnFLqbaVUnVKqRin1hVLKBKC1flVr/TpQPYzPira+X6VSqtb657SjHD9ZKfWB9bPLlVJ3WtvnWu9GGqztf7W2r1ZK3drvPb5VSp0/yPtfrpQ6qJSqVkr9ut9zc5VSX1vPvVQp9ZBSKsD63OfWw75VSjUppS4a7rkJ3yKJQHgs64XtdeA5IAZ4Gfhhn0N+BhQD8UAicCcwmuJZJuApIBPIAFqBhwaJLRz4EFgNpAC5wEfWpx8AHtBaRwDjgJXW9heBi/u8R771s96x8/75wD+Ay63vHwv0vXB3A/8JxAELgFOA/wDQWh9vPWa61jpMa71iOOcmfI8kAuHJ5gP+wP1a606t9SvA+j7PdwLJQKb1+S/0KKooaq2rtdb/1lq3aK0bgT8AJwxy+NlAmdb6L1rrNq11o9b6mz5x5Sql4rTWTVrrtdb214AZSqlM6+NLgVe11u123v8C4G2t9efW5+8CLH1i3ai1Xqu17tJaHwAePUqswz034WMkEQhPlgKU9Lu4H+zz5/uAQuB9pdQ+pdQdo/kwpVSIUupRa3dMA/A5EKWUMts5PB3YO8hbXQuMB75XSq1XSp0NYL0AvwMstx63HHhhkPdIAYqOPNBaN9Onm0spNd7avVNmjfWPGHcHjjg34WMkEQhPVgqkKqVUn7aMI3+wfgv/mdY6BzgH+KlS6pRRfN7PgAnAPGu3zpEuFmXn2CKMbp8BtNZ7tNYXAwnAvcArSqlQ69P/Ai5WSi3AGMT+ZJBYSjGSjRGAUiEY3UNH/AP4HsizxnrnIHGO5NyEj5FEIDzZ10AXcJtSys86qDr3yJNKqbOVUrnWRNGA0W/ebX3OTykVBJgBs1IqSCl1rFly4Rh953VKqRjgt0c59m0gSSn1E6VUoFIqXCk1z/rZlyml4rXWFqDOeny39b+rMPrp7wFWWI+x5xXgbKXUYutYyT3Y/nsNt55zk1JqInBzv9eXAzkjPDfhYyQRCI+lte4AzgeuAmqBi4BX+xyShzFg24SRNP6utf7U+txvMC58dwCXWf/8m2N85P0Y39KrgLUYA8E9lFKPKKUescbWCJyGcSdSBuwBTrIeegawXSnVhDFwvFxr3WZ9Xbv1HE7FGDwe7Ny3A7dYjym1nn9xn0N+DlwCNAKPAyv6vcV/Ac9YZxVdeKxzE75NyQ5lQgjh2+SOQAghfJwkAuFTlFJ3WhdZ9f95192xCeEu0jUkhBA+zutqDcXFxemsrCx3hyGEEF5l48aNVVrreHvPeV0iyMrKYsOGDe4OQwghvIpS6uBgz8kYgRBC+DhJBEII4eOclgiUUk8qpSqUUt8N8rxSSj2olCpUSm1VSs1yVixCCCEG58w7gqcxVlgO5kyMlaF5wA0YtVOEEEK4mNMSgdb6c6DmKIcsA57VhrUYlRCTnRWPEEII+9w5ayiVPmV2MeqopGLUVbGhlLoB466BjIyM/k8fU1VjO9sP11Pe2E5GdAhTUiMJC/K6CVNCCOEU7rwa2it/a3d1m9b6MeAxgIKCgmGtgGto7eTe977n5Q299bp+vWQS1yzOxmySCrxCCDuqC2Hn27D/M5iwBMafDlHD/xLqLdyZCIrpU28dYxu+w47+kD0VjTZJAOB/39/FyZMSGBcf5uiPE0J4u6YKeOVaKN1iPN77Mez/HM79BwSOzWuGO6ePvglcYZ09NB+o11oP6BYarca2rgFt7V0WWtoHtgshBLUHYcIZcMIvYc51YPKDnW9CzT53R+Y0TrsjUEr9CzgRiFNKFWNshOEPoLV+BGODjiUYWw22AFc7I46s2FAKMqM4fnwCHd0W/M0mthXXkhYd4oyPE0J4s9oD8MFv4JB1m+mYHCMhfPIHBum5HhOclgisW/Ud7XmNsfGGU6VEBXH6lGT+uGonWoNJwZ/On0ZksL+zP1oI4W32ftKbBMC4C6jdDwXXGUlhjBrzK4v3VjbzP9YkAGDRcPeb33GgutloqDsE+z6Dw1ugo8VtcQohPEDx+oFt5dth8U8gMNzl4bjKmE8EVU3tWPrd0bV1Wqhp7oDiDfDYCfDsUuO/n/0JWuvcEqcQwgPknDCwbeI5EJU+sH0MGfOJIDUqmCB/29OMDvEnKdQEq34BLX3WvH31AJRtHd0HVu+FrSth3WNQtA66Okb3fkII18k6DmZe1vt43Ckw9Yfui8dFxvyqquy4UP5+6Sx+tvJbals6iQ8P5G/LZ5LmVw+HNw18QX3JyD+seh88dx7UWau9KgWXrIS8H4z8PYUQrhORAmfeB/NuBkuXMS4QFOHuqJxuzCcCpRQnT0zk7R8fR01zO3FhgSRHBUObP6QvgKKvbV8wmkUjhzf1JgEAreGD30LaXAiOGvn7jpbFAqWb4cCXYPKHrMWQPM198QjhyQJCIGmKu6NwqTGfCI5IjQ4mNTq4tyEoAs78E6y4FOqLwWSGE38NSVNH/iH2xheaK6CrfeTv6QjF6+Dps4xvOAD+IXD1KkiZ6d64xNjVWm+sztXdEJsHIdHujkgchc8kArtSZsB1H0LtIWNGQGwe+I1iWmnydFAm0JbetjnXQ1jCqEMdMYsFvnm0NwkAdLbAzrckEYihaaowZtcFRRpdJSbz0Y+vPQTv/gJ2rzYeZyyEZQ9B7DjnxypGZMwPFh9TeDJkzIPE/NElATASyyUrIXEKhMbDCXfAzMuNsQJ30RbjH3J/TZWuj2UomiqgvdHdUYgjSr+FJ34A/zwFHlkEG56Ejuajv6bww94kAHBoDWx/zblxilHx7TsCRzP7Q95pxphAV5txJ+DOJABg9oO518PBL23bp5zvnngGU18C375oXGgi0uDkuyBr0bG/fQrnaa2Ht39qLKgCo4tz1c+NO9/0uYO/bt+nA9t2r4ZFPzF+H4XHkTsCZwiOhPBE9yeBI8adBOc9BvETIWkaXPQCpM9zd1S9tIaNT8HH/w0Nh40xjefPg9JRTuUVo9NcBSUbBrbXDroHuiHnxIFteadLEvBg8n/GFwRFwvSLYPwZYDJ53grJxlJj3UVfli5jRWeqjGO4TXAkxOYag759hSc
d/XW5p8K4U2Hvh8bjtLmedwcqbEgi8CXBke6OwD6/QAiOhrZ62/YAKQzoVqFxcM6D8OKF0NFktC249dgz66Iz4IInoHqPMUYVkwuhsc6PV4yYJALhfiGxcNrvYeXlvW1RmTKryRNkLYIbP4ea/RASA3Hjh1aTPzgK0uY4PbwxrbsLqncb42fhycbfvV+AUz5KEoHwDHk/gKvfNeo/hcYZYxgx2e6OSoAx7VOmfo5e+Q7Y+xE0lPZOKgkMtX+sxQI7XofXbjS6SU1mOOv/YMYlxqQUB1Nae1eN7YKCAr1hg50BLCGE8FSVu+CpM6GlurftgqcGHzupKoRHF0Nna2+byQ9u/MKY6j4CSqmNWusCe8/JrCEh+utsh7LvjGmQ1XvdHY0YC0o22SYBgI9/b1v0sq+WStskAMadQVO5U8KTriEh+upoNtYyfHC3MdAZGA4XvQg5x7s7MuGpGsqgfKtRYiZuvLGgtP9UWXtlZjpbjHEAe8JTICgK2up62/yDISLVQUHbkjsCIfqq2Anv/6a3TEh7I7x+o/GPXYj+Gkrh9ZvghR/Bq9fD4ycZ4wD9pUwHc7+B3oW3Q/gg5WeiM42uo5AY43FQJPzwCWM6rxPIHYEQfTUctt/WUgURx5g/L3xP2VbY90nvY22Bd34GqbOMMjNHJM+AK98y9jypLzJqkE048+jvnXsy3PC50R0UGgfRWc44A0ASgRC2Iu3sRBWV6d7CgcJz2as43FBidDH2TQRKQcZ8SJkFlk4IGGS2UH9R6S7ZHU26hoToK2EinH1/7218SCyc96gkAmFfXN7AUjL55xrz/u3xCxh6EnAhuSPwFZW7oPw7QBkrQ+Py3B2RZ/IPhllXQOZCY0ZHZNqY369WjELiFKN216qfG6VSJi2Dk35trJb3IpIIfEHpt/DMOb0lHEJijf7KxMnujctTmcwQP8HdUQhv4BcAE88yVlF3NBt3Av5B7o5q2KRryBdsft62jk9LtdSHF8KRwhKMlfBemARAEsHYZ7FAxY6B7ZW7XR+LEMIjSSIY60wmY5e0/nyxLHBXm5EAq/ZAd6e7oxHCY8gYgYfp6OpmZ2kj+6uaiQ0NID8lgtiwUQ48jTsFTv0dfPG/xp7KJ94B2T62Ura+GD77M2x+zvg7mPcfsPBWYwMhIXycJAIP8/72cn780maO1AJcOj2F3y3NJzp0FMkgLB4W3Q5TLwBMEJnikFi9yvfvwKZnjD9rC3z9ICRPg2k/cm9cQngA6RryIIfrWrnrje/oWxD2zW8Ps7PMAZu5K2VMhfTFJNDdBdteHti+613XxyKGpqPZKPjnpCJrwpYkAg/S3N5FbcvAvus6O21iGMx+kD5/YLtsg+mZKnfBiivgb7PgsRONhD1YcTbhEJIIPEhSZBBzsmJs2vxMiuw4z1uJ6HVmXGK72jM2z9jDWXiW9iZ4947e/Y4bDsOKS6Fiu3vjGuNkjMCDhAf584fzpvD7t3bwRWEVqVHB/OG8KYxP9LDN5r1RYj5c8x5Ufm8MFifkQ+QxSvpWFxozjPxDIGGSlJlwhcZS2PexbZul2/h/kTzdPTF5Aku38eONW1Uqpc4AHgDMwD+11n/q93wk8DyQYY3lf7XWTzkzJk83PjGcRy6fTUVjO+GBfsSFe9dSdY8WnWn8DEXJRnj2XGhvMB7nnATLHj528hCjExBmJNymCtv24Bj7x/uCovWw7lGoPQAF1xrbXIbGOfQjnNY1pJQyAw8DZwL5wMVKqf57rN0C7NBaTwdOBP6ilHJOyvMioYF+ZMeFen4S6OqA5mrjm8pY0tECH/+hNwmAUWq4eL37YvIVEclw1l9tC7lNWmrU9PFFpVvhmbONyQ7F6429D777t8M/xpl3BHOBQq31PgCl1EvAMqDvMlcNhCulFBAG1AAyKuQNSrfCl/9n/HJOOsf4phLnnE0zXK69Ecq+Hdhed9D1sfiivNPhuk+gZq9RFytxijEF2heVbjEWQvb15V9h8nkO7ap0ZiJIBYr6PC4G5vU75iHgTeAwEA5cpPWRraF6KaVuAG4AyMjIcEqwY1FdSwfr9tfw2e5KxieGcVxePDnxYaN/49pD8Pz50FxpPF77dyjfDhc9D0ERo39/dwuJNUoJr/+nbXvSNLeE43P8AowZXTKry9iwvj9zgDHO5UDOTATKTpvu9/h0YAtwMjAO+EAp9YXWusHmRVo/BjwGUFBQ0P89hB1aa/617hD3rt7V05YVG8IL180nNTp4dG9evbs3CRyx/zOjDzN5DFwszX4w72ao2W9sO+gXZJQWTpnt7siEr0mZBcHR0Frb23bSrx0+RuDMRFAM9C3knobxzb+vq4E/aa01UKiU2g9MBNY5MS6vYLFovi2u493vymjt6OKsaSnMyogiwM88pNcX1bbywEd7bNoOVLews6xh9InAz87rTX5eV4P9qOJy4cJnoO6QkQiis426TcJ5Kr43Em/dIWNANH0eBPr4jLmEiXDl28ZairpDkH8OZCxw+Mc4MxGsB/KUUtlACbAcuKTfMYeAU4AvlFKJwARgnxNj8hrfFtdx0aNr6eg2esqe/+YQz107j8W5Q/smYLFouroH3jx1dQ/oeRu++IlG/aK+m3QvvB2ic0b/3p4kMFz2bHCV6kJ4dhk0lRmPv3kElv0dZl7q3rg8QdIU48eJnJYItNZdSqlbgfcwpo8+qbXerpS6yfr8I8DvgaeVUtswupJ+qbWuclZM3uS97WU9SQBAa3jii33Mz47Bz3zsb6apUcFcMi+DZ7/uHeCMCPZzzJqE0FhY+iAcWmusAk2dZWzM4ec/+vcWvqn0294kcMRHvzPuDGT9htM5dR2B1noVsKpf2yN9/nwY+IEzY/BW7Z0Dv7m3d3VjGTDMYp+/n4mbThhHZmwI/95YwpTUCK5YkOWYwWIw6hZNvcAx7yWEvbLgna3eUVrC0m3M7ilaZ3SbZswzFiB6EVlZ7KHOmJrE018fsClAd/WiHALMQxsjAEiJCubaxTlcPDeDQD8zZpO98XshPEDiFGO/6M7W3raFtxnrCjzdobXw7FKwWJNWcDRc9Y5XdStKIvBQM9OjeeHaeTzx5X7aOru5elE2C8bFjui9QgLkf7PwcElT4Iq34Ou/GVVHC66BiWfbLizzRF3txrx+S587l9ZaKPzIfiKoLzHG1go/gsyFRtdXjPvH1uQK4aEC/EwszI1jXk4sWushjQsI4dXS50DKk9DdDgF2Ci1W7zW+fTdXGmNSqbOMuwh36u6ExrKB7f2nV4NRWvuD38J31pLoO16HbXPh4hch1L0L5iQReDijO8fDvxUJ4ShmP+Onv5oD8PwFUNtnUuGPnoHJ57oqMvsCw2DejfDmj23b804beGz13t4kcETxOmP7VEkEwtcdqGpma0k9re1dTEqJYHJKpIxnCFulW2yTAMD7v4HMRe4vPzFhCZzVCV89YEw5PulO446lv4FFE47e7kKSCPpo7exie0mDsV9wWCBTUyOJ9/TCb15uX2UTVzzxDcV1Rj0VP5Pi2WvmsnCI6yWEj+hsGdjWWgPdHa6Ppb/QOJhzrVGWxO
QHwZH2j4sZZ9RR2vNeb1tCPsSNd0mYRyOJoI93tpby85e39jw+LT+RP50/dfSbx4tBbThQ25MEALosmr9+sJvp6VGEBsqvp7BKyAezv+0007k32G425G6hx5jMERQOZ/4Zso+DHW9AzsnGFOzwRNfEdxQyAmlVXNvCPW/tsGn7YEc53ztiv2AxqOrmgd/oSuvbaOscY6WtxegkTYPLXjPKK0Skwkm/gTnXeV/Zj5gsWPhjuPo9OPlOiHf/3QDIHUGP1o5uGtoGLl5paJP9gp1pVkbUgLbL5mfIXdhY0lJrVKdtroSYbOPb/XB32jKZjG/SKa8YZZkdXHTN5YaxHsgVJBFYJUcFsXBcLGv2Vve0BfqZGOeolbijVFrXypq9VazdV0NBVgyLcmNJiw5xd1ijNiM9ikcvm8X/rP6e2uZOrl6YxXkz09wdlmfqbDW2zmxvMIrgecNuaa31RqmIjdaNB5WCC54y6umPRGCY8TNSjeXG36HJbHwbDxnZ2pyxRmntXVWdCwoK9IYNG5zy3oUVTfzl/V28t72M3IQwfrd0MvNzYlFuXtTS3NbFr17bypvflva0nTA+ngeWzyAqZGxs6Fbb3EFHt4WE8EC3/317pNY6+Op+40drCEuEi1d4fs3+g2vgqTNt24Kj4cYvICrd/mucpXI3vHwlVFi7gLOOh2UPDX37UndqrTOqj/qHGHdVpuHfUSilNmqtC+w9J3cEfeQmhHH/RTOobGonLNDPpRfZxtZOzGZldxXw/upmmyQA8NnuSvZVNjMrc2wkgujQsXEeTlP6rbEj3BFN5bD6V3DZy72lmhtKobkCQhM8pzRDc/XAttZa221AXUFr+PbF3iQAcOBzYwvS2Ve5Npbhqvge3rgFSjYYpd5PvgtmXenQTaC8bKTF+QL9zaRFh7gsCdS1dPDKxiLO/8caLn38Gz75voL2fgOlg5WO7rK4f/6xcJH6ooFtxWuhpcb4877P4fGT4NHjjf/u+8y18Q0mJnvgLlvJ040BX1fqaod9nw5sP/i1a+MYrq52+OxeIwkcefz+b4wvBg4kicDNPv6+gp+/vJU9FU1sLqrj6qfXs7mozuaYrLhQCjKjbdomJoWTHecZ4xfCBSLtbNGasQhCYoxVtysvh0brXWNjqfG4Zr9LQ7QrYRIsfxHCk4zHqQXGPgPBUa6Nwz8Ixp85sD3nBNfGMVzNlbB79cD26kKHfox0DblRS0cXT3418B/rJ99XMD+ndxArKiSA+340ndc2F/PRzgqOy4vjgtnpstjNXeoOQdF6aCiG5BmQOnt0A5hDkTIdjvsFfPkXYyVqRAqc/gejW+jwFmirsz2+rR7qi41v5O5kMsP40+H6T42YwpMHX3DlbFN/ZIxZ7P/U+vhCyPbwRBAUCUnToajfnYuDu/4kEbiRn0kRbacLyl63VHZcKD89bQI3n5hLkJ9JBlTdpeEwrLwSDm/qbTvnQZh9pXM/NygSTvi5UVunvQGis4xkAMZUyv6LrUx+njUjJiLZ/eMWsTlw4bNQs9f4+4nNgQAPv6sODIfTfw/Pnd87rpJ/LiQ7dpKAJAI3CvAzc/OJ4/iqsAqLdfJWWKAfJ4wffI50sP/gswVqmtv5Zn8NH++sYEJSOCdOiCc3wcf3fHW0sm22SQDgg7uNrTujnDzt1S/I/paFsblwxp9h1U+NQVGl4Ix7IS7PufF4o+BIo2qpN0mbAzd8ahStCwwztooNiXHoR0gicLO5WTG8ctMCvtpbTUiAmQXj4shPHv5sAK01L60v4s+rd/W0Pb3mAP+6fj7pMd6/3qCv6qZ29lU1o4Bx8WGunXHUYafmTXuDUTrZXcz+MOMS4wLXUGIMxMZPNNrF2BA7zvhxEkkEbuZnNjErM4ZZmaPL8CW1rfztI9sBpOLaVr4vaxxdIuhqN6Yl+gd7RE2U/VXN/GTFFr61DqjPy47hzxdMIzPWTv16Z0iYMHAnrak/MrbudCf/IEiZYfwIMUwya2iM6Nba7nTS7tFMMa3eB2/cCn+bCY8dbxTK6nTjN19g1dbDPUkAMLrCvq9wXQAJ+XD565B1nLGoa+FtcOKdxvzusaK1AQ58BdteMfbh7Wh2d0TCySQRjBEpUcFcscB2hWRksD8TEkc4RtDVaSxg2rbSmKXSWGasyixz7Pzl4dBa88mugTs/fVVY5dpAMubDJSvgxi/h1N8ZhcTGio5mWHM/PL0E/n0tPHEabH7eOzaRFyMmXUNjhL/ZxPXHjSMjJpR/bypmckokl83PIHuEtZI6Gw7jv22FbaPWULUb0uc6IOLhU0pxWn4iGw7W2rSfMCHB9cEEhNrfTtHbVe2GL/5i2/b+byDnRIif4JaQhPNJIhhDkiKDuHJhFsvnpONvNmEa4S5fZfWtrN3ZwDkRGZhr9tg+GRxt/0UucsaUJL4srOKLPcZdwGn5CZw0wc07VI0VTRXQ1TFwDKS7w1gDIHppDZZOMI+N0iiSCMagwKNMMR2Kr/dW86vVxUw64zdM+OgasFhLXmQsMBZQuVFmbCgPXzKL/VVNKKXIiQslLEhmx4xKY7nRBfj1wxAUZWy1uHWlMVUWjIFweyubfdXhLbDhSSj/DmZdYaxY9oCJFKMhiUAM8PW+ato6LVz/ZRj/ddwKMixF6MAIsibPx99JpY/3VjbxVWEVB6tbWJwXx+yMaCKC7V/gI4L9mZ7u3juTMWXHG0b3DxjlKd6/y1iHsPqXxoYwSx+EiCT3xugpKnfBM+f0Lu4q2QjHl8CJv/K+TXL6kEQgBpiZHs3KDcUcqm3nmvcAErnx+BzuiHJOEjhU3cKVT66juNbojnjiy/3897lTuGy+F5QH9natdbDuETvttfDjzcbCJVfXBfJk5dsHVk79+m/GnYGry2o7kPemMOE0i/PiWDCutzzBuPgwfjQ7zWllLXaUNvQkgSP+/N73lNa3DvIK4TDmAAhLGdgelmCUYBhpEqjYCZueg3WPQ8kmGCuVcu3tA2DyN1ZzezG5IxADpMeE8PdLZlFY0USXxcK4+DASIoKc9nkdXQMvEu2dFrq7vWvTJK8UEAIn/j94bk3vWFBYAmQtHvl7lm+Hp88y7irAWOF8xVuQuWD08bpb4lSjcF5jn/1BTvyV+xcUjpIkAmFXdGgAc7IdW89kMBOSwgkJMNPS0bsPw5ULs0iOCnbJ5/u8zEVwzQdweCP4h0F6AcSNYlP1wg97kwAYxfDWPGhUaR3uXsWeJjYHLn8Ndq2Gqu9h4tmQOYqk6SEkEQi3m5AUzovXz+Oxz/dRWNHERXPSWTI1GfMIp7+KYTKZIW228eMIjeUD2+qLwdIFeHkiAGOPhYRJ7o7CoSQRCI8wIz2aBy6aSXtXt0wH9XbjfwBrH7Ztm3ej0Q0lPJIMFguP4e9nkiQwFqTNhQufM7qXItNgyX0w/gx3RyWOwql3BEqpM4AHADPwT631n+wccyJwP+APVGmtPXzLICHEUQWEQP5SyD7e6A4KHXx/DadpKDVKcgdGAAp0t7Fb21gqDuhATksESikz8DBwGlAMrFdKvam13tHnmCjg78AZWutDSik3F
I0Zu7q6LdS3dhIe5EeA3+hWGwsxbO5af1C0Hl6+wthNzj8EFt0O2183igWe8P96d3YTPZzZNTQXKNRa79NadwAvAcv6HXMJ8KrW+hCA1tqF9YRdp6Orm23F9byz9TDf7KumvrXD6Z+5t7KJ3721nXP+9iW/eGUrO0sbjv0iIbxdUyW8er2RBAA6W+CzP8GU82HjU1D4kXvj81DO7BpKBYr6PC4G5vU7Zjzgr5T6FAgHHtBaP9v/jZRSNwA3AGRkeF/Nk9XflXH7ii1o67T4qxZm8bPTxhM+SAmF0apr6eBnK79li7Vu//r9New43MCz18yVKZlibGsqh9r9tm1a9+4gt+MNmHW56+PycM68I7A396//CiE/YDZwFnA6cJdSasAEZq31Y1rrAq11QXy8d1WaLKpp4Tevf9eTBMDYQnJXRaPTPvNgTQtbiuqYlRHNL8+YwMLcOI4fH8/BajvbLAoxloTEGAvi+jNbxwYy5rs2Hi/hzDuCYqBv8Y004LCdY6q01s1As1Lqc2A6sNuJcblUQ1snDW0DN/WoaXJe91Cgn4nIYD9OnBDPvX32MH5zy2FW3DifnBHuUSCEx4tIgWX/gBWXQleb0TbnOmORW2weTFrq3vg8lDMTwXogTymVDZQAyzHGBPp6A3hIKeWHsdJkHvB/TozJ5ZIjgxmfGMbu8qaeNn+zcuoeu9lxofz6rHwe+NB2L4HKpna2ldRLIhBjW+4pcNOXUHsAAsON3dUmLDEWgclAsV1OSwRa6y6l1K3AexjTR5/UWm9XSt1kff4RrfVOpdRqYCtgwZhi+p2zYnKHmNAA/u+iGdzx761sK2kgITyQe384jbwE512MA/3MzM2KpqVj4J2Ivbo+QowpSkFcnvEjhkRp7V2FvQoKCvSGDRvcHcaw1bd0UN7YTmSwP4lOLODW1z8+LbTpGgr0M/HqfyxkckqkSz5fCOE5lFIbtdYF9p6TEhMuEhkSQGSIa+us/HB2GmGB/jy/9iDpMcHcdMI48pMjXBqDOIbag7D7Pdj7idGlkXcaRMs+DMK15I7ABzS3d+JnNhEoi8o8S2stvHw17Pukty33NPjhExAsd23CsY52RyC1hnxAaKC/TySBysY2Khra3B3G0FXtsU0CAIUfQPUe+8cL4STSNSQGOFTTwo7DDXRbLExICic3IdzdIR1VfWsHq7aW8dcPd2OxaG49OZdlM1KICfXwujKD3Y1rGdB3u6ZK6GiCsCQIGPuLMCURCBuFFY1c+eQ6SuqMb9bhgX68cP08pqVFuTewo1i7r4Zfvbat5/Hv3tpBTGgAy2Y4Z49lh4nLhfT5ULS2ty1zEcTmui8mX2fphr0fwzs/g/pDMOEsOPW3o9uoxwtI15Cw8dmuyp4kANDY3sUzaw7QbfHcsaS3tvRfpwj/WncIiwfHDEBILJz7Dzjlv4wVr6f+Fyx7yFgdK9yjYgf86yKoO2jcsX3/Nqy+Ezqa3R2ZU8kdgbCxt3LgL/z3ZY10dFsItrdxtwfIihu4OC8nLgyTN+xwFpsDx/2nUSHTJN/L3K50W+/ezUcUfmAUsRvD6xLkN0/YOGniwFpOFxakE+zvmUkAYMnUJCKCe7/ThAaYWT4n/Siv8ECSBNzv8GZoLBnYHhoPAc6rBOAJ5I5A2JibFcvdZ+fz1w9209lt4epFWZwxOdHdYR1Vfkok/75pIdsP19OtYUpKBBOSZL2EGKYdb0B9EWQdBwe+MNqUMnZYG+OlKSQRCBuRIf5cszibM6Yk0a01KZHBXrGJfF5iOHmJnj27SXi45irY9gpMuxBOuhO6OyAsGfJOd3dkTieJQNiVIvsWCF8z+XzY/BxsXdnb9qNnjK03x7gRdUwqpU5zdCBCCOFWGfPhohcgaRrET4LzH4eck9wdlUuM9I7gCcD7tgoTQojBBITApLMh5wRj6miQ74wzDZoIlFJvDvYUEOuccIQQws0CfW+s6Wh3BMcBlwFN/doVxsb0Qgjhuap2G6uEq/dB7snGKu7gKHdH5ZGOlgjWAi1a68/6P6GU2mXneCGE8Ay1B+D5C4wVwgDrHoUz/wzzbnRrWJ5q0MFirfWZWutPlFL5dp6+24kxCSHE6JRt600CR3zyB6gvdk88Hm4og8UrlVLPAX8Ggqz/LQAWODMwIYajq9vClqI6Pt1VSYCfiRMnxDM1NRKlPH8NhHCC7s6BbZ2tA8tHCGBoiWAecC+wBggHXgAWOTMob9DZZWFfVTPVze2kRgU7dTN6cWwbDtZyyeNrOVJn7uFPCll54wKmp0e5NS7hJgn5xqBve2Nv27ybIDLNfTF5sKEkgk6gFQjGuCPYr7VvF0xv7exi5foi7nl7J90WTXigH49cPptFuXEui6G5vYv9Vc2AJjM2lPAgf5d9tqfptmie+mo/fYuNtndZWP1dmSQCX5UwEa54A77+O1TuhJmXQ/5S8NDCie42lESwHngDmIMxbfRRpdQFWusLnBqZB9tT3sRv39zR87ixvYufrtzCm7csJjHS+RvTH65r5Y+rdvL21lIATp2UwG/PySc9xjfvSrTWNLQO7Aqob7PTPSC8W9k22PWu0dc/6RxjEdhg0z1TZ8N5j0JXGwSGuTZOLzOUlcXXaq3v1lp3aq3LtNbLMBKDzyqrH7gdYnlDO9XNHS75/M92VfYkAYAPd1bw/vbyY75Oa83+qia+3lvFnvJGurrHxo2dn9nEVYuyB7SfPTXZDdEIp6nYCU+fbQz6bnoGXrgAdq8++mvMfpIEhuCYiUBrPWCneK31c84Jxzsk2fnWnxwZRFyYP6V1rXa/nTrSJ7sqBrSt3l52zI1YPt1VyVkPfsnFj3/DWQ9+yWubS+jo8p5k0NDaycHqZupbBybcReNi+fuls5iRHsm87BieumoOszOj3RClcJqSTdBWZ9v28R+gudot4YwlUnRuBCYkhvOHc6fwX29tp7NbExXiz4PLZ/CX93fz1tZSxsWH8puz8pmbHeOUWSvzcmJ5f4ftHcBxefFH3YilqKaF21dspqXDmDXR0W3hjle3MTU1konJnr+UfmtxHXe/sZ0tRXVMSYng9+dOYWZG74U+LMifJVOTOWliAiYg0IP3TxAjZG8mUHc7aJkJNFqyG8YIBPqbuWhOOqtuO44VN87nrVsX8fgX+1mxoZiWjm62lTRwxZPr2F3ef1G2Y5w6MYEZfQZBJyWFc/a0o3eDVDW109DaZdPWbdGUNQzs5vI0pfWtXP/sBrYU1QHw3eEGrntmA8W1LQOODfY3SxIYq1Jngl+gbdvin0JYgnviGUPkjmCE/Mymnvr3eyubBnxDb++ysK+yiQlJjq9bkhkXyhNXFlBY0YRFa3ITwogPP/ogdXx4IJHB/tT36bYym5Tdbi5PU1zTSnlDu01bdXMHRTWtpEWP/RLBwippGlz5Nqx9xNhYvuBayJNCyI4gicABgvzMRAT7DfjGHRrovL/e2LBAYsMCj32gVVp0CA8sn8GtL26mqb2LQD8TfzxvKuPiPX8gLTzID5PCZnqoUthsTyl8gFKQPteYDWTpBr8Ad0c0Zsi/JAdIjQ7mrrPy
+cUrW3vaFo6LZaIT7gZG48QJCbzz48WUNrQRGxpATnyYV+w+lhMfyk9PG8//vr+7p+2WE3MZF++b02V9nsks6wEcTBKBg5w1LZnM2BD2VjQTGxbA1LRIEiI8r9slMy6UzDjvuoAG+Jm5amEWc7JiKKlrJSUymPyUcIL85ddXCEeQf0kOEhLgx9zsWOZmy1YNzhAW5M+8HPm7FcIZJBEIIQaoaGijvrWThIhAIoOlL36sk0QghOhhsWi+LKzi/72ylbKGNqamRvA/509jSmqku0MTTuTUdQRKqTOUUruUUoVKqTuOctwcpVS3Uspn6xcJ4Qn2VjZx/bMbetaXbCtp4CcrNlPT3H6MV7pRdSFsfAY++R/Y9xm0N7s7Iq/jtDsCpZQZeBg4DSgG1iul3tRa77Bz3L3Ae86KRQgxNAerm2nvV3aksKKZw3VtxIQOfbqyy9Tsh+d/aOxIdsS5j8CMi90Wkjdy5h3BXKBQa71Pa90BvAQss3Pcj4F/AwML6Ajh4Q5WN7O1uI7KRs9foT0UMaEDxwPCAv2ICPLQXuSyrbZJAODDu6Hx2EUYRS9nJoJUoKjP42JrWw+lVCpwHvDI0d5IKXWDUmqDUmpDZWWlwwMVYrg6uiy8uaWEsx78kqUPfcX5/1jDlqJad4c1auMTw7l6UZZN2z3LJpPhqRsvddpJwG0N9usSiUE5M83bW6nUvzzm/cAvtdbdRyvOprV+DHgMoKCg4OglNsWgqpra2XSwlm0l9UxMCmd2ZoxXlJjwRIUVjfxkxZae1c5FNa38dMUWXrlpITHDWPHtacKC/PnJqXmcMTmJyqZ2MmJCmJDoWQsjbSTmG/WHuvqMYcy7ESJS3BeTF3JmIigG0vs8TgMO9zumAHjJmgTigCVKqS6t9etOjMujlDe08fnuSt7ZWsrMjCjOmppMrhP+4bV1dvG3j/fwzJreDb3PmJzEvRdMIzLYd3c3G6mimlb6V/3eV9VCeWO7VycCgMjgAO9Zs5E4BS5/Az6/D2r3w6wrYeqPwCT1NIfDmYlgPZCnlMoGSoDlwCV9D9Ba9+wmopR6Gnjbl5JAZ5eFRz/by5NfHQDg092VvLyxmBU3LCA1Otihn7W/qplnvz5o07Z6exnXH58jdftHID58YF96XFiAJFVXUwoyF8DyF4xuohD5XR4Jp6VNrXUXcCvGbKCdwEqt9Xal1E1KqZuc9bnepKi2ZcDFubi2ld3ljYO8YuTauyxoO51q7Z1Sy30kxidF8OOTc3seB5hN3PvDaaREOTaBiyHyD5YkMApOnQqgtV4FrOrXZndgWGt9lTNj8VT2Bjy0vSv2KGXGhjI9LYpvi+t62tKig8mWwm0jEhbox80njOOUSQnUNHWQHhPiFZVchbDHQ+eE+Ya06GAun5fJ018f6GlLjQpivBOqlkaHBPCXC6fz7JoDfLyrgvnZMVx3XA7JkfINdqRCAv2YkS7fQoX3U8749ulMBQUFesOGAdsoe63y+lY+3V3JG1sOMzszmqXTU3o2vHGGrm4LDW1dhAf64e/n2J7Brm4LZQ1tBPiZSDjGRjlCCNdSSm3UWhfYfU4SgXCEktpWnvpqP89+fZCIYD9+fdYkFufGsaWojre+PUxmbChnTk0iP1lq1gjhDkdLBNI1JBzilY1F/PPL/QBUNXXw+7d3cvspefz2ze09xzz79UFeuWmBU+94hBDDJ5NtxahVN7WzYn2RTdsZk5N46JNCm7b61k6+O1zvytCEEEMgiUCMWnCAmcxY203kTSZFd/8VV4DFMqBJCOFmkgjEqIUE+PGfp40nwNz767SrtIGbT8jpd5yZyakRrg5P9FNW38Z735Xy1Ff7+XJPJY2tUpfH18kYgXCIOVkxvH7LQnaVNxHsb2JySiShgWbiwwP517oisuNCuXhuBhOTJBG4U3VTO796dSuf7Oot3vjrJZO4dnE2JtPg9b7E2CaJoJ/SulYqG9uJCw+UVaLDoJQiPyWS/BTbWUHnzkzjnOmpmOUiMyQHqpo5VNNCdEgAuQmhBAc49p/o7vJGmyQA8JcPdnFafiJZcbK40FdJIujji92V/GTFFqqbO4gLC+D+i2awOC/e3WF5PUkCQ7N2bzXXPbuBpvYuAG47JZcbjsshLMhx9Yua2weWFGnrtNAqpUZ8mowRWB2saubmFzZR3dwBGFMgb35+EwerZdu7vpraOunokouGo1U3tfPLV7f2JAGABz8q5Psyx9adGhcfSnig7fe/heNiSXdwkUPhXSQRWB2ub7X5RwjQ2N5Faf3Y2HlqtMobWnl6zQHO/8cabn1xM5sO1rg7pDGltqWTg9UtA9od/fuXHR/Gs9fOZcG4GCKC/bioIJ3fL5sytLuOtgao3gct3r8Bj7AlXUNWsWGB+JsVnd29Ux4DzCZi7Wzd52u01rz4TREPfLQHgN3lTXy2u5LXb1nEpGQZ/HWE2LAAJiaFD7gDcMY39ZkZ0fzzyjk0tXURExIwtFIjh7fAqp9D8XpjD4Cz/wrp8xwem3APuSOwyo4L5Z6lUzjSnW1S8Ptlk8mWATTK6tt4/It9Nm3tXRa+L2twU0RjT3RIAPf+cCrJ1h3jAv1M3LNsMhOTnbMKOzTAj8SIoKElgcYyWHGZkQQAyr+DFy+CmgNOiU24ntwRWPmbTZw/K5VpaZGUNbSRFBlEXkIYfmbJlWazIiTATEuH7dhAgPzdONT09Ghev2URJbWtRAT7kx0X6hkD7XWHoN525TittcaOYDFZbglJOJYkgj4C/c1MTo1kcqoURusrITyIX54xkV+8srWnLSkykMkp8vfkaIkRQSRGeFjl1sBwMPmBxXYMjaAot4QjHE8SgRiSM6cmkxQZxOe7K0mLDmFRbqzMO3eAioY29lY2YTaZyE0II8YTx6Ric+GU38IHd/W2Lbod4ie4LybhUJIIxJCEBfpxXF48x8m6CocprGjkxuc2srfSmKI8PyeG+y6YTnpMyDFe6WJmfyi4GtLmGN1EESmQNBUCPCxOMWLSySuEG2itWbmhuCcJAKzdV8OXe6rcGNVRBIYbm8RPvwiyj4PgKHdHJBxIEoEQbtDeZeGrwoEX/U2HZI6+cD1JBEK4QZC/mR/kJw5oXzgu1g3RCF8niUAINzl3RiqLc+N6Hl8wO40FkgiEG8hgsRBukhkXyj8uncX+6mbMJkVOXBjBAWZ3hyV8kCQCIdwoPNifaWlR7g5D+DhJBEIIx+psg5KNcHANhMRA1mJZc+DhJBEIIRxr78fw0sW9j8MS4Mp3IH68+2ISRyWDxUIIx2mtg49+Z9vWVGHcIQiPJYlACOE43R3QVjewvaPJ5aGIoZNEIIalpaOLb4vq+GBHGTtLG+jstrg7JOFJwhJg/i22bSYzpMxyTzxiSGSMQAxZa0cXT321n/ve2w0YexHff9EMzpme4ubIhEeZdpFRn2jdYxCeDCf8ElJmuDuqo+rs7KS4uJi2Nu/fkTAoKIi0tDT8/Ye+17UkAjFkeyqaepIAQLdF86tXtzEtLZLMWKlEKqzCE2H+zTBtOfgFQIDn/24UFxcTHh5OVlYWSnnAHhA
jpLWmurqa4uJisrOzh/w66RoSQ1bV1D6gram9i7qWTjdEIzxeSLRXJAGAtrY2YmNjvToJACiliI2NHfadjVMTgVLqDKXULqVUoVLqDjvPX6qU2mr9WaOUmu7MeMTopEWHDNiVLDkyqGd7RSG8mbcngSNGch5OSwRKKTPwMHAmkA9crJTK73fYfuAErfU04PfAY86KR4zeuPgw/n7pLKJDjL7HtOhgHrpkFgmetqOWEGJYnDlGMBco1FrvA1BKvQQsA3YcOUBrvabP8WuBNCfGI0bJbFKcmp/I2z8+jtqWDhIiAkkIlyQghLdzZiJIBfrueF0MzDvK8dcC79p7Qil1A3ADQEZGhqPi8xhtnd3sKW+kpK6V5MhgJiSFEeTvueP4qdHBpEYHuzsMIYamuQqayiE4BiKS3R2NR3Lm1cZeR5W2e6BSJ2EkgsX2ntdaP4a126igoMDue3irzm4LL28s4q7Xt/e03X12PpfNzyDATypRCjEqxRvgtRuhutCYyrrsYRh3MrhwPOCuu+4iLi6O22+/HYBf//rXJCYmctttt7kshmNx5mBxMZDe53EacLj/QUqpacA/gWVa62onxuORDlQ1c89bO2za/rBqJ/uqmgd5hfB1Xd0Wmtu73B2G52sohZWXG0kAoLEUVlza+3i0LBboaIG2BugafJbOtddeyzPPPGN9iYWXXnqJSy+91DExOIgz7wjWA3lKqWygBFgOXNL3AKVUBvAqcLnWevfAtxj7alo66Oy2vcnptmhqmjvcFJHwZN+V1PP0mgN8V1LP+bNSOXtaCilRY7ebrr2zm0M1LZiUIiMmBH+/YXx3bSiBhn7fPTtbofYAxOWNLjBLNzRXGskFQJkgJsfY27mfrKwsYmNj2bx5M+Xl5cycOZPYWM/agMhpiUBr3aWUuhV4DzADT2qttyulbrI+/whwNxAL/N065alLa13grJg8UUpkMFEh/jZz8cMD/Ugbw/+4xcjsr2zikn+upaHVuBv446rvKapp4T9PG8+hmhYC/czkxIUS6D82uhQP17Xyfx/s5pVNxZiU4soFmdx0wrihz1ILjgL/EOhssW0PjbN7+LB0tfUmAQBtgbqDEDfBWFXdz3XXXcfTTz9NWVkZ11xzzeg/38Gcuo5Aa71Kaz1eaz1Oa/0Ha9sj1iSA1vo6rXW01nqG9cenkgBAekwIj142mxTrXPykiCAevWI2GbJSV/Szq7yxJwkc8eK6Il7bVMK5D69hyYNf8Md3d9pd+OeN3t9Rxssbi9HauEt+8qsDrNlbNfQ3iM6BJf9rOx5w4p3GxXq0uu0souzuBIv9LrvzzjuP1atXs379ek4//fTRf76Dee7UFB8yLyeW129ZRHVzB7GhATIvX9jlbx74vS3AbKKi0bjwaw3PrDnIonFx/GBykqvDc6iubgtvbhkwpMgHOyo4d+YQZ5mbTJC5GJb/C+qKICodUmZCQMjoA7TzrR9zIJjsX1IDAgI46aSTiIqKwmz2vDs2SQQeIiEiSBKAOKqJyeGMiwtlb5+JBJfMy+Dd78psjttWUu/1icDPbKIgK4ZNh+ps2mekRw39TRrL4dXroHhdb9vJd8Hi/zQqoo4qwGCITIf6YkCDyR+iM+0nCIxB4rVr1/Lyyy+P7nOdRBKBEC5SVm/MLEkaYUmO1KgQHr+ygC8Lq9hT3sRxeXHsrWziUI1tH/jEpIEDlv21d3ZT09xBRIg/oQGeeRm4YHYab317mFLr31tufBinTEoY+htU7LBNAgCf/xkmnwex40YXnMkEIbEQEAa6C8wBxo8dO3bs4Oyzz+a8884jL2+Ug9RO4pm/AUKMITXN7by2qYQHPt4DwE9OyePcmanEhAYO+71y4sPIiQ/refx9aQMvfHOI4tpWAM6YnMTszJijvseuskbu/3A3n++uZEZ6FL88cyLT0qKGHYuzjU8M5+WbFrCnvAmTSTE+IYzk4Uyi6LQzpbOr3X7//kgoBf7HTur5+fns27fPMZ/pJJIIhHCyLwur+f07O3se3/P2TuLDgxyyj8PE5AhevmkB+yubCfQ3k5sQRmTw4HXoa5rbuf2lzXxf1gjAV3urueqp9bx56yLSoh3Qd+5gadEhI48rfgIER0NrbW/bpKUQNfaqE4yWJAIhnOzVjcUD2zaVOGxDn+TIYJIjh/ZN+VBNa08SOKKmuYP9Vc0emQhGJTYHLn8dvvw/KN0CU34IMy93zGDxGCOJQAgny0sM49PdlTZtuQlhgxztXCEBZvxMii6L7SLG0EDXXwpK61s5UNVCSKCZcXFhhAU5IYaUGXD+Y9DRbNwdjJFS044mG9MI4WTnzkwlIrj3IhcZ7M+5M92zvWd2bCi3npxr0/bDWWnkuTgx7Thcz/l/X8PFj69l2UNfcc/bO6hqdNL6B79ACImRJHAUckfgBepbOlBKEXGUvl/huSanRPLqzQvZfrgBpRT5yeHkJhx7Zo8z+PuZuHpRFgWZ0RyobiElKphpaZGEB7nud6uts4v7P9zTMxsIYOWGIk6fnMgpkxJdFocnWr16Nbfffjvd3d1cd9113HGH7X5eWmtuv/12Vq1aRUhICE8//TSzZs0a9edKIvBgDa0dfLizgoc/KcRsUtx2Sh4nTUhwy228GJ3cBPdd/PuLDA5gcV48i900k7G+pYtv9tcMaD9Q7T2FFl/fXMJ97+3icF0rKVHB/OL0CZw7M3VU79nd3c0tt9zCBx98QFpaGnPmzGHp0qXk5/fu5/Xuu++yZ88e9uzZwzfffMPNN9/MN998M9rTka4hT/bV3mp+uvJb9lY2s7u8iVtf3My6AwP/AQnhTaJC/Tkub2C9n5w494ybDNfrm0v41avbKKlrRQMlda386tVtvL65ZFTvu27dOnJzc8nJySEgIIDly5fzxhtv2BzzxhtvcMUVV6CUYv78+dTV1VFaWjrIOw6dJAIPZbFoXlh7aED7aH/ZhHC3QD8zPz45j3HxRj0tpeC6xdlMT490c2RDc997u2jt7LZpa+3s5r73do3qfUtKSkhP763cn5aWRklJybCPGQnpY/BQJpMiMWLggqNEKUPhcO1d3RTXtmJSkBETitkkg4rONiEpnJU3LuBAdQshAWay40IJ8pKqqYfrWofVPlRaD9xzq/9G9EM5ZiQkEXiwS+dl8vbWUtq7LAAE+5s5e5pstedIJbUtPPjRHl7eWIyfycTNJ47jioWZxI5g1a8YntiwQGLDvO/vOSUqmBI7F/3R7guRlpZGUVHv7r7FxcWkpKQM+5iRkK4hDzYzI4pXblrIb8/J53dLJ/PmrYuYkOgZA45jxTvbSlmxoRiLho5uCw98tId1dgYyhTjiF6dPILjf3Uuwv5lfnD668tZz5sxhz5497N+/n46ODl566SWWLl1qc8zSpUt59tln0Vqzdu1aIiMjSU4e/ZdDuSPwYEoppqZFMjUtkm3FdTz6+T52lTVy0Zx0TstPlG6iUWrp6OL1zQNLHX+2q5Izp8idl7DvyOwgR88a8vPz46GHHuL000+nu7uba665hsmTJ/PII48AcNNNN7FkyRJWrVpFbm4uIS
EhPPXUU6M+H5BE4BX2lDdy8ePf0GTdp3ZbST1lDW389NTxmKQ/e8QC/cxMT49iR2mDTXt+SoTN4/qWDhrauogLCyQ4wDv6sYVznTszddQXfnuWLFnCkiVLbNpuuummnj8rpXj44Ycd/rnSNeQFdpU19iSBIx7/fB+H60c3OOXrzCbF5fMziA3tLR88PjHMZmrjuv3VXPTYWo6/7xNueWEju/rV6RFiLJA7Ai9gNg/81h9gNsnsFgfIT4nk1f9YyJ7yRvzMJiYkhveUOt5b0cSVT67vmSr48a5KKpvaee7aeUSF2K89L4Q3kkTgBfKTI0iKDKSsvrcWy89+MH7IFSfF0WXGhpJpZ4/oA9XNA+aLbytpoKS2tScRdHRZ2FxUy8r1RVg0XFiQzuzMKAL8pAtJeA9JBF4gMzaU566Zxye7Ktlb2cipExOZmx3r7rDGPHu1nYL9zTYlPjYfqmX542s5Mr379S0lvHjdPBaMG7hyVghP5ZOJoKOrmy6LJsRDt+izJy8xnDyZOupSeQlhnD8zlVf7rOa+c8lEMmN769m/srGYvmt8tIbn1x6SRCC8ivdcCR2gs9vCuv01/OPTvdS3dnDt4hxOnBAv/b3CrqiQAH591iSWzUilsqmNzNhQJqdE2KzktNhZ6Wlv9acQnsynZg19W1TH5U98w5eFVWwraeAnK7bwya7KY79Q+KzYsEBOmBDPBbPTmZMVM+Au8vxZaQPK3F+2INOFEXqn6qZ23t9exn2rv+f1zSWjLs8wFlxzzTUkJCQwZcoUu89rrbntttvIzc1l2rRpbNq0yWGf7VN3BF/sqaLfxkw8/vlefpCfQGig1PoXQ3eoupl/bypmS1Ed910wnc92VaCBS+ZmMDsj2t3hebT2rm4e+Wwfj3/Ru6H7cXlxPLB8JjGhXnJ3vnUlfHQP1BdDZBqccjdMu3BUb3nVVVdx6623csUVV9h93lklqMHHEkFo4MCZHKGBfpiUT90YiVFq6ejij6u+Z/X2MgA+213FonGx/PXC6STKTK5jOljdwhNf7rNp+2JPFXvKG5mX4wWTILauhLdug07rXUx9kfEYRpUMjj/+eA4cODDo84OVoHZEiQmfugIuzo0jtM/KUKXg1pNyZbWol9lX2cRL6w5x/4e7WbO3ipaOrmO/yIEO1bT0JIEjvtpbzY5S31tsVt7QSnnD8Lp1OrosA+7MAdq7LQ6Kysk+uqc3CRzR2Wq0O5GzSlCDj90R5KdEsvLGBXy6u5LGtk5OmpDAjPQod4flldo6uzEpXD5f/mB1M1c8uY7i2t5/iA8sn8GyGY5f7j8Yf5MJf7Ois9v2ahbg5zvfq2qbO3htcwkPfrwHgJ+ckseyGalED6FrJyMmhAU5MXy9r7e4X2JEIOO8ZGMa6ouH1+4gzipBDT6WCAAmp0YyOdU7NsDwRA2tHXy2u5InvtxPeJA/N50wjrnZMfibXXMR3F7SYJMEAP64aieLxsURF+6aksYZsSHceHwOD32yt6dtRnoU4xO95ELmAF8VVnHP2zt6Hv/XWzuICwvk7OnHLokcEezPH8+bygvrDvHe9jLmZMVw3eIcUqO9pFstMs3oDrLX7kTOKkENPpgIxOh8uquS217a0vP4q8IqVt64gIKsGJd8fmtX94C2xrYuOl3YreBvNnH14mympkXxzb4aJiaHsyAnlvhw36kG+8rGgd9+/725ZEiJACA7Pow7z5zELSflEhbgh7833U2dcrftGAGAf7DR7kRLly7loYceYvny5XzzzTcOK0ENkgjEMLR0dPHYF7aDfBYNn+yqcFkimJgUToDZREefC/81i7JcXpI7NjSQ0ycncfrkJJd+rqfITQjj0922U69z4weW6Tgak0kR7Y1reI4MCDt41tDFF1/Mp59+SlVVFWlpafzud7+js7MTcG4JapBEIIbBrBThdqbZhrpwhXZ+cgQvXDePBz7azaGaVi6em865M1OlHLeLnTszlZUbimhoMwbqI4L9WOaEsswea9qFo77w9/evf/3rqM87qwQ1ODkRKKXOAB4AzMA/tdZ/6ve8sj6/BGgBrtJaO26VhHCoQH8zN584jrX7q3vKKgT7mzl+fLzLYlBKMSc7hsevmENbV7d3fqMcA6akRvLvmxey43ADSsGk5AgpgeLFnJYIlFJm4GHgNKAYWK+UelNrvaPPYWcCedafecA/rP8VHmp+Tgwv3TCfD3eUExHkz0kTE5jihsH34ACzTPt1M6l/NXY4845gLlCotd4HoJR6CVgG9E0Ey4BntTEvaq1SKkoplay1LnViXGIUAvzMzMuOZZ5UPxVjjNbaYdMx3Wkkta6cOVSfCvSdY1VsbRvuMSilblBKbVBKbaislNpAQgjHCgoKorq62usLBmqtqa6uJihoeJMnnHlHYC+19v9bHsoxaK0fAx4DKCgo8O7/U0IIj5OWlkZxcTFj4YtmUFAQaWnDW9PgzERQDKT3eZwGHB7BMUII4VT+/v5kZ2e7Owy3cWbX0HogTymVrZQKAJYDb/Y75k3gCmWYD9TL+IAQQriW0+4ItNZdSqlbgfcwpo8+qbXerpS6yfr8I8AqjKmjhRjTR692VjxCCCHsc+o6Aq31KoyLfd+2R/r8WQO3ODMGIYQQR6e8bZRcKVUJHBzBS+OAKgeH4+l88ZzBN8/bF88ZfPO8R3rOmVpru6s/vS4RjJRSaoPWusDdcbiSL54z+OZ5++I5g2+etzPO2YtK/gkhhHAGSQRCCOHjfCkRPObuANzAF88ZfPO8ffGcwTfP2+Hn7DNjBEIIIezzpTsCIYQQdkgiEEIIHzfmEoFS6gyl1C6lVKFS6g47zyul1IPW57cqpWa5I05HGsI5X2o9161KqTVKqenuiNPRjnXefY6bo5TqVkpd4Mr4nGEo56yUOlEptUUptV0p9ZmrY3SGIfyORyql3lJKfWs9b6+vUqCUelIpVaGU+m6Q5x13LdNaj5kfjFIWe4EcIAD4Fsjvd8wS4F2MyqfzgW/cHbcLznkhEG3985nefs5DPe8+x32MscL9AnfH7YL/11EYe35kWB8nuDtuF533ncC91j/HAzVAgLtjH+V5Hw/MAr4b5HmHXcvG2h1Bz2Y4WusO4MhmOH31bIajtV4LRCmlkl0dqAMd85y11mu01rXWh2sxqrx6u6H8vwb4MfBvoMKVwTnJUM75EuBVrfUhAK21r5y3BsKt29+GYSSCLteG6Vha688xzmMwDruWjbVE4LDNcLzIcM/nWoxvEd7umOetlEoFzgMeYWwYyv/r8UC0UupTpdRGpdQVLovOeYZy3g8BkzDK2G8DbtdaW1wTnts47Frm1KJzbuCwzXC8yJDPRyl1EkYiWOzUiFxjKOd9P/BLrXX3WNiCkKGdsx8wGzgFCAa+Vkqt1VrvdnZwTjSU8z4d2AKcDIwDPlBKfaG1bnBybO7ksGvZWEsEvrgZzpDORyk1DfgncKbWutpFsTnTUM67AHjJmgTigCVKqS6t9esuidDxhvr7XaW1bgaalVKfA9MBb04EQznvq4E/aaPzvFAptR+YCKxzTYhu4bBr2VjrG
vLFzXCOec5KqQzgVeByL/9m2Ncxz1trna21ztJaZwGvAP/hxUkAhvb7/QZwnFLKTykVAswDdro4TkcbynkfwrgLQimVCEwA9rk0Stdz2LVsTN0RaB/cDGeI53w3EAv83frtuEt7ecXGIZ73mDKUc9Za71RKrQa2Ahbgn1pru9MPvcUQ/1//HnhaKbUNo8vkl1prry5PrZT6F3AiEKeUKgZ+C/iD469lUmJCCCF83FjrGhJCCDFMkgiEEMLHSSIQQggfJ4lACCF8nCQCIYTwcZIIhHAgpdSVSqk91p8r3R2PEEMh00eFcBClVAywAWNFswY2ArP7FPwTwiPJHYEQI2Dd42CrUipIKRWqlNoO3AJ8oLWusV78PwDOcG+kQhzbmFpZLISraK3XK6XeBP4bo7jb80AnY6uyrfARckcgxMjdA5yG0RX0Z8ZeZVvhIyQRCDFyMRiboIQDQYy9yrbCR8hgsRAjZO0aegnIBpIxivttxNheEGATxmDx0XaZEsLtZIxAiBGw7vzVpbV+USllBtYAMzCqYK63HnaPJAHhDeSOQAghfJyMEQghhI+TRCCEED5OEoEQQvg4SQRCCOHjJBEIIYSPk0QghBA+ThKBEEL4uP8PJ0iygoRrfLAAAAAASUVORK5CYII=",
125
+ "text/plain": [
126
+ "<Figure size 432x288 with 1 Axes>"
127
+ ]
128
+ },
129
+ "metadata": {
130
+ "needs_background": "light"
131
+ },
132
+ "output_type": "display_data"
133
+ }
134
+ ],
135
+ "source": [
136
+ "import pandas as pd \n",
137
+ "import numpy as np\n",
138
+ "import matplotlib.pyplot as plt\n",
139
+ "import seaborn as sns\n",
140
+ "\n",
141
+ "df_a = pd.read_csv(\"/Users/johnnydevriese/Documents/cs229-spring-22/XCS229-PS1/stability/ds1_a.csv\")\n",
142
+ "df_b = pd.read_csv(\"/Users/johnnydevriese/Documents/cs229-spring-22/XCS229-PS1/stability/ds1_b.csv\")\n",
143
+ "\n",
144
+ "# print(df_a.head())\n",
145
+ "# print(df_b.head())\n",
146
+ "\n",
147
+ "sns.scatterplot(data=df_a, x=\"x0\", y=\"x1\", hue=\"y\").set(title='ds1_a.csv data')\n",
148
+ "# sns.scatterplot(data=df_b, x=\"x0\", y=\"x1\", hue=\"y\").set(title='ds1_b.csv data')\n"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ "execution_count": 53,
154
+ "metadata": {},
155
+ "outputs": [
156
+ {
157
+ "data": {
158
+ "text/plain": [
159
+ "array([2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,\n",
160
+ " 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,\n",
161
+ " 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,\n",
162
+ " 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,\n",
163
+ " 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,\n",
164
+ " 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.])"
165
+ ]
166
+ },
167
+ "execution_count": 53,
168
+ "metadata": {},
169
+ "output_type": "execute_result"
170
+ }
171
+ ],
172
+ "source": [
173
+ "import numpy as np\n",
174
+ "\n",
175
+ "size = 3\n",
176
+ "\n",
177
+ "theta = np.zeros(size)\n",
178
+ "# x = np.ones(size)\n",
179
+ "# x.dot(theta)\n",
180
+ "sigmoid = lambda x: 1 / 1 + np.exp(- x @ theta)\n",
181
+ "\n",
182
+ "sigmoid(x)"
183
+ ]
184
+ },
185
+ {
186
+ "cell_type": "code",
187
+ "execution_count": 51,
188
+ "metadata": {},
189
+ "outputs": [],
190
+ "source": [
191
+ "x = np.array([[ 1. , -0.3226045 , 1.44016009],\n",
192
+ " [ 1. , 1.45213116, 2.26274285],\n",
193
+ " [ 1. , 0.85117646, 0.87461786],\n",
194
+ " [ 1. , -0.42506633, 0.49448067],\n",
195
+ " [ 1. , 1.07977541, 4.15222455],\n",
196
+ " [ 1. , 0.06839514, 3.60693507],\n",
197
+ " [ 1. , 3.86229163, 25.99079097],\n",
198
+ " [ 1. , 0.63078087, 0.69715553],\n",
199
+ " [ 1. , -0.64119999, 0.07896407],\n",
200
+ " [ 1. , 0.53135242, 4.50417546],\n",
201
+ " [ 1. , 2.19032426, 6.54381687],\n",
202
+ " [ 1. , -0.38465533, 0.42296288],\n",
203
+ " [ 1. , 0.21679465, 5.01812571],\n",
204
+ " [ 1. , 0.4846679 , 1.36387124],\n",
205
+ " [ 1. , 2.39608694, 10.87208984],\n",
206
+ " [ 1. , -0.51197311, 0.86848666],\n",
207
+ " [ 1. , 0.61284557, 1.92105571],\n",
208
+ " [ 1. , 1.20247 , 1.39818937],\n",
209
+ " [ 1. , 2.48716225, 4.41633191],\n",
210
+ " [ 1. , 0.27191322, 2.48805513],\n",
211
+ " [ 1. , 0.46650294, 3.4560205 ],\n",
212
+ " [ 1. , -0.77155423, 0.70674626],\n",
213
+ " [ 1. , 0.74548073, 0.78677636],\n",
214
+ " [ 1. , 1.34588118, 1.84466679],\n",
215
+ " [ 1. , 0.03825659, 1.37679837],\n",
216
+ " [ 1. , 0.97144296, 2.33499786],\n",
217
+ " [ 1. , 0.73578508, 5.36219354],\n",
218
+ " [ 1. , -0.98294485, 0.60613226],\n",
219
+ " [ 1. , -1.06748414, 0.51752952],\n",
220
+ " [ 1. , 0.16444179, 1.19318199],\n",
221
+ " [ 1. , -0.7898848 , 1.0424866 ],\n",
222
+ " [ 1. , -0.14329976, 0.56735751],\n",
223
+ " [ 1. , -0.82024735, 0.49960062],\n",
224
+ " [ 1. , 0.66933574, 2.03132583],\n",
225
+ " [ 1. , 2.18580501, 4.79542536],\n",
226
+ " [ 1. , 1.24524603, 2.22477477],\n",
227
+ " [ 1. , -0.38608243, 1.17125926],\n",
228
+ " [ 1. , 1.97265966, 0.94942542],\n",
229
+ " [ 1. , 0.11536834, 0.41934752],\n",
230
+ " [ 1. , -0.62494875, 0.36937505],\n",
231
+ " [ 1. , 0.73560961, 2.00940088],\n",
232
+ " [ 1. , 1.79850535, 1.03246785],\n",
233
+ " [ 1. , -0.14833684, 0.46835912],\n",
234
+ " [ 1. , -0.09403357, 1.07422495],\n",
235
+ " [ 1. , 1.95126591, 11.27001531],\n",
236
+ " [ 1. , -0.76667657, 0.48703998],\n",
237
+ " [ 1. , 0.53937355, 0.91782734],\n",
238
+ " [ 1. , -0.22698133, 1.18536598],\n",
239
+ " [ 1. , -0.39058305, 1.03621699],\n",
240
+ " [ 1. , 0.96225877, 1.0804548 ],\n",
241
+ " [ 1. , 2.87853267, 32.40471731],\n",
242
+ " [ 1. , 0.63533685, 1.31647006],\n",
243
+ " [ 1. , 2.44918624, 32.26914179],\n",
244
+ " [ 1. , 1.55800428, 1.41965492],\n",
245
+ " [ 1. , 1.58703176, 10.27789785],\n",
246
+ " [ 1. , 2.92302652, 6.32084127],\n",
247
+ " [ 1. , 1.16179168, 8.79339693],\n",
248
+ " [ 1. , 1.01926912, 7.27956453],\n",
249
+ " [ 1. , 1.64074981, 5.30590931],\n",
250
+ " [ 1. , 2.1625751 , 12.70579893],\n",
251
+ " [ 1. , 2.20355391, 6.93546526],\n",
252
+ " [ 1. , 1.80281512, 9.08987516],\n",
253
+ " [ 1. , 0.38185288, 2.08733116],\n",
254
+ " [ 1. , 2.73898669, 6.43298261],\n",
255
+ " [ 1. , 2.33229757, 13.46785592],\n",
256
+ " [ 1. , 2.1515753 , 3.30175484],\n",
257
+ " [ 1. , 2.08755952, 7.46399068],\n",
258
+ " [ 1. , 2.42536537, 26.4483108 ],\n",
259
+ " [ 1. , 1.22952296, 2.63268864],\n",
260
+ " [ 1. , 2.29617283, 12.01655075],\n",
261
+ " [ 1. , 3.30064721, 11.28460368],\n",
262
+ " [ 1. , 0.98606951, 2.43854265],\n",
263
+ " [ 1. , 1.71111559, 4.78419937],\n",
264
+ " [ 1. , 1.43791364, 2.54852746],\n",
265
+ " [ 1. , 2.06546113, 6.82796588],\n",
266
+ " [ 1. , 2.6042525 , 18.17196626],\n",
267
+ " [ 1. , 2.56193154, 2.67573402],\n",
268
+ " [ 1. , 1.67306091, 2.28230367],\n",
269
+ " [ 1. , 3.07760672, 13.17400532],\n",
270
+ " [ 1. , 0.80343565, 1.95356398],\n",
271
+ " [ 1. , 0.29409223, 1.8304515 ],\n",
272
+ " [ 1. , 4.17743477, 28.43916382],\n",
273
+ " [ 1. , 2.20240356, 14.48263971],\n",
274
+ " [ 1. , 2.75638736, 9.03904178],\n",
275
+ " [ 1. , 1.59509206, 3.23851668],\n",
276
+ " [ 1. , 2.31360137, 8.12523688],\n",
277
+ " [ 1. , 2.25188961, 6.10723549],\n",
278
+ " [ 1. , 1.74848706, 4.97610269],\n",
279
+ " [ 1. , 1.94191761, 6.17617417],\n",
280
+ " [ 1. , 3.23970139, 36.2488673 ],\n",
281
+ " [ 1. , 0.77654359, 1.719952 ],\n",
282
+ " [ 1. , 1.53339444, 7.11126102],\n",
283
+ " [ 1. , 1.44950114, 7.08491125],\n",
284
+ " [ 1. , 1.62476639, 8.74420153],\n",
285
+ " [ 1. , 1.96366438, 8.17721712],\n",
286
+ " [ 1. , 1.79561844, 2.61247159],\n",
287
+ " [ 1. , 1.95348024, 1.6333998 ],\n",
288
+ " [ 1. , 1.86352418, 5.88989936],\n",
289
+ " [ 1. , 2.91729395, 22.77683958],\n",
290
+ " [ 1. , 1.31972857, 2.84750546]])"
291
+ ]
292
+ },
293
+ {
294
+ "cell_type": "markdown",
295
+ "metadata": {},
296
+ "source": [
297
+ "# PS3: Working on Poisson Regression and reshaping the gradient calcuation"
298
+ ]
299
+ },
300
+ {
301
+ "cell_type": "code",
302
+ "execution_count": 17,
303
+ "metadata": {},
304
+ "outputs": [
305
+ {
306
+ "name": "stdout",
307
+ "output_type": "stream",
308
+ "text": [
309
+ "3\n",
310
+ "(3,)\n",
311
+ "(2, 2)\n",
312
+ "[[1 2]\n",
313
+ " [3 4]]\n"
314
+ ]
315
+ },
316
+ {
317
+ "data": {
318
+ "text/plain": [
319
+ "array([4, 6])"
320
+ ]
321
+ },
322
+ "execution_count": 17,
323
+ "metadata": {},
324
+ "output_type": "execute_result"
325
+ }
326
+ ],
327
+ "source": [
328
+ "# our code looks so funky because the problem says we have to do\n",
329
+ "# **full batch gradient ascent**\n",
330
+ "\n",
331
+ "foo = np.array([1 ,2, 3,]) \n",
332
+ "\n",
333
+ "# just an array that is three long\n",
334
+ "print(len(foo))\n",
335
+ "print(foo.shape)\n",
336
+ "\n",
337
+ "# three by 1 matrix \n",
338
+ "# three rows by 1 column \n",
339
+ "foo.reshape(3, 1).shape\n",
340
+ "\n",
341
+ "bar = np.array([1, 2, 3, 4])\n",
342
+ "\n",
343
+ "bar_reshaped = bar.reshape(2, 2)\n",
344
+ "\n",
345
+ "print(bar_reshaped.shape)\n",
346
+ "print(bar_reshaped)\n",
347
+ "\n",
348
+ "# np.sum(bar_reshaped, axis=0)\n",
349
+ "\n",
350
+ "# sum *down* the columns\n",
351
+ "bar_reshaped.sum(axis=0)\n",
352
+ "\n",
353
+ "# np.exp(bar, out=(2,2))\n",
354
+ "\n",
355
+ "# print(foo)"
356
+ ]
357
+ },
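+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For reference, a minimal sketch (an addition, not from the problem set) of the vectorized full-batch gradient ascent update for Poisson regression with the canonical link, where `lr`, `X`, and `y` below are illustrative assumptions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# full-batch gradient ascent for Poisson regression (canonical link: E[y|x] = exp(x . theta)) \n",
+ "lr = 1e-5  # assumed learning rate \n",
+ "X = np.random.rand(100, 3)  # assumed design matrix (n examples, d features) \n",
+ "y = np.random.poisson(lam=2.0, size=100)  # assumed count targets \n",
+ "\n",
+ "theta = np.zeros(X.shape[1])\n",
+ "for _ in range(1000):\n",
+ "    # gradient of the log-likelihood: X^T (y - exp(X theta)) \n",
+ "    theta = theta + lr * (X.T @ (y - np.exp(X @ theta)))\n",
+ "theta"
+ ]
+ },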
358
+ {
359
+ "cell_type": "code",
360
+ "execution_count": 24,
361
+ "metadata": {},
362
+ "outputs": [
363
+ {
364
+ "data": {
365
+ "text/plain": [
366
+ "-0.9262096826685897"
367
+ ]
368
+ },
369
+ "execution_count": 24,
370
+ "metadata": {},
371
+ "output_type": "execute_result"
372
+ }
373
+ ],
374
+ "source": [
375
+ "from numpy import linalg as LA\n",
376
+ "\n",
377
+ "LA.norm(np.array([1,2])) - LA.norm(np.array([1, 3]))"
378
+ ]
379
+ },
380
+ {
381
+ "cell_type": "code",
382
+ "execution_count": 22,
383
+ "metadata": {},
384
+ "outputs": [
385
+ {
386
+ "data": {
387
+ "text/plain": [
388
+ "1.0"
389
+ ]
390
+ },
391
+ "execution_count": 22,
392
+ "metadata": {},
393
+ "output_type": "execute_result"
394
+ }
395
+ ],
396
+ "source": [
397
+ "LA.norm(np.array([1,2]) - np.array([1,3]))"
398
+ ]
399
+ },
400
+ {
401
+ "cell_type": "code",
402
+ "execution_count": 31,
403
+ "metadata": {},
404
+ "outputs": [],
405
+ "source": [
406
+ "state = []\n",
407
+ "\n",
408
+ "for x in state:\n",
409
+ " # print(\"this is coef\", coef)\n",
410
+ " print(\"this is x\", x)"
411
+ ]
412
+ },
413
+ {
414
+ "cell_type": "code",
415
+ "execution_count": 34,
416
+ "metadata": {},
417
+ "outputs": [
418
+ {
419
+ "data": {
420
+ "text/plain": [
421
+ "1"
422
+ ]
423
+ },
424
+ "execution_count": 34,
425
+ "metadata": {},
426
+ "output_type": "execute_result"
427
+ }
428
+ ],
429
+ "source": [
430
+ "# print(\"hello world\")\n",
431
+ "\n",
432
+ "\n",
433
+ "def sign(a):\n",
434
+ " \"\"\"Gets the sign of a scalar input.\"\"\"\n",
435
+ " if a >= 0:\n",
436
+ " return 1\n",
437
+ " else:\n",
438
+ " return 0\n",
439
+ "\n",
440
+ "sign(22)"
441
+ ]
442
+ },
443
+ {
444
+ "cell_type": "markdown",
445
+ "metadata": {},
446
+ "source": [
447
+ "# Fashion MNIST Problem scratchpad"
448
+ ]
449
+ },
450
+ {
451
+ "cell_type": "code",
452
+ "execution_count": 16,
453
+ "metadata": {},
454
+ "outputs": [
455
+ {
456
+ "data": {
457
+ "text/plain": [
458
+ "array([[0.00626879, 0.01704033, 0.04632042, 0.93037047],\n",
459
+ " [0.01203764, 0.08894682, 0.24178252, 0.65723302],\n",
460
+ " [0.00446236, 0.66227241, 0.24363641, 0.08962882]])"
461
+ ]
462
+ },
463
+ "execution_count": 16,
464
+ "metadata": {},
465
+ "output_type": "execute_result"
466
+ }
467
+ ],
468
+ "source": [
469
+ "import numpy as np\n",
470
+ "\n",
471
+ "def softmax(x):\n",
472
+ " x = x - np.max(x,axis=1)[:,np.newaxis]\n",
473
+ " exp = np.exp(x)\n",
474
+ " s = exp / np.sum(exp,axis=1)[:,np.newaxis]\n",
475
+ " return s\n",
476
+ "\n",
477
+ "\n",
478
+ "overflow_test = np.array([[10000, 10010, 10]])\n",
479
+ "softmax([[22, 14, 16]])\n",
480
+ "\n",
481
+ "softmax(overflow_test)\n",
482
+ "\n",
483
+ "# correct solution:\n",
484
+ "def softmax_from_stackoverflow(x):\n",
485
+ " \"\"\"Compute softmax values for each sets of scores in x.\"\"\"\n",
486
+ " e_x = np.exp(x - np.max(x))\n",
487
+ " return e_x / e_x.sum(axis=0) # only difference\n",
488
+ "\n",
489
+ "# def softmax_2(x):\n",
490
+ "# my (correct) solution:\n",
491
+ "def softmax_2(z):\n",
492
+ " assert len(z.shape) == 2\n",
493
+ " s = np.max(z, axis=1)\n",
494
+ " s = s[:, np.newaxis] # necessary step to do broadcasting\n",
495
+ " e_x = np.exp(z - s)\n",
496
+ " div = np.sum(e_x, axis=1)\n",
497
+ " div = div[:, np.newaxis] # dito\n",
498
+ " return e_x / div\n",
499
+ "\n",
500
+ "# BOOM -- this seems like it has the same result and just needs keepdims argument! \n",
501
+ "def softmax_3(z):\n",
502
+ " return np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)\n",
503
+ "\n",
504
+ "\n",
505
+ "\n",
506
+ "# softmax_from_stackoverflow(overflow_test)\n",
507
+ "\n",
508
+ "scores2D = np.array([[1, 2, 3, 6],\n",
509
+ " [2, 4, 5, 6],\n",
510
+ " [3, 8, 7, 6]])\n",
511
+ "\n",
512
+ "# softmax_from_stackoverflow(scores2D)\n",
513
+ "softmax(scores2D)\n",
514
+ "\n",
515
+ "softmax_2(scores2D)\n",
516
+ "\n",
517
+ "softmax_3(scores2D)"
518
+ ]
519
+ },
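+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# added sanity check: the three stable implementations should agree on well-scaled inputs \n",
+ "assert np.allclose(softmax(scores2D), softmax_2(scores2D))\n",
+ "assert np.allclose(softmax_2(scores2D), softmax_3(scores2D))"
+ ]
+ },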
520
+ {
521
+ "cell_type": "code",
522
+ "execution_count": 31,
523
+ "metadata": {},
524
+ "outputs": [
525
+ {
526
+ "name": "stdout",
527
+ "output_type": "stream",
528
+ "text": [
529
+ "[[0. 0.]]\n",
530
+ "[0. 0.]\n"
531
+ ]
532
+ }
533
+ ],
534
+ "source": [
535
+ "def foo():\n",
536
+ " return (\"foo\", \"bar\", \"baz\")\n",
537
+ "\n",
538
+ "foo, bar, baz = foo()\n",
539
+ "\n",
540
+ "# print(baz)\n",
541
+ "\n",
542
+ "input_size = 2\n",
543
+ "num_hidden = 2\n",
544
+ "num_output = 2\n",
545
+ "\n",
546
+ "\n",
547
+ "W1 = np.random.normal(size = (input_size, num_hidden))\n",
548
+ "b1 = np.zeros((1, num_hidden)) \n",
549
+ "W2 = np.random.normal(size = (num_hidden, num_output)), \n",
550
+ "b2= np.zeros((1, num_output))\n",
551
+ "dict(W1=W1, b1=b1, W2=W2, b2=b2)\n",
552
+ "\n",
553
+ "working = {\n",
554
+ " 'W1': np.random.normal(size = (input_size, num_hidden)),\n",
555
+ " 'b1': np.zeros(num_hidden),\n",
556
+ " 'W2': np.random.normal(size = (num_hidden, 10)),\n",
557
+ " 'b2': np.zeros(10)\n",
558
+ " }\n",
559
+ "\n",
560
+ "# print(working)\n",
561
+ "\n",
562
+ "b1 = np.zeros((1, 2))\n",
563
+ "print(b1)\n",
564
+ "\n",
565
+ "b1_again = np.zeros(2)\n",
566
+ "print(b1_again)\n"
567
+ ]
568
+ },
569
+ {
570
+ "cell_type": "markdown",
571
+ "metadata": {},
572
+ "source": [
573
+ "# Naive Bayes Classifier "
574
+ ]
575
+ },
576
+ {
577
+ "cell_type": "code",
578
+ "execution_count": 74,
579
+ "metadata": {},
580
+ "outputs": [
581
+ {
582
+ "data": {
583
+ "text/plain": [
584
+ "{'puttin': 1, 'fone': 2}"
585
+ ]
586
+ },
587
+ "execution_count": 74,
588
+ "metadata": {},
589
+ "output_type": "execute_result"
590
+ }
591
+ ],
592
+ "source": [
593
+ "ham = \"THANX 4 PUTTIN DA FONE DOWN ON ME!\"\n",
594
+ "ham2 = \"So how are you really. What are you up to. How's the masters. And so on.\"\n",
595
+ "\n",
596
+ "ham_split = ham.lower().split()\n",
597
+ "\n",
598
+ "# would have to be a dict so we can lookup words \n",
599
+ "ham_split\n",
600
+ "word_count_dict = {}\n",
601
+ "\n",
602
+ "for word in ham_split:\n",
603
+ " # temp = dictionary.get(word, 0)\n",
604
+ " # dictionary[word] += 1\n",
605
+ "\n",
606
+ " if word in word_count_dict:\n",
607
+ " word_count_dict[word] += 1\n",
608
+ " else:\n",
609
+ " word_count_dict[word] = 1\n",
610
+ " \n",
611
+ " # dictionary.setdefault(word, 1)\n",
612
+ "\n",
613
+ "# filter this dict if count > 5 \n",
614
+ "# for key, value in dictionary.items():\n",
615
+ "# print(key)\n",
616
+ "# print(value)\n",
617
+ "\n",
618
+ "word_count_dict.items()\n",
619
+ "\n",
620
+ "word_count_dict[\"puttin\"] = 22\n",
621
+ "word_count_dict[\"fone\"] = 12\n",
622
+ "\n",
623
+ "\n",
624
+ "word_count_dict_filtered = dict(filter(lambda item: item[1] > 10, word_count_dict.items()))\n",
625
+ "# foo = list(filter(lambda item: item[1] > 10, dictionary.items()))\n",
626
+ "\n",
627
+ "word_dict = {}\n",
628
+ "idx = 0\n",
629
+ "for key, _ in word_count_dict_filtered.items():\n",
630
+ " idx += 1\n",
631
+ " word_dict[key] = idx\n",
632
+ "\n",
633
+ "# dictionary\n",
634
+ "word_dict"
635
+ ]
636
+ },
637
+ {
638
+ "cell_type": "code",
639
+ "execution_count": 55,
640
+ "metadata": {},
641
+ "outputs": [
642
+ {
643
+ "name": "stdout",
644
+ "output_type": "stream",
645
+ "text": [
646
+ "Filter dictionary: {8: 'u', 9: 'z', 10: 'j'}\n"
647
+ ]
648
+ }
649
+ ],
650
+ "source": [
651
+ "my_dict = {8:'u',4:'t',9:'z',10:'j',5:'k',3:'s'}\n",
652
+ "\n",
653
+ "new_filt = dict(filter(lambda val: val[0] > 5, my_dict.items()))\n",
654
+ "print(\"Filter dictionary:\",new_filt)\n",
655
+ "\n",
656
+ "\n"
657
+ ]
658
+ },
659
+ {
660
+ "cell_type": "code",
661
+ "execution_count": null,
662
+ "metadata": {},
663
+ "outputs": [],
664
+ "source": [
665
+ "word_dict = {}\n",
666
+ "for message in messages:\n",
667
+ " # word_list = get_words(message)\n",
668
+ " for word in word_list:\n",
669
+ " if word in word_dict:\n",
670
+ " word_dict[word] += 1\n",
671
+ " else:\n",
672
+ " word_dict[word] = 1"
673
+ ]
674
+ },
675
+ {
676
+ "cell_type": "code",
677
+ "execution_count": null,
678
+ "metadata": {},
679
+ "outputs": [],
680
+ "source": [
681
+ "N, V = len(messages), len(word_dictionary)\n",
682
+ "data = np.zeros((N, V))\n",
683
+ "for i, m in enumerate(messages):\n",
684
+ " for w in get_words(m):\n",
685
+ " if w in word_dictionary:\n",
686
+ " data[i, word_dictionary[w]] += 1\n",
687
+ "return data"
688
+ ]
689
+ },
690
+ {
691
+ "cell_type": "code",
692
+ "execution_count": 78,
693
+ "metadata": {},
694
+ "outputs": [
695
+ {
696
+ "ename": "NameError",
697
+ "evalue": "name 'word_count' is not defined",
698
+ "output_type": "error",
699
+ "traceback": [
700
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
701
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
702
+ "\u001b[1;32m/Users/johnnydevriese/projects/jupyter/cs229-scratchpad.ipynb Cell 22'\u001b[0m in \u001b[0;36m<cell line: 7>\u001b[0;34m()\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/cs229-scratchpad.ipynb#ch0000024?line=2'>3</a>\u001b[0m np\u001b[39m.\u001b[39mzeros((\u001b[39m1\u001b[39m, word_num))\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/cs229-scratchpad.ipynb#ch0000024?line=4'>5</a>\u001b[0m word_array \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39marray([])\u001b[39m.\u001b[39mreshape(\u001b[39m0\u001b[39m, word_num)\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/cs229-scratchpad.ipynb#ch0000024?line=6'>7</a>\u001b[0m word_array \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mvstack([word_array, word_count])\n",
703
+ "\u001b[0;31mNameError\u001b[0m: name 'word_count' is not defined"
704
+ ]
705
+ }
706
+ ],
707
+ "source": [
708
+ "word_num = 10\n",
709
+ "\n",
710
+ "np.zeros((1, word_num))\n",
711
+ "\n",
712
+ "word_array = np.array([]).reshape(0, word_num)\n",
713
+ "\n",
714
+ "word_array = np.vstack([word_array, word_count])"
715
+ ]
716
+ },
717
+ {
718
+ "cell_type": "code",
719
+ "execution_count": 86,
720
+ "metadata": {},
721
+ "outputs": [
722
+ {
723
+ "data": {
724
+ "text/plain": [
725
+ "array([[1, 2, 3]])"
726
+ ]
727
+ },
728
+ "execution_count": 86,
729
+ "metadata": {},
730
+ "output_type": "execute_result"
731
+ }
732
+ ],
733
+ "source": [
734
+ "np.ones((2, 3)).sum(axis=0)\n",
735
+ "\n",
736
+ "\n",
737
+ "np.array([[1 ,2, 3], [4, 5, 6]]).sum(axis=0) # sums column wise. \n",
738
+ "\n",
739
+ "np.array([[1 ,2, 3], [4, 5, 6]]).sum(axis=1) # sums row wise. \n",
740
+ "\n",
741
+ "\n",
742
+ "foo = np.array([[1 ,2, 3], [4, 5, 6]])\n",
743
+ "\n",
744
+ "foo[0:1,:]"
745
+ ]
746
+ },
747
+ {
748
+ "cell_type": "code",
749
+ "execution_count": 8,
750
+ "metadata": {},
751
+ "outputs": [
752
+ {
753
+ "data": {
754
+ "text/plain": [
755
+ "[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
756
+ " [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],\n",
757
+ " [0, 2, 4, 6, 8, 10, 12, 14, 16, 18],\n",
758
+ " [0, 3, 6, 9, 12, 15, 18, 21, 24, 27],\n",
759
+ " [0, 4, 8, 12, 16, 20, 24, 28, 32, 36],\n",
760
+ " [0, 5, 10, 15, 20, 25, 30, 35, 40, 45],\n",
761
+ " [0, 6, 12, 18, 24, 30, 36, 42, 48, 54],\n",
762
+ " [0, 7, 14, 21, 28, 35, 42, 49, 56, 63],\n",
763
+ " [0, 8, 16, 24, 32, 40, 48, 56, 64, 72],\n",
764
+ " [0, 9, 18, 27, 36, 45, 54, 63, 72, 81]]"
765
+ ]
766
+ },
767
+ "execution_count": 8,
768
+ "metadata": {},
769
+ "output_type": "execute_result"
770
+ }
771
+ ],
772
+ "source": [
773
+ "# [i for i in range(10) for j in range(10)] \n",
774
+ "\n",
775
+ "[[i*j for i in range(10)] for j in range(10)]\n",
776
+ "# p_x_given_z(x[i], mu[j], sigma[j]) * phi[j] \n",
777
+ "\n",
778
+ "# for i in range(n):\n",
779
+ "# for j in range(k):\n",
780
+ "\n",
781
+ "[[p_x_given_z(x[i], mu[j], sigma[j]) * phi[j] for i in range(n) for j in range(k)]]"
782
+ ]
783
+ }
784
+ ],
785
+ "metadata": {
786
+ "kernelspec": {
787
+ "display_name": "Python 3.10.5 ('pytorch-nightly')",
788
+ "language": "python",
789
+ "name": "python3"
790
+ },
791
+ "language_info": {
792
+ "codemirror_mode": {
793
+ "name": "ipython",
794
+ "version": 3
795
+ },
796
+ "file_extension": ".py",
797
+ "mimetype": "text/x-python",
798
+ "name": "python",
799
+ "nbconvert_exporter": "python",
800
+ "pygments_lexer": "ipython3",
801
+ "version": "3.10.5"
802
+ },
803
+ "orig_nbformat": 4,
804
+ "vscode": {
805
+ "interpreter": {
806
+ "hash": "8a8bcccfb183d1298694efece6cf41240378bc61621e95c864629a40c5876542"
807
+ }
808
+ }
809
+ },
810
+ "nbformat": 4,
811
+ "nbformat_minor": 2
812
+ }
code/cs229_random_gems.md ADDED
@@ -0,0 +1,13 @@
1
+ # Random Gems they point out
2
+
3
+ ### lecture 10
4
+
5
+ * Boosting is an ensembling method that decreases **bias** in trees
6
+
7
+ A single deep decision tree has low bias and high variance.
8
+
9
+ Bagging (ensembling many trees fit on bootstrap samples) is what reduces the variance; boosting instead reduces bias. See the sketch below.
10
+
11
+ https://towardsdatascience.com/decision-tree-ensembles-bagging-and-boosting-266a8ba60fd9
12
+
13
+
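+ A minimal sketch of that contrast (my own note, assuming scikit-learn; the dataset and hyperparameters are purely illustrative): bagging averages many deep trees to cut variance, while boosting stacks many shallow, high-bias stumps to cut bias.
+
+ ```python
+ from sklearn.datasets import make_classification
+ from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
+ from sklearn.model_selection import cross_val_score
+ from sklearn.tree import DecisionTreeClassifier
+
+ X, y = make_classification(n_samples=1000, n_features=20, random_state=0)
+
+ models = {
+     # one fully grown tree: low bias, high variance
+     "single tree": DecisionTreeClassifier(random_state=0),
+     # bagging: average many deep trees fit on bootstrap samples -> variance drops
+     "bagging": BaggingClassifier(DecisionTreeClassifier(), n_estimators=100, random_state=0),
+     # boosting: sequentially stack shallow stumps -> bias drops
+     "boosting": AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=100, random_state=0),
+ }
+
+ for name, model in models.items():
+     scores = cross_val_score(model, X, y, cv=5)
+     print(f"{name}: {scores.mean():.3f} +/- {scores.std():.3f}")
+ ```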
code/data/cifar-10-batches-py/batches.meta ADDED
Binary file (158 Bytes). View file
 
code/data/cifar-10-batches-py/data_batch_1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54636561a3ce25bd3e19253c6b0d8538147b0ae398331ac4a2d86c6d987368cd
3
+ size 31035704
code/data/cifar-10-batches-py/data_batch_2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:766b2cef9fbc745cf056b3152224f7cf77163b330ea9a15f9392beb8b89bc5a8
3
+ size 31035320
code/data/cifar-10-batches-py/data_batch_3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f00d98ebfb30b3ec0ad19f9756dc2630b89003e10525f5e148445e82aa6a1f9
3
+ size 31035999
code/data/cifar-10-batches-py/data_batch_4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f7bb240661948b8f4d53e36ec720d8306f5668bd0071dcb4e6c947f78e9682b
3
+ size 31035696
code/data/cifar-10-batches-py/data_batch_5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d91802434d8376bbaeeadf58a737e3a1b12ac839077e931237e0dcd43adcb154
3
+ size 31035623
code/data/cifar-10-batches-py/readme.html ADDED
@@ -0,0 +1 @@
 
 
1
+ <meta HTTP-EQUIV="REFRESH" content="0; url=http://www.cs.toronto.edu/~kriz/cifar.html">
code/data/cifar-10-batches-py/test_batch ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f53d8d457504f7cff4ea9e021afcf0e0ad8e24a91f3fc42091b8adef61157831
3
+ size 31035526
code/data/cifar-10-python.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce
3
+ size 170498071
code/data_science_handbook.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
code/diffusers/hf_diffusers.ipynb ADDED
@@ -0,0 +1,508 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "tensor([1., 1., 1., 1., 1.], device='mps:0')\n"
13
+ ]
14
+ },
15
+ {
16
+ "name": "stderr",
17
+ "output_type": "stream",
18
+ "text": [
19
+ "/Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages/torch/_tensor_str.py:103: UserWarning: The operator 'aten::bitwise_and.Tensor_out' is not currently supported on the MPS backend and will fall back to run on the CPU. This may have performance implications. (Triggered internally at /Users/runner/work/_temp/anaconda/conda-bld/pytorch_1659484612588/work/aten/src/ATen/mps/MPSFallback.mm:11.)\n",
20
+ " nonzero_finite_vals = torch.masked_select(tensor_view, torch.isfinite(tensor_view) & tensor_view.ne(0))\n"
21
+ ]
22
+ }
23
+ ],
24
+ "source": [
25
+ "import torch\n",
26
+ "\n",
29
+ "\n",
30
+ "# Check that MPS is available\n",
31
+ "if not torch.backends.mps.is_available():\n",
32
+ " if not torch.backends.mps.is_built():\n",
33
+ " print(\"MPS not available because the current PyTorch install was not \"\n",
34
+ " \"built with MPS enabled.\")\n",
35
+ " else:\n",
36
+ " print(\"MPS not available because the current MacOS version is not 12.3+ \"\n",
37
+ " \"and/or you do not have an MPS-enabled device on this machine.\")\n",
38
+ "\n",
39
+ "else:\n",
40
+ " mps_device = torch.device(\"mps\")\n",
41
+ "\n",
42
+ " # Create a Tensor directly on the mps device\n",
43
+ " x = torch.ones(5, device=mps_device)\n",
44
+ " # Or\n",
45
+ " # x = torch.ones(5, device=\"mps\")\n",
46
+ " print(x)\n",
47
+ "\n",
48
+ " # # Any operation happens on the GPU\n",
49
+ " # y = x * 2\n",
50
+ "\n",
51
+ " # # Move your model to mps just like any other device\n",
52
+ " # model = YourFavoriteNet()\n",
53
+ " # model.to(mps_device)\n",
54
+ "\n",
55
+ " # # Now every call runs on the GPU\n",
56
+ " # pred = model(x)"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": 3,
62
+ "metadata": {},
63
+ "outputs": [
64
+ {
65
+ "name": "stdout",
66
+ "output_type": "stream",
67
+ "text": [
68
+ "Collecting diffusers==0.2.4\n",
69
+ " Downloading diffusers-0.2.4-py3-none-any.whl (112 kB)\n",
70
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m113.0/113.0 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
71
+ "\u001b[?25hRequirement already satisfied: numpy in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from diffusers==0.2.4) (1.23.2)\n",
72
+ "Collecting filelock\n",
73
+ " Downloading filelock-3.8.0-py3-none-any.whl (10 kB)\n",
74
+ "Collecting importlib-metadata\n",
75
+ " Downloading importlib_metadata-4.12.0-py3-none-any.whl (21 kB)\n",
76
+ "Requirement already satisfied: torch>=1.4 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from diffusers==0.2.4) (1.12.1)\n",
77
+ "Requirement already satisfied: Pillow in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from diffusers==0.2.4) (9.2.0)\n",
78
+ "Collecting huggingface-hub<1.0,>=0.8.1\n",
79
+ " Downloading huggingface_hub-0.9.0-py3-none-any.whl (120 kB)\n",
80
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m120.5/120.5 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
81
+ "\u001b[?25hCollecting regex!=2019.12.17\n",
82
+ " Downloading regex-2022.8.17-cp310-cp310-macosx_11_0_arm64.whl (282 kB)\n",
83
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m283.0/283.0 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
84
+ "\u001b[?25hRequirement already satisfied: requests in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from diffusers==0.2.4) (2.28.1)\n",
85
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.8.1->diffusers==0.2.4) (4.3.0)\n",
86
+ "Requirement already satisfied: packaging>=20.9 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.8.1->diffusers==0.2.4) (21.3)\n",
87
+ "Collecting pyyaml>=5.1\n",
88
+ " Downloading PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl (173 kB)\n",
89
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m174.0/174.0 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
90
+ "\u001b[?25hCollecting tqdm\n",
91
+ " Downloading tqdm-4.64.0-py2.py3-none-any.whl (78 kB)\n",
92
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.4/78.4 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
93
+ "\u001b[?25hCollecting zipp>=0.5\n",
94
+ " Downloading zipp-3.8.1-py3-none-any.whl (5.6 kB)\n",
95
+ "Requirement already satisfied: certifi>=2017.4.17 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from requests->diffusers==0.2.4) (2022.6.15)\n",
96
+ "Requirement already satisfied: charset-normalizer<3,>=2 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from requests->diffusers==0.2.4) (2.1.1)\n",
97
+ "Requirement already satisfied: idna<4,>=2.5 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from requests->diffusers==0.2.4) (3.3)\n",
98
+ "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from requests->diffusers==0.2.4) (1.26.11)\n",
99
+ "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from packaging>=20.9->huggingface-hub<1.0,>=0.8.1->diffusers==0.2.4) (3.0.9)\n",
100
+ "Installing collected packages: zipp, tqdm, regex, pyyaml, filelock, importlib-metadata, huggingface-hub, diffusers\n",
101
+ "Successfully installed diffusers-0.2.4 filelock-3.8.0 huggingface-hub-0.9.0 importlib-metadata-4.12.0 pyyaml-6.0 regex-2022.8.17 tqdm-4.64.0 zipp-3.8.1\n",
102
+ "Collecting transformers\n",
103
+ " Downloading transformers-4.21.2-py3-none-any.whl (4.7 MB)\n",
104
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.7/4.7 MB\u001b[0m \u001b[31m20.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
105
+ "\u001b[?25hCollecting scipy\n",
106
+ " Downloading scipy-1.9.0-cp310-cp310-macosx_12_0_arm64.whl (29.9 MB)\n",
107
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m29.9/29.9 MB\u001b[0m \u001b[31m34.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
108
+ "\u001b[?25hCollecting ftfy\n",
109
+ " Downloading ftfy-6.1.1-py3-none-any.whl (53 kB)\n",
110
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
111
+ "\u001b[?25hRequirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from transformers) (0.9.0)\n",
112
+ "Collecting tokenizers!=0.11.3,<0.13,>=0.11.1\n",
113
+ " Downloading tokenizers-0.12.1.tar.gz (220 kB)\n",
114
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m220.7/220.7 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
115
+ "\u001b[?25h Installing build dependencies ... \u001b[?25ldone\n",
116
+ "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n",
117
+ "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
118
+ "\u001b[?25hRequirement already satisfied: packaging>=20.0 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from transformers) (21.3)\n",
119
+ "Requirement already satisfied: numpy>=1.17 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from transformers) (1.23.2)\n",
120
+ "Requirement already satisfied: tqdm>=4.27 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from transformers) (4.64.0)\n",
121
+ "Requirement already satisfied: requests in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from transformers) (2.28.1)\n",
122
+ "Requirement already satisfied: regex!=2019.12.17 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from transformers) (2022.8.17)\n",
123
+ "Requirement already satisfied: pyyaml>=5.1 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from transformers) (6.0)\n",
124
+ "Requirement already satisfied: filelock in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from transformers) (3.8.0)\n",
125
+ "Requirement already satisfied: wcwidth>=0.2.5 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ftfy) (0.2.5)\n",
126
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (4.3.0)\n",
127
+ "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from packaging>=20.0->transformers) (3.0.9)\n",
128
+ "Requirement already satisfied: idna<4,>=2.5 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from requests->transformers) (3.3)\n",
129
+ "Requirement already satisfied: certifi>=2017.4.17 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from requests->transformers) (2022.6.15)\n",
130
+ "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from requests->transformers) (1.26.11)\n",
131
+ "Requirement already satisfied: charset-normalizer<3,>=2 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from requests->transformers) (2.1.1)\n",
132
+ "Building wheels for collected packages: tokenizers\n",
133
+ " Building wheel for tokenizers (pyproject.toml) ... \u001b[?25lerror\n",
134
+ " \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n",
135
+ " \n",
136
+ " \u001b[31m×\u001b[0m \u001b[32mBuilding wheel for tokenizers \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mpyproject.toml\u001b[0m\u001b[1;32m)\u001b[0m did not run successfully.\n",
137
+ " \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n",
138
+ " \u001b[31m╰─>\u001b[0m \u001b[31m[51 lines of output]\u001b[0m\n",
139
+ " \u001b[31m \u001b[0m running bdist_wheel\n",
140
+ " \u001b[31m \u001b[0m running build\n",
141
+ " \u001b[31m \u001b[0m running build_py\n",
142
+ " \u001b[31m \u001b[0m creating build\n",
143
+ " \u001b[31m \u001b[0m creating build/lib.macosx-11.0-arm64-cpython-310\n",
144
+ " \u001b[31m \u001b[0m creating build/lib.macosx-11.0-arm64-cpython-310/tokenizers\n",
145
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/__init__.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers\n",
146
+ " \u001b[31m \u001b[0m creating build/lib.macosx-11.0-arm64-cpython-310/tokenizers/models\n",
147
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/models/__init__.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/models\n",
148
+ " \u001b[31m \u001b[0m creating build/lib.macosx-11.0-arm64-cpython-310/tokenizers/decoders\n",
149
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/decoders/__init__.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/decoders\n",
150
+ " \u001b[31m \u001b[0m creating build/lib.macosx-11.0-arm64-cpython-310/tokenizers/normalizers\n",
151
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/normalizers/__init__.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/normalizers\n",
152
+ " \u001b[31m \u001b[0m creating build/lib.macosx-11.0-arm64-cpython-310/tokenizers/pre_tokenizers\n",
153
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/pre_tokenizers/__init__.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/pre_tokenizers\n",
154
+ " \u001b[31m \u001b[0m creating build/lib.macosx-11.0-arm64-cpython-310/tokenizers/processors\n",
155
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/processors/__init__.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/processors\n",
156
+ " \u001b[31m \u001b[0m creating build/lib.macosx-11.0-arm64-cpython-310/tokenizers/trainers\n",
157
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/trainers/__init__.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/trainers\n",
158
+ " \u001b[31m \u001b[0m creating build/lib.macosx-11.0-arm64-cpython-310/tokenizers/implementations\n",
159
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/implementations/byte_level_bpe.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/implementations\n",
160
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/implementations/sentencepiece_unigram.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/implementations\n",
161
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/implementations/sentencepiece_bpe.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/implementations\n",
162
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/implementations/base_tokenizer.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/implementations\n",
163
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/implementations/__init__.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/implementations\n",
164
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/implementations/char_level_bpe.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/implementations\n",
165
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/implementations/bert_wordpiece.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/implementations\n",
166
+ " \u001b[31m \u001b[0m creating build/lib.macosx-11.0-arm64-cpython-310/tokenizers/tools\n",
167
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/tools/__init__.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/tools\n",
168
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/tools/visualizer.py -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/tools\n",
169
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/__init__.pyi -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers\n",
170
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/models/__init__.pyi -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/models\n",
171
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/decoders/__init__.pyi -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/decoders\n",
172
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/normalizers/__init__.pyi -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/normalizers\n",
173
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/pre_tokenizers/__init__.pyi -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/pre_tokenizers\n",
174
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/processors/__init__.pyi -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/processors\n",
175
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/trainers/__init__.pyi -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/trainers\n",
176
+ " \u001b[31m \u001b[0m copying py_src/tokenizers/tools/visualizer-styles.css -> build/lib.macosx-11.0-arm64-cpython-310/tokenizers/tools\n",
177
+ " \u001b[31m \u001b[0m running build_ext\n",
178
+ " \u001b[31m \u001b[0m running build_rust\n",
179
+ " \u001b[31m \u001b[0m error: can't find Rust compiler\n",
180
+ " \u001b[31m \u001b[0m \n",
181
+ " \u001b[31m \u001b[0m If you are using an outdated pip version, it is possible a prebuilt wheel is available for this package but pip is not able to install from it. Installing from the wheel would avoid the need for a Rust compiler.\n",
182
+ " \u001b[31m \u001b[0m \n",
183
+ " \u001b[31m \u001b[0m To update pip, run:\n",
184
+ " \u001b[31m \u001b[0m \n",
185
+ " \u001b[31m \u001b[0m pip install --upgrade pip\n",
186
+ " \u001b[31m \u001b[0m \n",
187
+ " \u001b[31m \u001b[0m and then retry package installation.\n",
188
+ " \u001b[31m \u001b[0m \n",
189
+ " \u001b[31m \u001b[0m If you did intend to build this package from source, try installing a Rust compiler from your system package manager and ensure it is on the PATH during installation. Alternatively, rustup (available at https://rustup.rs) is the recommended way to download and update the Rust compiler toolchain.\n",
190
+ " \u001b[31m \u001b[0m \u001b[31m[end of output]\u001b[0m\n",
191
+ " \n",
192
+ " \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n",
193
+ "\u001b[?25h\u001b[31m ERROR: Failed building wheel for tokenizers\u001b[0m\u001b[31m\n",
194
+ "\u001b[0mFailed to build tokenizers\n",
195
+ "\u001b[31mERROR: Could not build wheels for tokenizers, which is required to install pyproject.toml-based projects\u001b[0m\u001b[31m\n",
196
+ "\u001b[0mCollecting ipywidgets<8,>=7\n",
197
+ " Downloading ipywidgets-7.7.2-py2.py3-none-any.whl (123 kB)\n",
198
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m123.4/123.4 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
199
+ "\u001b[?25hRequirement already satisfied: ipython>=4.0.0 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipywidgets<8,>=7) (8.4.0)\n",
200
+ "Requirement already satisfied: ipykernel>=4.5.1 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipywidgets<8,>=7) (6.15.1)\n",
201
+ "Collecting widgetsnbextension~=3.6.0\n",
202
+ " Downloading widgetsnbextension-3.6.1-py2.py3-none-any.whl (1.6 MB)\n",
203
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
204
+ "\u001b[?25hCollecting ipython-genutils~=0.2.0\n",
205
+ " Downloading ipython_genutils-0.2.0-py2.py3-none-any.whl (26 kB)\n",
206
+ "Collecting jupyterlab-widgets<3,>=1.0.0\n",
207
+ " Downloading jupyterlab_widgets-1.1.1-py3-none-any.whl (245 kB)\n",
208
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m245.3/245.3 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
209
+ "\u001b[?25hRequirement already satisfied: traitlets>=4.3.1 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipywidgets<8,>=7) (5.3.0)\n",
210
+ "Requirement already satisfied: nest-asyncio in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets<8,>=7) (1.5.5)\n",
211
+ "Requirement already satisfied: psutil in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets<8,>=7) (5.9.1)\n",
212
+ "Requirement already satisfied: matplotlib-inline>=0.1 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets<8,>=7) (0.1.6)\n",
213
+ "Requirement already satisfied: jupyter-client>=6.1.12 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets<8,>=7) (7.3.4)\n",
214
+ "Requirement already satisfied: debugpy>=1.0 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets<8,>=7) (1.6.3)\n",
215
+ "Requirement already satisfied: pyzmq>=17 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets<8,>=7) (23.2.1)\n",
216
+ "Requirement already satisfied: appnope in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets<8,>=7) (0.1.3)\n",
217
+ "Requirement already satisfied: tornado>=6.1 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets<8,>=7) (6.2)\n",
218
+ "Requirement already satisfied: packaging in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets<8,>=7) (21.3)\n",
219
+ "Requirement already satisfied: pexpect>4.3 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7) (4.8.0)\n",
220
+ "Requirement already satisfied: pickleshare in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7) (0.7.5)\n",
221
+ "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7) (3.0.30)\n",
222
+ "Requirement already satisfied: decorator in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7) (5.1.1)\n",
223
+ "Requirement already satisfied: jedi>=0.16 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7) (0.18.1)\n",
224
+ "Requirement already satisfied: setuptools>=18.5 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7) (65.3.0)\n",
225
+ "Requirement already satisfied: stack-data in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7) (0.4.0)\n",
226
+ "Requirement already satisfied: pygments>=2.4.0 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7) (2.13.0)\n",
227
+ "Requirement already satisfied: backcall in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7) (0.2.0)\n",
228
+ "Collecting notebook>=4.4.1\n",
229
+ " Downloading notebook-6.4.12-py3-none-any.whl (9.9 MB)\n",
230
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.9/9.9 MB\u001b[0m \u001b[31m31.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
231
+ "\u001b[?25hRequirement already satisfied: parso<0.9.0,>=0.8.0 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets<8,>=7) (0.8.3)\n",
232
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets<8,>=7) (2.8.2)\n",
233
+ "Requirement already satisfied: entrypoints in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets<8,>=7) (0.4)\n",
234
+ "Requirement already satisfied: jupyter-core>=4.9.2 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets<8,>=7) (4.11.1)\n",
235
+ "Collecting terminado>=0.8.3\n",
236
+ " Downloading terminado-0.15.0-py3-none-any.whl (16 kB)\n",
237
+ "Collecting nbformat\n",
238
+ " Downloading nbformat-5.4.0-py3-none-any.whl (73 kB)\n",
239
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.3/73.3 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
240
+ "\u001b[?25hCollecting jinja2\n",
241
+ " Downloading Jinja2-3.1.2-py3-none-any.whl (133 kB)\n",
242
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m133.1/133.1 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
243
+ "\u001b[?25hCollecting prometheus-client\n",
244
+ " Downloading prometheus_client-0.14.1-py3-none-any.whl (59 kB)\n",
245
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.5/59.5 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
246
+ "\u001b[?25hCollecting nbconvert>=5\n",
247
+ " Downloading nbconvert-7.0.0-py3-none-any.whl (271 kB)\n",
248
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m271.3/271.3 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
249
+ "\u001b[?25hCollecting argon2-cffi\n",
250
+ " Downloading argon2_cffi-21.3.0-py3-none-any.whl (14 kB)\n",
251
+ "Collecting Send2Trash>=1.8.0\n",
252
+ " Downloading Send2Trash-1.8.0-py3-none-any.whl (18 kB)\n",
253
+ "Requirement already satisfied: ptyprocess>=0.5 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets<8,>=7) (0.7.0)\n",
254
+ "Requirement already satisfied: wcwidth in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=4.0.0->ipywidgets<8,>=7) (0.2.5)\n",
255
+ "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from packaging->ipykernel>=4.5.1->ipywidgets<8,>=7) (3.0.9)\n",
256
+ "Requirement already satisfied: executing in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from stack-data->ipython>=4.0.0->ipywidgets<8,>=7) (0.10.0)\n",
257
+ "Requirement already satisfied: asttokens in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from stack-data->ipython>=4.0.0->ipywidgets<8,>=7) (2.0.8)\n",
258
+ "Requirement already satisfied: pure-eval in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from stack-data->ipython>=4.0.0->ipywidgets<8,>=7) (0.2.2)\n",
259
+ "Collecting mistune<3,>=2.0.3\n",
260
+ " Downloading mistune-2.0.4-py2.py3-none-any.whl (24 kB)\n",
261
+ "Collecting beautifulsoup4\n",
262
+ " Downloading beautifulsoup4-4.11.1-py3-none-any.whl (128 kB)\n",
263
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m128.2/128.2 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
264
+ "\u001b[?25hCollecting tinycss2\n",
265
+ " Downloading tinycss2-1.1.1-py3-none-any.whl (21 kB)\n",
266
+ "Collecting defusedxml\n",
267
+ " Downloading defusedxml-0.7.1-py2.py3-none-any.whl (25 kB)\n",
268
+ "Collecting markupsafe>=2.0\n",
269
+ " Downloading MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl (17 kB)\n",
270
+ "Collecting bleach\n",
271
+ " Downloading bleach-5.0.1-py3-none-any.whl (160 kB)\n",
272
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m160.9/160.9 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
273
+ "\u001b[?25hCollecting nbclient>=0.5.0\n",
274
+ " Downloading nbclient-0.6.7-py3-none-any.whl (71 kB)\n",
275
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.8/71.8 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
276
+ "\u001b[?25hCollecting pandocfilters>=1.4.1\n",
277
+ " Downloading pandocfilters-1.5.0-py2.py3-none-any.whl (8.7 kB)\n",
278
+ "Collecting jupyterlab-pygments\n",
279
+ " Downloading jupyterlab_pygments-0.2.2-py2.py3-none-any.whl (21 kB)\n",
280
+ "Collecting lxml\n",
281
+ " Downloading lxml-4.9.1.tar.gz (3.4 MB)\n",
282
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m27.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
283
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n",
284
+ "\u001b[?25hCollecting fastjsonschema\n",
285
+ " Downloading fastjsonschema-2.16.1-py3-none-any.whl (22 kB)\n",
286
+ "Collecting jsonschema>=2.6\n",
287
+ " Downloading jsonschema-4.14.0-py3-none-any.whl (82 kB)\n",
288
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━���━━━━━━━━━━━\u001b[0m \u001b[32m82.4/82.4 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
289
+ "\u001b[?25hRequirement already satisfied: six>=1.5 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from python-dateutil>=2.8.2->jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets<8,>=7) (1.16.0)\n",
290
+ "Collecting argon2-cffi-bindings\n",
291
+ " Downloading argon2_cffi_bindings-21.2.0-cp38-abi3-macosx_10_9_universal2.whl (53 kB)\n",
292
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
293
+ "\u001b[?25hCollecting attrs>=17.4.0\n",
294
+ " Downloading attrs-22.1.0-py2.py3-none-any.whl (58 kB)\n",
295
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.8/58.8 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
296
+ "\u001b[?25hCollecting pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0\n",
297
+ " Downloading pyrsistent-0.18.1-cp310-cp310-macosx_10_9_universal2.whl (81 kB)\n",
298
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
299
+ "\u001b[?25hRequirement already satisfied: cffi>=1.0.1 in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8,>=7) (1.15.1)\n",
300
+ "Collecting soupsieve>1.2\n",
301
+ " Downloading soupsieve-2.3.2.post1-py3-none-any.whl (37 kB)\n",
302
+ "Collecting webencodings\n",
303
+ " Downloading webencodings-0.5.1-py2.py3-none-any.whl (11 kB)\n",
304
+ "Requirement already satisfied: pycparser in /Users/johnnydevriese/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8,>=7) (2.21)\n",
305
+ "Building wheels for collected packages: lxml\n",
306
+ " Building wheel for lxml (setup.py) ... \u001b[?25ldone\n",
307
+ "\u001b[?25h Created wheel for lxml: filename=lxml-4.9.1-cp310-cp310-macosx_11_0_arm64.whl size=1423116 sha256=1e181d1aacf06a988fc9ff742b400e46f8f773abdef087dab0ac94e2435f8e60\n",
308
+ " Stored in directory: /Users/johnnydevriese/Library/Caches/pip/wheels/a4/ec/7b/8acde6da24b5aabeee049213d5bec12d1e9214d3cae276387b\n",
309
+ "Successfully built lxml\n",
310
+ "Installing collected packages: webencodings, Send2Trash, mistune, ipython-genutils, fastjsonschema, tinycss2, terminado, soupsieve, pyrsistent, prometheus-client, pandocfilters, markupsafe, lxml, jupyterlab-widgets, jupyterlab-pygments, defusedxml, bleach, attrs, jsonschema, jinja2, beautifulsoup4, argon2-cffi-bindings, nbformat, argon2-cffi, nbclient, nbconvert, notebook, widgetsnbextension, ipywidgets\n",
311
+ "Successfully installed Send2Trash-1.8.0 argon2-cffi-21.3.0 argon2-cffi-bindings-21.2.0 attrs-22.1.0 beautifulsoup4-4.11.1 bleach-5.0.1 defusedxml-0.7.1 fastjsonschema-2.16.1 ipython-genutils-0.2.0 ipywidgets-7.7.2 jinja2-3.1.2 jsonschema-4.14.0 jupyterlab-pygments-0.2.2 jupyterlab-widgets-1.1.1 lxml-4.9.1 markupsafe-2.1.1 mistune-2.0.4 nbclient-0.6.7 nbconvert-7.0.0 nbformat-5.4.0 notebook-6.4.12 pandocfilters-1.5.0 prometheus-client-0.14.1 pyrsistent-0.18.1 soupsieve-2.3.2.post1 terminado-0.15.0 tinycss2-1.1.1 webencodings-0.5.1 widgetsnbextension-3.6.1\n"
312
+ ]
313
+ }
314
+ ],
315
+ "source": [
316
+ "!pip install diffusers==0.2.4\n",
317
+ "!pip install transformers scipy ftfy\n",
318
+ "!pip install \"ipywidgets>=7,<8\""
319
+ ]
320
+ },
321
+ {
322
+ "cell_type": "code",
323
+ "execution_count": 2,
324
+ "metadata": {},
325
+ "outputs": [
326
+ {
327
+ "data": {
328
+ "application/vnd.jupyter.widget-view+json": {
329
+ "model_id": "8f312208bf4744df84d15d15e956afb2",
330
+ "version_major": 2,
331
+ "version_minor": 0
332
+ },
333
+ "text/plain": [
334
+ "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
335
+ ]
336
+ },
337
+ "metadata": {},
338
+ "output_type": "display_data"
339
+ }
340
+ ],
341
+ "source": [
342
+ "from huggingface_hub import notebook_login\n",
343
+ "\n",
344
+ "notebook_login()"
345
+ ]
346
+ },
347
+ {
348
+ "cell_type": "code",
349
+ "execution_count": 6,
350
+ "metadata": {},
351
+ "outputs": [
352
+ {
353
+ "name": "stdout",
354
+ "output_type": "stream",
355
+ "text": [
356
+ "GREP_COLOR=1;33\n",
357
+ "MANPATH=/opt/homebrew/share/man::\n",
358
+ "SHELL=/bin/zsh\n",
359
+ "HOMEBREW_REPOSITORY=/opt/homebrew\n",
360
+ "TMPDIR=/var/folders/4k/y4ljh2217c57vl68z1zkl0440000gn/T/\n",
361
+ "CONDA_SHLVL=2\n",
362
+ "PYTHONUNBUFFERED=1\n",
363
+ "CONDA_PROMPT_MODIFIER=(pytorch-1-12) \n",
364
+ "ORIGINAL_XDG_CURRENT_DESKTOP=undefined\n",
365
+ "MallocNanoZone=0\n",
366
+ "ZSH=/Users/johnnydevriese/.oh-my-zsh\n",
367
+ "PYTHONIOENCODING=utf-8\n",
368
+ "USER=johnnydevriese\n",
369
+ "LS_COLORS=di=34;40:ln=35;40:so=32;40:pi=33;40:ex=31;40:bd=34;46:cd=34;43:su=0;41:sg=0;46:tw=0;42:ow=0;43:\n",
370
+ "COMMAND_MODE=unix2003\n",
371
+ "CONDA_EXE=/Users/johnnydevriese/miniforge3/bin/conda\n",
372
+ "SSH_AUTH_SOCK=/private/tmp/com.apple.launchd.QcecMP1ruy/Listeners\n",
373
+ "__CF_USER_TEXT_ENCODING=0x1F5:0x0:0x0\n",
374
+ "PAGER=cat\n",
375
+ "VSCODE_AMD_ENTRYPOINT=vs/workbench/api/node/extensionHostProcess\n",
376
+ "ELECTRON_RUN_AS_NODE=1\n",
377
+ "_CE_CONDA=\n",
378
+ "LSCOLORS=exfxcxdxbxegedabagacad\n",
379
+ "CONDA_PREFIX_1=/Users/johnnydevriese/miniforge3\n",
380
+ "PATH=/Users/johnnydevriese/miniforge3/envs/pytorch-1-12/bin:/Users/johnnydevriese/miniforge3/envs/pytorch-1-12/bin:/Users/johnnydevriese/miniforge3/condabin:/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Applications/VMware Fusion Tech Preview.app/Contents/Public\n",
381
+ "LaunchInstanceID=82C5D7F6-FDB0-43D6-88D3-5FBAA6CD505A\n",
382
+ "CONDA_PREFIX=/Users/johnnydevriese/miniforge3/envs/pytorch-1-12\n",
383
+ "__CFBundleIdentifier=com.microsoft.VSCode\n",
384
+ "PWD=/Users/johnnydevriese/projects/machine_learning/code/diffusers\n",
385
+ "VSCODE_HANDLES_UNCAUGHT_ERRORS=true\n",
386
+ "XPC_FLAGS=0x0\n",
387
+ "_CE_M=\n",
388
+ "XPC_SERVICE_NAME=0\n",
389
+ "SHLVL=2\n",
390
+ "HOME=/Users/johnnydevriese\n",
391
+ "VSCODE_NLS_CONFIG={\"locale\":\"en-us\",\"availableLanguages\":{},\"_languagePackSupport\":true}\n",
392
+ "HOMEBREW_PREFIX=/opt/homebrew\n",
393
+ "CONDA_PYTHON_EXE=/Users/johnnydevriese/miniforge3/bin/python\n",
394
+ "LESS=-R\n",
395
+ "LOGNAME=johnnydevriese\n",
396
+ "VSCODE_IPC_HOOK=/Users/johnnydevriese/Library/Application Support/Code/1.70.2-main.sock\n",
397
+ "VSCODE_CODE_CACHE_PATH=/Users/johnnydevriese/Library/Application Support/Code/CachedData/e4503b30fc78200f846c62cf8091b76ff5547662\n",
398
+ "CONDA_DEFAULT_ENV=pytorch-1-12\n",
399
+ "VSCODE_PID=583\n",
400
+ "INFOPATH=/opt/homebrew/share/info:\n",
401
+ "HOMEBREW_CELLAR=/opt/homebrew/Cellar\n",
402
+ "VSCODE_CWD=/\n",
403
+ "SECURITYSESSIONID=186b3\n",
404
+ "LC_CTYPE=UTF-8\n",
405
+ "PYTHONPATH=/Users/johnnydevriese/.vscode/extensions/ms-toolsai.jupyter-2022.7.1102252217/pythonFiles:/Users/johnnydevriese/.vscode/extensions/ms-toolsai.jupyter-2022.7.1102252217/pythonFiles/lib/python\n",
406
+ "JUPYTER_PATH=/Users/johnnydevriese/.vscode/extensions/ms-toolsai.jupyter-2022.7.1102252217/temp/jupyter\n",
407
+ "PYDEVD_USE_FRAME_EVAL=NO\n",
408
+ "OLDPWD=/Users/johnnydevriese/projects/machine_learning/code/diffusers\n",
409
+ "CONDA_ROOT=/Users/johnnydevriese/miniforge3\n",
410
+ "JPY_PARENT_PID=7629\n",
411
+ "TERM=xterm-color\n",
412
+ "CLICOLOR=1\n",
413
+ "GIT_PAGER=cat\n",
414
+ "MPLBACKEND=module://matplotlib_inline.backend_inline\n",
415
+ "_=/usr/bin/env\n",
416
+ "PYTORCH_ENABLE_MPS_FALLBACK=1\n"
417
+ ]
418
+ }
419
+ ],
420
+ "source": [
421
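+ "# note: `! env VAR=1` only sets VAR in a throwaway subshell and prints that env;\n",
+ "# to set it for this kernel, use `%env PYTORCH_ENABLE_MPS_FALLBACK=1` or os.environ\n",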
+ "! env PYTORCH_ENABLE_MPS_FALLBACK=1"
422
+ ]
423
+ },
424
+ {
425
+ "cell_type": "code",
426
+ "execution_count": 7,
427
+ "metadata": {},
428
+ "outputs": [
429
+ {
430
+ "name": "stderr",
431
+ "output_type": "stream",
432
+ "text": [
433
+ "ftfy or spacy is not installed using BERT BasicTokenizer instead of ftfy.\n"
434
+ ]
435
+ },
436
+ {
437
+ "ename": "NotImplementedError",
438
+ "evalue": "The operator 'aten::index.Tensor' is not current implemented for the MPS device. If you want this op to be added in priority during the prototype phase of this feature, please comment on https://github.com/pytorch/pytorch/issues/77764. As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS.",
439
+ "output_type": "error",
440
+ "traceback": [
441
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
442
+ "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)",
443
+ "\u001b[1;32m/Users/johnnydevriese/projects/machine_learning/code/diffusers/hf_diffusers.ipynb Cell 5\u001b[0m in \u001b[0;36m<cell line: 21>\u001b[0;34m()\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/machine_learning/code/diffusers/hf_diffusers.ipynb#W1sZmlsZQ%3D%3D?line=18'>19</a>\u001b[0m prompt \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39ma photo of an astronaut riding a horse on mars\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/machine_learning/code/diffusers/hf_diffusers.ipynb#W1sZmlsZQ%3D%3D?line=19'>20</a>\u001b[0m \u001b[39m# with autocast(\"mps\"):\u001b[39;00m\n\u001b[0;32m---> <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/machine_learning/code/diffusers/hf_diffusers.ipynb#W1sZmlsZQ%3D%3D?line=20'>21</a>\u001b[0m image \u001b[39m=\u001b[39m pipe(prompt)[\u001b[39m\"\u001b[39m\u001b[39msample\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m0\u001b[39m] \n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/machine_learning/code/diffusers/hf_diffusers.ipynb#W1sZmlsZQ%3D%3D?line=22'>23</a>\u001b[0m image\u001b[39m.\u001b[39msave(\u001b[39m\"\u001b[39m\u001b[39mastronaut_rides_horse.png\u001b[39m\u001b[39m\"\u001b[39m)\n",
444
+ "File \u001b[0;32m~/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages/torch/autograd/grad_mode.py:27\u001b[0m, in \u001b[0;36m_DecoratorContextManager.__call__.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[39m@functools\u001b[39m\u001b[39m.\u001b[39mwraps(func)\n\u001b[1;32m 25\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdecorate_context\u001b[39m(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m 26\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclone():\n\u001b[0;32m---> 27\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
445
+ "File \u001b[0;32m~/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py:82\u001b[0m, in \u001b[0;36mStableDiffusionPipeline.__call__\u001b[0;34m(self, prompt, height, width, num_inference_steps, guidance_scale, eta, generator, output_type, **kwargs)\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[39m# get prompt text embeddings\u001b[39;00m\n\u001b[1;32m 75\u001b[0m text_input \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtokenizer(\n\u001b[1;32m 76\u001b[0m prompt,\n\u001b[1;32m 77\u001b[0m padding\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mmax_length\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 80\u001b[0m return_tensors\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mpt\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 81\u001b[0m )\n\u001b[0;32m---> 82\u001b[0m text_embeddings \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtext_encoder(text_input\u001b[39m.\u001b[39;49minput_ids\u001b[39m.\u001b[39;49mto(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdevice))[\u001b[39m0\u001b[39m]\n\u001b[1;32m 84\u001b[0m \u001b[39m# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)\u001b[39;00m\n\u001b[1;32m 85\u001b[0m \u001b[39m# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`\u001b[39;00m\n\u001b[1;32m 86\u001b[0m \u001b[39m# corresponds to doing no classifier free guidance.\u001b[39;00m\n\u001b[1;32m 87\u001b[0m do_classifier_free_guidance \u001b[39m=\u001b[39m guidance_scale \u001b[39m>\u001b[39m \u001b[39m1.0\u001b[39m\n",
446
+ "File \u001b[0;32m~/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages/torch/nn/modules/module.py:1130\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1126\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1127\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1128\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49m\u001b[39minput\u001b[39;49m, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1131\u001b[0m \u001b[39m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1132\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[39m=\u001b[39m [], []\n",
447
+ "File \u001b[0;32m~/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages/transformers/models/clip/modeling_clip.py:721\u001b[0m, in \u001b[0;36mCLIPTextModel.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 693\u001b[0m \u001b[39m@add_start_docstrings_to_model_forward\u001b[39m(CLIP_TEXT_INPUTS_DOCSTRING)\n\u001b[1;32m 694\u001b[0m \u001b[39m@replace_return_docstrings\u001b[39m(output_type\u001b[39m=\u001b[39mBaseModelOutputWithPooling, config_class\u001b[39m=\u001b[39mCLIPTextConfig)\n\u001b[1;32m 695\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mforward\u001b[39m(\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 702\u001b[0m return_dict: Optional[\u001b[39mbool\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 703\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Union[Tuple, BaseModelOutputWithPooling]:\n\u001b[1;32m 704\u001b[0m \u001b[39mr\u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 705\u001b[0m \u001b[39m Returns:\u001b[39;00m\n\u001b[1;32m 706\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 719\u001b[0m \u001b[39m >>> pooled_output = outputs.pooler_output # pooled (EOS token) states\u001b[39;00m\n\u001b[1;32m 720\u001b[0m \u001b[39m ```\"\"\"\u001b[39;00m\n\u001b[0;32m--> 721\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtext_model(\n\u001b[1;32m 722\u001b[0m input_ids\u001b[39m=\u001b[39;49minput_ids,\n\u001b[1;32m 723\u001b[0m attention_mask\u001b[39m=\u001b[39;49mattention_mask,\n\u001b[1;32m 724\u001b[0m position_ids\u001b[39m=\u001b[39;49mposition_ids,\n\u001b[1;32m 725\u001b[0m output_attentions\u001b[39m=\u001b[39;49moutput_attentions,\n\u001b[1;32m 726\u001b[0m output_hidden_states\u001b[39m=\u001b[39;49moutput_hidden_states,\n\u001b[1;32m 727\u001b[0m return_dict\u001b[39m=\u001b[39;49mreturn_dict,\n\u001b[1;32m 728\u001b[0m )\n",
448
+ "File \u001b[0;32m~/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages/torch/nn/modules/module.py:1130\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1126\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1127\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1128\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49m\u001b[39minput\u001b[39;49m, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1131\u001b[0m \u001b[39m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1132\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[39m=\u001b[39m [], []\n",
449
+ "File \u001b[0;32m~/miniforge3/envs/pytorch-1-12/lib/python3.10/site-packages/transformers/models/clip/modeling_clip.py:656\u001b[0m, in \u001b[0;36mCLIPTextTransformer.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 652\u001b[0m last_hidden_state \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfinal_layer_norm(last_hidden_state)\n\u001b[1;32m 654\u001b[0m \u001b[39m# text_embeds.shape = [batch_size, sequence_length, transformer.width]\u001b[39;00m\n\u001b[1;32m 655\u001b[0m \u001b[39m# take features from the eot embedding (eot_token is the highest number in each sequence)\u001b[39;00m\n\u001b[0;32m--> 656\u001b[0m pooled_output \u001b[39m=\u001b[39m last_hidden_state[torch\u001b[39m.\u001b[39;49marange(last_hidden_state\u001b[39m.\u001b[39;49mshape[\u001b[39m0\u001b[39;49m]), input_ids\u001b[39m.\u001b[39;49margmax(dim\u001b[39m=\u001b[39;49m\u001b[39m-\u001b[39;49m\u001b[39m1\u001b[39;49m)]\n\u001b[1;32m 658\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m return_dict:\n\u001b[1;32m 659\u001b[0m \u001b[39mreturn\u001b[39;00m (last_hidden_state, pooled_output) \u001b[39m+\u001b[39m encoder_outputs[\u001b[39m1\u001b[39m:]\n",
450
+ "\u001b[0;31mNotImplementedError\u001b[0m: The operator 'aten::index.Tensor' is not current implemented for the MPS device. If you want this op to be added in priority during the prototype phase of this feature, please comment on https://github.com/pytorch/pytorch/issues/77764. As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS."
451
+ ]
452
+ }
453
+ ],
454
+ "source": [
455
+ "# make sure you're logged in with `huggingface-cli login`\n",
456
+ "from torch import autocast\n",
457
+ "from diffusers import StableDiffusionPipeline, LMSDiscreteScheduler\n",
458
+ "\n",
459
+ "lms = LMSDiscreteScheduler(\n",
460
+ " beta_start=0.00085, \n",
461
+ " beta_end=0.012, \n",
462
+ " beta_schedule=\"scaled_linear\"\n",
463
+ ")\n",
464
+ "\n",
465
+ "pipe = StableDiffusionPipeline.from_pretrained(\n",
466
+ " \"CompVis/stable-diffusion-v1-3\", \n",
467
+ " scheduler=lms,\n",
468
+ " torch_dtype=torch.float16, \n",
469
+ " revision=\"fp16\",\n",
470
+ " use_auth_token=True\n",
471
+ ").to(\"mps\")\n",
472
+ "\n",
473
+ "prompt = \"a photo of an astronaut riding a horse on mars\"\n",
474
+ "# with autocast(\"mps\"):\n",
475
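+ "# this call raised NotImplementedError for aten::index.Tensor on MPS in torch 1.12;\n",
+ "# exporting PYTORCH_ENABLE_MPS_FALLBACK=1 before launching the kernel lets it fall back to CPU\n",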
+ "image = pipe(prompt)[\"sample\"][0] \n",
476
+ " \n",
477
+ "image.save(\"astronaut_rides_horse.png\")"
478
+ ]
479
+ }
480
+ ],
481
+ "metadata": {
482
+ "kernelspec": {
483
+ "display_name": "Python 3.10.6 ('pytorch-1-12')",
484
+ "language": "python",
485
+ "name": "python3"
486
+ },
487
+ "language_info": {
488
+ "codemirror_mode": {
489
+ "name": "ipython",
490
+ "version": 3
491
+ },
492
+ "file_extension": ".py",
493
+ "mimetype": "text/x-python",
494
+ "name": "python",
495
+ "nbconvert_exporter": "python",
496
+ "pygments_lexer": "ipython3",
497
+ "version": "3.10.6"
498
+ },
499
+ "orig_nbformat": 4,
500
+ "vscode": {
501
+ "interpreter": {
502
+ "hash": "d91b751b6cafe1e473109edab0583e459c2e471c181546b21e3fef0fb0f3aa3b"
503
+ }
504
+ }
505
+ },
506
+ "nbformat": 4,
507
+ "nbformat_minor": 2
508
+ }
code/eda_pandas.html ADDED
The diff for this file is too large to render. See raw diff
 
code/eda_pandas.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
code/eval_results.json ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.9774436090225563,
4
+ "eval_loss": 0.10330713540315628,
5
+ "eval_runtime": 10.9904,
6
+ "eval_samples_per_second": 12.101,
7
+ "eval_steps_per_second": 1.547
8
+ }
code/fin_hackerrank.ipynb ADDED
@@ -0,0 +1,115 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "foo = [1, 2, 3, 3]\n",
+ "\n",
+ "for idx, value in enumerate(foo):\n",
+ "    print(\"value\", value)\n",
+ "    print(\"foo\", foo[:idx])\n",
+ "    print(\"index\", idx)\n",
+ "    print(\"sum\", sum(foo[:idx+1]))\n",
+ "\n",
+ "sum(foo[:1])\n",
+ "\n",
+ "#!/bin/python3\n",
+ "\n",
+ "import math\n",
+ "import os\n",
+ "import random\n",
+ "import re\n",
+ "import sys\n",
+ "\n",
+ "#\n",
+ "# Complete the 'balancedSum' function below.\n",
+ "#\n",
+ "# The function is expected to return an INTEGER.\n",
+ "# The function accepts INTEGER_ARRAY arr as parameter.\n",
+ "#\n",
+ "\n",
+ "def balancedSum(arr):\n",
+ "    # Brute force: re-summing both slices on every iteration is O(n^2),\n",
+ "    # which isn't fast enough for large inputs. The next cell keeps a\n",
+ "    # running total instead, which brings this down to O(n).\n",
+ "    for idx, value in enumerate(arr):\n",
+ "        if sum(arr[:idx+1]) == sum(arr[idx+2::]):\n",
+ "            return idx+1\n",
+ "\n",
+ "\n",
+ "if __name__ == '__main__':\n",
+ "    fptr = open(os.environ['OUTPUT_PATH'], 'w')\n",
+ "\n",
+ "    arr_count = int(input().strip())\n",
+ "\n",
+ "    arr = []\n",
+ "\n",
+ "    for _ in range(arr_count):\n",
+ "        arr_item = int(input().strip())\n",
+ "        arr.append(arr_item)\n",
+ "\n",
+ "    result = balancedSum(arr)\n",
+ "\n",
+ "    fptr.write(str(result) + '\\n')\n",
+ "\n",
+ "    fptr.close()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "arr = [1, 2, 3, 3]\n",
+ "\n",
+ "# O(n) version: keep a running left sum and derive the right sum from\n",
+ "# the fixed total instead of re-summing slices on every iteration.\n",
+ "total = sum(arr)\n",
+ "left_sum = 0\n",
+ "\n",
+ "for idx, value in enumerate(arr):\n",
+ "    # right_sum is the sum of everything after the pivot at index idx\n",
+ "    right_sum = total - left_sum - value\n",
+ "    if left_sum == right_sum:\n",
+ "        print(\"success\")\n",
+ "        print(idx)\n",
+ "    left_sum += value\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "language_info": {
+ "name": "python"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
code/fraud_detection.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
code/gcp_ml_engineer_questions.md ADDED
@@ -0,0 +1,226 @@
+ ### 1. You work for a textile manufacturer and have been asked to build a model to detect and classify fabric defects. You trained a machine learning model with high recall based on high resolution images taken at the end of the production line. You want quality control inspectors to gain trust in your model. Which technique should you use to understand the rationale of your classifier?
+
+
+ A. Use K-fold cross validation to understand how the model performs on different test datasets.
+
+ B. Use the Integrated Gradients method to efficiently compute feature attributions for each predicted image.
+
+ C. Use PCA (Principal Component Analysis) to reduce the original feature set to a smaller set of easily understood features.
+
+ D. Use k-means clustering to group similar images together, and calculate the Davies-Bouldin index to evaluate the separation between clusters.
+
+ <details>
+ <summary>Answer</summary>
+
+ A is not correct because K-fold cross validation offers no explanation on the predictions made by the model.
+ B is correct because it identifies the pixels of the input image that lead to the classification of the image itself.
+ C is not correct because PCA simplifies higher dimensional datasets but offers no added benefit to the scenario.
+ D is not correct because clustering images does not provide any insight into why the classification model made the predictions that it did.
+ </details>
+
+
+ ### 2. You need to write a generic test to verify whether Dense Neural Network (DNN) models automatically released by your team have a sufficient number of parameters to learn the task for which they were built. What should you do?
+
+
+ A. Train the model for a few iterations, and check for NaN values.
+
+ B. Train the model for a few iterations, and verify that the loss is constant.
+
+ C. Train a simple linear model, and determine if the DNN model outperforms it.
+
+ D. Train the model with no regularization, and verify that the loss function is close to zero.
+
+ <details>
+ <summary>Answer</summary>
+
+ A is not correct because the test does not check that the model has enough parameters to learn the task.
+ B is not correct because the loss should decrease if you have enough parameters to learn the task.
+ C is not correct because outperforming the linear model does not guarantee that the model has enough parameters to learn tasks with non-linear data representations. The option also doesn’t quantify a metric to give an indication of how well the model performed.
+ D is correct because the test can check that the model has enough parameters to memorize the task.
+ </details>
+
+
+ ### 3. Your team is using a TensorFlow Inception-v3 CNN model pretrained on ImageNet for an image classification prediction challenge on 10,000 images. You will use AI Platform to perform the model training. What TensorFlow distribution strategy and AI Platform training job configuration should you use to train the model and optimize for wall-clock time?
+
+
+ A. Default Strategy; Custom tier with a single master node and four v100 GPUs.
+
+ B. One Device Strategy; Custom tier with a single master node and four v100 GPUs.
+
+ C. One Device Strategy; Custom tier with a single master node and eight v100 GPUs.
+
+ D. MirroredStrategy; Custom tier with a single master node and four v100 GPUs.
+
+
+ <details>
+ <summary>Answer</summary>
+
+ A is not correct because Default Strategy does not distribute training across multiple devices.
+ B is not correct because One Device Strategy does not distribute training across multiple devices.
+ C is not correct because One Device Strategy does not distribute training across multiple devices.
+ D is correct because this is the only strategy that can perform distributed training; albeit there is only a single copy of the variables on the CPU host.
+ </details>
+
+
+ ### 4. You work on a team where the process for deploying a model into production starts with data scientists training different versions of models in a Kubeflow pipeline. The workflow then stores the new model artifact into the corresponding Cloud Storage bucket. You need to build the next steps of the pipeline after the submitted model is ready to be tested and deployed in production on AI Platform. How should you configure the architecture before deploying the model to production?
+
+
+ A. Deploy model in test environment -> Evaluate and test model -> Create a new AI Platform model version
+
+ B. Validate model -> Deploy model in test environment -> Create a new AI Platform model version
+
+ C. Create a new AI Platform model version -> Evaluate and test model -> Deploy model in test environment
+
+ D. Create a new AI Platform model version -> Deploy model in test environment -> Validate model
+
+
+ <details>
+ <summary>Answer</summary>
+ A is correct because the model can be validated after it is deployed to the test environment, and the release version is established before the model is deployed in production.
+ B is not correct because the model cannot be validated before being deployed to the test environment.
+ C is not correct because the model version is being set up for the release candidate before the model is validated. Moreover, the model cannot be validated before being deployed to the test environment.
+ D is not correct because the model version is being set up for the release candidate before the model is validated.
+ </details>
+
+
+ ### 5. You work for a maintenance company and have built and trained a deep learning model that identifies defects based on thermal images of underground electric cables. Your dataset contains 10,000 images, 100 of which contain visible defects. How should you evaluate the performance of the model on a test dataset?
+
+
+ A. Calculate the Area Under the Curve (AUC) value.
+
+ B. Calculate the number of true positive results predicted by the model.
+
+ C. Calculate the fraction of images predicted by the model to have a visible defect.
+
+ D. Calculate the Cosine Similarity to compare the model’s performance on the test dataset to the model’s performance on the training dataset.
+
+
+ <details>
+ <summary>Answer</summary>
+
+ A is correct because it is scale-invariant. AUC measures how well predictions are ranked, rather than their absolute values. AUC is also classification-threshold invariant. It measures the quality of the model's predictions irrespective of what classification threshold is chosen.
+ B is incorrect because calculating the number of true positives without considering false positives can lead to misleading results. For instance, the model could classify nearly every image as a defect. This would result in many true positives, but the model would in fact be a very poor discriminator.
+ C is incorrect because merely calculating the fraction of images that contain defects doesn’t indicate whether your model is accurate or not.
+ D is incorrect because this metric is more commonly used in distance-based models (e.g., K Nearest Neighbors). This isn’t an appropriate metric for checking the performance of an image classification model.
+ </details>
+
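+ As a side note on AUC's threshold-invariance, here is a tiny sketch (scikit-learn is assumed here purely for illustration; the question itself is tool-agnostic and the numbers are made up):
+
+ ```python
+ # AUC scores the *ranking* of predictions, so no threshold is ever chosen.
+ from sklearn.metrics import roc_auc_score
+
+ y_true = [0, 0, 0, 1, 1]               # 2 defective cables out of 5
+ y_score = [0.1, 0.3, 0.35, 0.8, 0.9]   # predicted defect probabilities
+
+ # Every defect is ranked above every non-defect -> AUC = 1.0
+ print(roc_auc_score(y_true, y_score))
+ ```
+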
+
+
+ ### 6. You work for a manufacturing company that owns a high-value machine which has several machine settings and multiple sensors. A history of the machine’s hourly sensor readings and known failure event data are stored in BigQuery. You need to predict if the machine will fail within the next 3 days in order to schedule maintenance before the machine fails. Which data preparation and model training steps should you take?
+
+
+ A. Data preparation: Daily max value feature engineering; Model training: AutoML classification with BQML
+
+ B. Data preparation: Daily min value feature engineering; Model training: Logistic regression with BQML and AUTO_CLASS_WEIGHTS set to True
+
+ C. Data preparation: Rolling average feature engineering; Model training: Logistic regression with BQML and AUTO_CLASS_WEIGHTS set to False
+
+ D. Data preparation: Rolling average feature engineering; Model training: Logistic regression with BQML and AUTO_CLASS_WEIGHTS set to True
+
+ <details>
+ <summary>Answer</summary>
+
+ A is not correct because a rolling average is a better feature engineering technique, as it will smooth out the noise and fluctuation in the data to demonstrate whether there is a trend. Using the max value could be an artifact of some noise and may not capture the trend accurately.
+ B is not correct because a rolling average is a better feature engineering technique, as it will smooth out the noise and fluctuation in the data to demonstrate whether there is a trend. Using the min value could be an artifact of some noise and may not capture the trend accurately.
+ C is not correct because the model training does not balance class labels for an imbalanced dataset.
+ D is correct because it uses the rolling average of the sensor data and balances the weights using the BQML auto class weight balance parameter.
+ </details>
+
+
+ ### 7. You are an ML engineer at a media company. You want to use machine learning to analyze video content, identify objects, and alert users if there is inappropriate content. Which Google Cloud products should you use to build this project?
+
+ A. Pub/Sub, Cloud Function, Cloud Vision API
+
+ B. Pub/Sub, Cloud IoT, Dataflow, Cloud Vision API, Cloud Logging
+
+ C. Pub/Sub, Cloud Function, Video Intelligence API, Cloud Logging
+
+ D. Pub/Sub, Cloud Function, AutoML Video Intelligence, Cloud Logging
+
+ <details>
+ <summary>Answer</summary>
+
+ A is not correct as there is no tool for alerting and notifying.
+ B is not correct as it uses the Cloud Vision API for processing videos.
+ C is correct as the Video Intelligence API can find inappropriate content, and the other components satisfy the requirements of real-time processing and notification. (https://cloud.google.com/video-intelligence)
+ D is not correct as AutoML Video Intelligence should only be used in case of customization.
+ </details>
+
+ ### 8. You work for a large retailer. You want to use ML to forecast future sales leveraging 10 years of historical sales data. The historical data is stored in Cloud Storage in Avro format. You want to rapidly experiment with all the available data. How should you build and train your model for the sales forecast?
+
+
+ A. Load data into BigQuery and use the ARIMA model type on BigQuery ML.
+
+ B. Convert the data into CSV format and create a regression model on AutoML Tables.
+
+ C. Convert the data into TFRecords and create an RNN model on TensorFlow on AI Platform Notebooks.
+
+ D. Convert and refactor the data into CSV format and use the built-in XGBoost algorithm on AI Platform Training.
+
+ <details>
+ <summary>Answer</summary>
+
+ A is correct because BigQuery ML is designed for fast and rapid experimentation and it is possible to use federated queries to read data directly from Cloud Storage. Moreover, ARIMA is considered one of the best in class for time series forecasting.
+ B is not correct because AutoML Tables is not ideal for fast iteration and rapid experimentation. Even if it does not require data cleanup and hyperparameter tuning, it takes at least one hour to create a model.
+ C is not correct because in order to build a custom TensorFlow model, you would still need to do data cleanup and hyperparameter tuning.
+ D is not correct because using AI Platform Training requires preprocessing your data in a particular CSV structure and it is not ideal for fast iteration, as training times can take a long time because it cannot be distributed on multiple machines.
+ </details>
+
+
+
+ ### 9. You need to build an object detection model for a small startup company to identify if and where the company’s logo appears in an image. You were given a large repository of images, some with logos and some without. These images are not yet labelled. You need to label these pictures, and then train and deploy the model. What should you do?
+
+ A. Use Google Cloud’s Data Labelling Service to label your data. Use AutoML Object Detection to train and deploy the model.
+
+ B. Use Vision API to detect and identify logos in pictures and use it as a label. Use AI Platform to build and train a convolutional neural network.
+
+ C. Create two folders: one where the logo appears and one where it doesn’t. Manually place images in each folder. Use AI Platform to build and train a convolutional neural network.
+
+ D. Create two folders: one where the logo appears and one where it doesn’t. Manually place images in each folder. Use AI Platform to build and train a real time object detection model.
+
+ <details>
+ <summary>Answer</summary>
+
+ A is correct as this will allow you to easily create a request for a labelling task and deploy a high-performance model.
+ B is not correct because Vision API is not guaranteed to work with any company logos, and in the statement it explicitly mentions a small startup, which will further decrease the chance of success.
+ C is not correct because the task of manually labelling the data is time consuming and should be avoided if possible.
+ D is not correct because the task of labelling object detection data is very tedious, and real-time object detection is designed for detecting objects in videos rather than in images.
+ </details>
+
+ ### 10. You work for a large financial institution that is planning to use Dialogflow to create a chatbot for the company’s mobile app. You have reviewed old chat logs and tagged each conversation for intent based on each customer’s stated intention for contacting customer service. About 70% of customer inquiries are simple requests that are solved within 10 intents. The remaining 30% of inquiries require much longer and more complicated requests. Which intents should you automate first?
+
+ A. Automate a blend of the shortest and longest intents to be representative of all intents.
+
+ B. Automate the more complicated requests first because those require more of the agents’ time.
+
+ C. Automate the 10 intents that cover 70% of the requests so that live agents can handle the more complicated requests.
+
+ D. Automate intents in places where common words such as “payment” only appear once to avoid confusing the software.
+
+ <details>
+ <summary>Answer</summary>
+
+ A is incorrect because you should not automate the higher value requests.
+ B is incorrect because live agents are better suited to handle these complicated requests.
+ C is correct because it enables a machine to handle the most simple requests and gives the live agents more opportunity to handle higher value requests.
+ D is incorrect because Dialogflow can handle the same word in multiple intents.
+ </details>
+
+ ### 11. You work for a gaming company that develops and manages a popular massively multiplayer online (MMO) game. The game’s environment is open-ended, and a large number of positions and moves can be taken by a player. Your team has developed an ML model with TensorFlow that predicts the next move of each player. Edge deployment is not possible, but low-latency serving is required. How should you configure the deployment?
+
+ A. Use a Cloud TPU to optimize model training speed.
+
+ B. Use AI Platform Prediction with a NVIDIA GPU to make real-time predictions.
+
+ C. Use AI Platform Prediction with a high-CPU machine type to get a batch prediction for the players.
+
+ D. Use AI Platform Prediction with a high-memory machine type to get a batch prediction for the players.
+
+ <details>
+ <summary>Answer</summary>
+
+ A is not correct because changing the training will not improve the prediction latency.
+ B is correct because using a VM with a GPU and NVIDIA drivers enables you to use TensorRT. NVIDIA has developed TensorRT (an inference optimization library) for high-performance inference on GPUs. Google Cloud’s Deep Learning VMs are ideal for this case because they have everything you need pre-installed.
+ C is not correct because batch jobs do not satisfy the low-latency requirements for an online multiplayer game.
+ D is not correct because batch jobs do not satisfy the low-latency requirements for an online multiplayer game.
+ </details>
code/gcp_ml_engineer_tips.md ADDED
@@ -0,0 +1,626 @@
+ ### Tips
+
+ from https://towardsdatascience.com/how-i-passed-the-gcp-professional-ml-engineer-certification-47104f40bec5
+
+ General
+
+ Typical Big Data pipeline for streaming data:
+
+ Pub/Sub -> Dataflow -> BigQuery or Cloud Storage
+
+ Typical Big Data pipeline for batch data:
+
+ Pub/Sub -> Cloud Run or Cloud Functions -> Dataflow -> BigQuery or Cloud Storage
+
+ * Use the general-use APIs by default (Vision, Video Intelligence, Natural Language…). Only use AutoML if you have custom needs (custom labels, etc.)
+ * To de-identify sensitive data, you can redact, tokenize or hash, using BigQuery, Cloud Storage, Datastore, or Data Loss Prevention (DLP)
+ * Difference between TensorBoard and TensorFlow Model Analysis: the former evaluates during training, based on mini-batches, while the latter evaluates after training, can be done on slices of data, and is based on the full data
+ * AI Explanations: with tabular data, you can use Shapley values or Integrated Gradients for large feature spaces; with images, you can use Integrated Gradients for pixel-level explanations or XRAI for region-level explanations.
+ * When to use Kubeflow over TFX? When you need PyTorch, XGBoost or if you want to dockerize every step of the flow
+ * Keras: use the Sequential API by default. If you have multiple inputs or outputs, layer sharing or a non-linear topology, change to the Functional API — unless you have an RNN; in that case, use Keras model subclassing instead
+ * 3 methods for optimizing TensorFlow pipelines: prefetch, interleave and cache (see the sketch below)
+
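+ A minimal tf.data sketch of those three methods (the file pattern and batch size below are made up for illustration):
+
+ ```python
+ import tensorflow as tf
+
+ files = tf.data.Dataset.list_files("data/train-*.tfrecord")  # hypothetical shards
+
+ dataset = (
+     files
+     # interleave: read several shards concurrently instead of one at a time
+     .interleave(tf.data.TFRecordDataset, num_parallel_calls=tf.data.AUTOTUNE)
+     # cache: keep the records in memory after the first epoch
+     .cache()
+     .batch(64)
+     # prefetch: overlap input preprocessing with training on the accelerator
+     .prefetch(tf.data.AUTOTUNE)
+ )
+ ```
+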
+
+ ### BigQuery ML
+
+ It supports the following types of models: linear regression, binary and multiclass logistic regression, k-means, matrix factorization, time series, boosted trees, deep neural networks, AutoML models and imported TensorFlow models.
+ Use it for quick and easy models, prototyping, etc. — a sketch follows.
+
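+ A sketch of how little code a BQML prototype takes (the dataset, table, and label names are made up; assumes the google-cloud-bigquery client library and default credentials):
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+ # The model is trained entirely inside BigQuery with one SQL statement.
+ client.query("""
+     CREATE OR REPLACE MODEL `mydataset.churn_model`
+     OPTIONS (model_type = 'logistic_reg', input_label_cols = ['churned']) AS
+     SELECT * FROM `mydataset.customers`
+ """).result()
+ ```
+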
+ ### Storage
+
+ Choosing storage for analytics:
+
+ Structured data: Bigtable for millisecond latency, BigQuery for latency in seconds
+ Unstructured: use Cloud Storage by default, and Firebase storage for mobile
+
+ ### Accelerators
+
+ Choosing between CPUs, TPUs and GPUs:
+
+ Use CPUs for quick prototypes, simple/small models, or if you have many custom C++ operations; use GPUs if you have some custom C++ operations and/or medium-to-large models; use TPUs for big matrix computations, no custom TensorFlow operations, and/or very large models that train for weeks or months.
+
+ To improve performance on TPUs: if data pre-processing is a bottleneck, do it offline as a one-time cost; choose the largest batch size that fits in memory; keep the per-core batch size the same.
+
+ ### Neural networks
+
+ Common pitfalls in backpropagation and their solutions:
+
+ vanishing gradients -> use ReLU
+ exploding gradients -> use batch normalization
+ ReLU layers are dying -> lower the learning rate
+
+ For multiclass classification (sketched below), if:
+
+ labels and probabilities are mutually exclusive, use softmax_cross_entropy_with_logits_v2
+ labels are mutually exclusive, but not probabilities, use sparse_softmax_cross_entropy_with_logits
+ labels are not mutually exclusive, use sigmoid_cross_entropy_with_logits
+
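+ A sketch of the three choices in TF 2, where the first one is just tf.nn.softmax_cross_entropy_with_logits (the `_v2` suffix is TF 1.x naming); the logits and labels are toy values:
+
+ ```python
+ import tensorflow as tf
+
+ logits = tf.constant([[2.0, 0.5, 0.1]])  # raw model outputs for 3 classes
+
+ # one-hot / probability targets, classes mutually exclusive
+ onehot = tf.constant([[1.0, 0.0, 0.0]])
+ loss_a = tf.nn.softmax_cross_entropy_with_logits(labels=onehot, logits=logits)
+
+ # integer class ids, classes mutually exclusive
+ class_id = tf.constant([0])
+ loss_b = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=class_id, logits=logits)
+
+ # multi-label targets, classes NOT mutually exclusive
+ multi = tf.constant([[1.0, 0.0, 1.0]])
+ loss_c = tf.nn.sigmoid_cross_entropy_with_logits(labels=multi, logits=logits)
+ ```
+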
+ # Learning Stuff
+
+
+ ### Labs
+
+ Recommending Products Using Cloud SQL and Spark
+ https://www.cloudskillsboost.google/course_sessions/554292/labs/102245
+
+
+ ```bash
+ echo "Authorizing Cloud Dataproc to connect with Cloud SQL"
+ CLUSTER=rentals
+ CLOUDSQL=rentals
+ ZONE=us-central1-f
+ NWORKERS=2
+ machines="$CLUSTER-m"
+ for w in `seq 0 $(($NWORKERS - 1))`; do
+    machines="$machines $CLUSTER-w-$w"
+ done
+ echo "Machines to authorize: $machines in $ZONE ... finding their IP addresses"
+ ips=""
+ for machine in $machines; do
+    IP_ADDRESS=$(gcloud compute instances describe $machine --zone=$ZONE --format='value(networkInterfaces.accessConfigs[].natIP)' | sed "s/\['//g" | sed "s/'\]//g" )/32
+    echo "IP address of $machine is $IP_ADDRESS"
+    if [ -z $ips ]; then
+       ips=$IP_ADDRESS
+    else
+       ips="$ips,$IP_ADDRESS"
+    fi
+ done
+ echo "Authorizing [$ips] to access cloudsql=$CLOUDSQL"
+ gcloud sql instances patch $CLOUDSQL --authorized-networks $ips
+ ```
+
+
+ ### Recommending Products Using Cloud SQL and Spark -- Module Test
+
+ 1. True or False: Cloud SQL is a big data analytics warehouse
+
+ Answer: False -- Correct - Cloud SQL is a transactional RDBMS (relational database management system). It is designed for many more WRITES than READS, whereas BigQuery is a big data analytics warehouse optimized for reporting READS.
+
+ 2. Cloud SQL and Cloud Dataproc offer familiar tools (MySQL and Hadoop/Pig/Hive/Spark). What is the value-add provided by Google Cloud Platform? (Select the 2 correct options below.)
+
+ * Google-proprietary extensions and bug fixes to MySQL, Hadoop, and so on
+
+ * It’s the same API, but Google implements it better
+
+ * Fully-managed versions of the software offer no-ops
+ Yes. No-ops is the main value-add here.
+
+ * Running it on Google infrastructure offers reliability and cost savings
+ Yes. You pay only for the resources you use. Cloud SQL can be shut down when it’s not being used. Hadoop clusters can be of preemptible nodes, and so on.
+
+ 3. You are thinking about migrating your Hadoop workloads to the cloud and you have a few workloads that are fault-tolerant (they can handle interruptions of individual VMs gracefully). What are some architecture considerations you should explore in the cloud? Choose all that apply
+
+ * Migrate your storage from on-cluster HDFS to off-cluster Google Cloud Storage (GCS)
+ Correct!
+
+ * Use PVMs or Preemptible Virtual Machines
+ Correct!
+
+ * Consider having multiple Cloud Dataproc instances for each priority workload and then turning them down when not in use
+ Correct!
+
+
+ 4. True or False: If you are migrating your Hadoop workload to the cloud, you must first rewrite all your Spark jobs to be compliant with the cloud.
+
+ Answer: False -- Correct - you can run the same Spark job code, on the same Hadoop software, on cloud hardware with Cloud Dataproc.
+
+
+ 5. Complete the following: You should feed your machine learning model your _______ and not your _______. It will learn those for itself!
+
+ data, rules
+
+ 6. Relational databases are a good choice when you need:
+
+ * Fast queries on terabytes of data
+
+ * Streaming, high-throughput writes
+
+ * Aggregations on unstructured data
+
+ * Transactional updates on relatively small datasets -- correct
+
+ 7. Google Cloud Storage is a good option for storing data that: (Select the 2 correct options below.)
+
+ * Will be accessed frequently and updated constantly with new transactions from a front-end and needs to be stored in a relational database
+
+ * Is ingested in real-time from sensors and other devices and supports SQL-based queries
+
+ * May be required to be read at some later time (i.e. load a CSV file into BigQuery) -- correct
+
+ * May be imported from a bucket into a Hadoop cluster for analysis -- correct
+
+
+
+ ### Lab -- Creating a Streaming Data Pipeline for a Real-Time Dashboard with Dataflow
+
+
+ Task 1. Create a Pub/Sub topic and BigQuery dataset
+ Task 2. Create a Cloud Storage bucket
+ Task 3. Set up a Dataflow Pipeline
+ Task 4. Analyze the taxi data using BigQuery
+ Task 5. Perform aggregations on the stream for reporting
+ Task 6. Create a real-time dashboard
+ Task 7. Create a time series dashboard
+ Task 8. Stop the Dataflow job
+
+
+ The biggest things here are creating the Dataflow pipeline from a template and building the aggregates in BigQuery.
+
+ Task 3. Set up a Dataflow Pipeline
+
+ Dataflow is a serverless way to carry out data analysis. In this lab, you set up a streaming data pipeline to read sensor data from Pub/Sub, compute the maximum temperature within a time window, and write this out to BigQuery.
+
+ In the Cloud Console, go to Navigation menu > Dataflow.
+
+ In the top menu bar, click CREATE JOB FROM TEMPLATE.
+
+ Enter streaming-taxi-pipeline as the Job name for your Dataflow job.
+
+ Under Dataflow template, select the Pub/Sub Topic to BigQuery template.
+
+ Under Input Pub/Sub topic, enter projects/pubsub-public-data/topics/taxirides-realtime
+
+ Under BigQuery output table, enter <myprojectid>:taxirides.realtime
+
+ Under Temporary location, enter gs://<mybucket>/tmp/
+
+
+ And then use this SQL query to make the aggregates:
+
+ ```sql
+ WITH streaming_data AS (
+   SELECT
+     timestamp,
+     TIMESTAMP_TRUNC(timestamp, HOUR, 'UTC') AS hour,
+     TIMESTAMP_TRUNC(timestamp, MINUTE, 'UTC') AS minute,
+     TIMESTAMP_TRUNC(timestamp, SECOND, 'UTC') AS second,
+     ride_id,
+     latitude,
+     longitude,
+     meter_reading,
+     ride_status,
+     passenger_count
+   FROM
+     taxirides.realtime
+   WHERE ride_status = 'dropoff'
+   ORDER BY timestamp DESC
+   LIMIT 100000
+ )
+ # calculate aggregations on stream for reporting:
+ SELECT
+   ROW_NUMBER() OVER() AS dashboard_sort,
+   minute,
+   COUNT(DISTINCT ride_id) AS total_rides,
+   SUM(meter_reading) AS total_revenue,
+   SUM(passenger_count) AS total_passengers
+ FROM streaming_data
+ GROUP BY minute, timestamp
+ ```
+
+
+ ### Perform Foundational Data, ML, and AI Tasks in Google Cloud: Challenge Lab (Expert)
+
+
+ Create a simple Dataproc job
+ Create a simple Dataflow job
+ Create a simple Dataprep job
+ Perform one of the three Google machine-learning-backed API tasks
+
+ Task 4: AI
+
+ Complete one of the tasks below; YOUR_PROJECT must be replaced with your lab project name.
+
+ Use the Google Cloud Speech API to analyze the audio file gs://cloud-training/gsp323/task4.flac. Once you have analyzed the file you can upload the resulting analysis to gs://YOUR_PROJECT-marking/task4-gcs.result.
+
+ Use the Cloud Natural Language API to analyze the sentence from text about Odin. The text you need to analyze is "Old Norse texts portray Odin as one-eyed and long-bearded, frequently wielding a spear named Gungnir and wearing a cloak and a broad hat." Once you have analyzed the text you can upload the resulting analysis to gs://YOUR_PROJECT-marking/task4-cnl.result.
+
+ Use Google Video Intelligence to detect all text in the video gs://spls/gsp154/video/train.mp4. Once you have completed the processing of the video, pipe the output into a file and upload it to gs://YOUR_PROJECT-marking/task4-gvi.result. Ensure the progress of the operation is complete and the service account you're uploading the output with has the Storage Object Admin role.
+
+
+
+ ### Invoking ML APIs from AI Platform Notebooks (Jupyter notebook) labs
+
+ https://www.cloudskillsboost.google/course_sessions/570479/labs/102982
+
+ Really cool to see basic usage of some crazy powerful APIs!
+
+ Also noticed there is a new book out (put in amazon cart) for learning about AI on GCP.
+
+
+ ### cloud natural language
+
+
+ score of the sentiment ranges between -1.0 (negative) and 1.0 (positive) and corresponds to the overall emotional leaning of the text.
+
+ magnitude indicates the overall strength of emotion (both positive and negative) within the given text, between 0.0 and +inf. Unlike score, magnitude is not normalized; each expression of emotion within the text (both positive and negative) contributes to the text's magnitude (so longer text blocks may have greater magnitudes).
+
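+ A minimal sentiment call (a sketch assuming the google-cloud-language client library and default credentials; the input text is arbitrary):
+
+ ```python
+ from google.cloud import language_v1
+
+ client = language_v1.LanguageServiceClient()
+ doc = language_v1.Document(
+     content="I love this product!",
+     type_=language_v1.Document.Type.PLAIN_TEXT,
+ )
+ sentiment = client.analyze_sentiment(request={"document": doc}).document_sentiment
+ print(sentiment.score, sentiment.magnitude)  # score in [-1, 1], magnitude in [0, +inf)
+ ```
+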
+ ### LAB Analyzing data using AI Platform Notebooks and BigQuery
+
+ In this lab, you analyze a large (70 million rows, 8 GB) airline dataset using BigQuery and AI Platform Notebooks.
+
+ Looking at flights, the presenter points out how powerful it is to be able to make aggregates in BigQuery and then analyze them later in notebooks.
+
+ For example, we have 70M (8 GB) records in BigQuery that we then create an aggregate of, and we can actually plot these in our little Jupyter notebook much more cheaply.
+
+ ### LAB Improving Data Quality
+
+ Machine learning models can only consume numeric data, and that numeric data should be 1s or 0s. Data is said to be messy or untidy if it is missing attribute values, contains noise or outliers, has duplicates, wrong data, or upper/lower case column names, or is essentially not ready for ingestion by a machine learning algorithm.
+
+ In this lab, you will present and solve some of the most common issues of untidy data. Note that different problems will require different methods, and they are beyond the scope of this notebook.
+
+ What you learn
+
+ In this lab, you will:
+
+ Resolve missing values.
+
+ Convert the Date feature column to a datetime format.
+
+ Rename a feature column, remove a value from a feature column.
+
+ Create one-hot encoding features.
+
+ Understand temporal feature conversions.
+
+
+ In the notebook interface, navigate to **training-data-analyst > courses > machine_learning > deepdive2 > launching_into_ml > labs**, and open **improve_data_quality.ipynb**
+
+
+ Solutions notebook
+
+ https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/launching_into_ml/solutions/improve_data_quality.ipynb
+
+
+ #### Data Quality Issue #5: Temporal Feature Columns
+
+
+ Our dataset now contains year, month, and day feature columns. Let's convert the month and day feature columns to meaningful representations as a way to get us thinking about changing temporal features -- as they are sometimes overlooked.
+
+ Note that the Feature Engineering course in this Specialization will provide more depth on methods to handle year, month, day, and hour feature columns.
+
+
+ ```python
+ # Here we map each temporal variable onto a circle so that the lowest value
+ # for that variable appears right next to the largest value. We compute the
+ # x- and y-components of that point using the sin and cos trigonometric functions.
+ df['day_sin'] = np.sin(df.day*(2.*np.pi/31))
+ df['day_cos'] = np.cos(df.day*(2.*np.pi/31))
+ df['month_sin'] = np.sin((df.month-1)*(2.*np.pi/12))
+ df['month_cos'] = np.cos((df.month-1)*(2.*np.pi/12))
+
+ # Let's drop month, day, and year
+ # TODO 5
+ df = df.drop(['month','day','year'], axis=1)
+ ```
325
+
326
+
327
+ ### Exploratory Data Analysis Using Python and BigQuery (LAB)
328
+
329
+ In the notebook interface, navigate to training-data-analyst > courses > machine_learning > deepdive2 > launching_into_ml > labs and open python.BQ_explore_data.ipynb.
330
+
331
+
332
+ ### Improve Data Quality - Quiz
333
+
334
+ 1. Which of the following refers to the Orderliness of data?
335
+
336
+
337
+ The data record with specific details appears only once in the database
338
+ The data represents reality within a reasonable period
339
+ None of the above
340
+ x - The data entered has the required format and structure
341
+
342
+ 2. Which of the following are categories of data quality tools?
343
+
344
+ Cleaning tools
345
+ Monitoring tools
346
+ Both A and B
347
+ None of the Above
348
+
349
+ 3. What are the features of low data quality?
350
+ Unreliable info
351
+ Duplicated data
352
+ Incomplete data
353
+ All of the above
354
+
355
+ 4. Which of the following are best practices for data quality management?
356
+ Resolving missing values
357
+ Automating data entry
358
+ Preventing duplicates
359
+ All of the above
360
+
361
+
362
+ 5. Which of the following is not a Data Quality attribute?
363
+ Consistency
364
+ Auditability
365
+ Accuracy
366
+ x - redundancy
367
+
368
+
369
+ ### Exploratory Data Analysis Using Python and BigQuery - LAB
370
+
371
+ https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/launching_into_ml/solutions/python.BQ_explore_data.ipynb
372
+
373
+ ### Quiz: Exploratory Data Analysis
374
+
375
+ 1. Which of the following is not true about Exploratory Data Analysis?
376
+
377
+
378
+ Discovers new knowledge.
379
+ Generates a posteriori hypothesis.
380
+ Does not provide insight into the data. - x
381
+ Deals with unknowns.
382
+
383
+
384
+ 2. Exploratory Data Analysis is majorly performed using the following methods:
385
+ Bivariate
386
+ Univariate
387
+ both A & B -x
388
+ None of the above
389
+
390
+ 3. What are the objectives of exploratory data analysis?
391
+
392
+ Gain maximum insight into the data set and its underlying structure.
393
+ Check for missing data and other mistakes.
394
+ Uncover a parsimonious model, one which explains the data with a minimum number of predictor variables.
395
+ All of the above - x
396
+
397
+
398
+
399
+ 4. Which of the following is not a component of Exploratory Data Analysis?
400
+
401
+ Anomaly Detection
402
+ Accounting and Summarizing
403
+ Statistical Analysis and Clustering
404
+ Hyperparameter tuning - x
405
+
406
+ 5. Which is the correct sequence of steps in data analysis and data visualisation of Exploratory Data Analysis?
407
+
408
+ Data Exploration -> Data Cleaning -> Model Building -> Present Results - x
409
+ Data Exploration -> Data Cleaning -> Present Results -> Model Building
410
+ Data Exploration -> Model Building -> Present Results -> Data Cleaning
411
+ Data Exploration -> Model Building -> Data Cleaning -> Present Results
412
+
413
+ ### Quiz: Supervised Learning
414
+
415
+ 1. Which model would you use if your problem required a discrete number of values or classes?
416
+
417
+ Regression Model
418
+ Classification Model - x
419
+ Supervised Model
420
+ Unsupervised Model
421
+
422
+
423
+ 2. Which of the following machine learning models have labels, or in other words, the correct answers to whatever it is that we want to learn to predict?
424
+
425
+ Unsupervised Model
426
+ None of the above.
427
+ Reinforcement Model
428
+ Supervised Model - x
429
+
430
+ 3. Which statement is true?
431
+
432
+ Depending on the problem you are trying to solve, the data you have, explainability, etc. will not determine which machine learning methods you use to find a solution.
433
+ None of the above
434
+ Determining which machine learning methods you use to find a solution depends only on the problem or hypothesis.
435
+ Depending on the problem you are trying to solve, the data you have, explainability, etc. will determine which machine learning methods you use to find a solution. - x
436
+
437
+ 4. What is a type of Supervised machine learning model?
438
+
439
+ Regression model
440
+ Classification model
441
+ Both A & B - x
442
+ None of the above
443
+
444
+ 5. When the data isn’t labelled, what is an alternative way of predicting the output?
445
+
446
+ Clustering Algorithms -x
447
+ Logistic Regression
448
+ Linear Regression
449
+ None of the above
450
+
451
+
452
+
453
+
454
+ ### Introduction to Linear Regression
455
+
456
+ training-data-analyst > courses > machine_learning > deepdive2 > launching_into_ml > Labs and open intro_linear_regression.ipynb.
457
+
458
+ https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/launching_into_ml/solutions/intro_linear_regression.ipynb
459
+
460
+ ### Quiz: Neural Networks
461
+
462
+ 1. Which activation functions are needed to get the complex chain functions that allow neural networks to learn data distributions.
463
+
464
+ Nonlinear activation functions - x
465
+ Linear activation functions
466
+ All of the above
467
+ none of the above
468
+
469
+ 2. A single unit for a non-input neuron has ____________________ a/an
470
+
471
+ Output of the activation function
472
+ Activation function
473
+ Weighted Sum
474
+ all of the above - x
475
+
476
+ 3. Which of the following activation functions are used for nonlinearity?
477
+
478
+ Tanh
479
+ Hyperbolic tangent
480
+ Sigmoid
481
+ All of the above - x
482
+
483
+
484
+ 4. Which activation function has a range between zero and Infinity?
485
+
486
+ ReLU - x
487
+ Tanh
488
+ Sigmoid
489
+ ELU
490
+
491
+ 5. If we wanted our outputs to be in the form of probabilities, which activation function should I choose in the final layer?
492
+
493
+ ReLU
494
+ Tanh
495
+ Sigmoid - x
496
+ ELU
497
+
498
+ ### Decision trees and Random Forests LAB
499
+
500
+ https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/launching_into_ml/solutions/decision_trees_and_random_Forests_in_Python.ipynb
501
+
502
+ ### Quiz: Decision Trees AND Random Forests
503
+
504
+ 1. In a decision classification tree, what does each decision or node consist of?
505
+
506
+ Euclidean distance minimizer
507
+ Mean squared error minimizer
508
+ Linear classifier of one feature - x
509
+ Linear classifier of all features
510
+
511
+ 2. Which of the following statements is true?
512
+
513
+ Mean squared error minimizer and euclidean distance minimizer are used in classification, not regression.
514
+ Mean squared error minimizer and euclidean distance minimizer are used in regression, not classification. - x
515
+ Mean squared error minimizer and euclidean distance minimizer are not used in regression and classification.
516
+ Mean squared error minimizer and euclidean distance minimizer are used in regression and classification.
517
+
518
+ 3. Decision trees are one of the most intuitive machine learning algorithms. They can be used for which of the following?
519
+
520
+ Regression
521
+ Classification
522
+ Both A & B -x
523
+ None of the above
524
+
525
+
526
+ 4. A random forest is usually more complex than an individual decision tree; this makes it harder to visually interpret ?
527
+
528
+ True - x
529
+ False
530
+
531
+
532
+ ### Optimization Quiz
533
+
534
+ 1. For the formula used to model the relationship i.e. y = mx + b, what does ‘m’ stand for?
535
+
536
+
537
+ It refers to a bias term which can be used for regression.
538
+ It captures the amount of change we've observed in our label in response to a small change in our feature. - x
539
+ Both a & b
540
+ None of the above
541
+
542
+ 2. What are the basic steps in an ML workflow (or process)?
543
+
544
+ Check for anomalies, missing data and clean the data
545
+ Perform statistical analysis and initial visualization
546
+ Collect data
547
+ All of the above - x
548
+
549
+ 3. Which of the following statements is true?
550
+
551
+ To calculate the Prediction y for any Input value x we have three unknowns, the m = slope(Gradient), b = y-intercept(also called bias) and z = third degree polynomial.
552
+ To calculate the Prediction y for any Input value x we have two unknowns, the m = slope(Gradient) and b = y-intercept(also called bias). - x
553
+ None of the above
554
+ To calculate the Prediction y for any Input value x we have three unknowns, the m = slope(Gradient), b = y-intercept(also called bias) and z = hyperplane.
555
+
556
+ ### Optimization Quiz 2
557
+
558
+ 1. Fill in the blanks: Simply speaking, __________ is the workhorse of basic loss functions. ______ is the sum of squared distances between our target variable and predicted values.
559
+
560
+
561
+ Log loss
562
+ Likelihood
563
+ Mean Squared Error - x
564
+ None of the above
565
+
566
+
567
+ 2. Which of the following loss functions is used for classification problems?
568
+
569
+ MSE
570
+ cross entropy - x
571
+ Both A & B
572
+ None of the above
573
+
574
+ 3. Fill in the blanks: At its core, a ________ is a method of evaluating how well your algorithm models your dataset. If your predictions are totally off, your _________ will output a higher number. If they’re pretty good, it will output a lower number. As you change pieces of your algorithm to try and improve your model, your ______ will tell you if you’re getting anywhere.
575
+
576
+ Loss function - x
577
+ Bias term
578
+ Activation functions
579
+ Linear model
580
+
581
+ 4. Loss functions can be broadly categorized into 2 types: Classification and Regression Loss. _____ is typically used for regression and ______ is typically used for classification.
582
+
583
+ Log Loss, Focus Loss
584
+ Mean Squared Error, Cross Entropy - x
585
+ Cross Entropy, Log Loss
586
+ None of the above
587
+
588
+ ### Optimization Quiz - Gradients
589
+
590
+ 1. Which of the following gradient descent methods is used to compute the entire dataset?
591
+
592
+ Mini-batch gradient descent
593
+ Gradient descent
594
+ None of the above
595
+ Batch gradient descent -x
596
+
597
+
598
+ 2. Fill in the blanks. ________________: Parameters are updated after computing the gradient of error with respect to the entire training set ________________: Parameters are updated after computing the gradient of error with respect to a single training example ________________: Parameters are updated after computing the gradient of error with respect to a subset of the training set
599
+
600
+ Mini Batch Gradient Descent, Batch Gradient Descent, Stochastic Gradient Descent
601
+ Mini-Batch Gradient Descent, Stochastic Gradient Descent, Batch Gradient Descent
602
+ Batch Gradient Descent, Stochastic Gradient Descent, Mini-Batch Gradient Descent - x
603
+ None of the above
604
+
605
+ 3. Select which statement is true.
606
+
607
+ Batch gradient descent, also called vanilla gradient descent, calculates the error for each example within the training dataset, but only after all training examples have been evaluated does the model get updated. This whole process is like a cycle and it's called a training epoch. - x
608
+
609
+ Batch gradient descent, also called vanilla gradient descent, calculates the gain for each example within the training dataset, but only before all training examples have been evaluated does the model get updated. This whole process is like a cycle and it's called a training epoch.
610
+
611
+ Batch gradient descent, also called vanilla gradient descent, calculates the error for each example within the training dataset, but only before all training examples have been evaluated does the model get updated.
612
+
613
+ None of the above
614
+
615
+ 4. Select the correct statement(s) regarding gradient descent.
616
+
617
+ In machine learning, we use gradient descent to determine if our model labels needs to be de-optimized.
618
+
619
+ Gradient descent is an optimization algorithm used to minimize some function by iteratively moving in the direction of steepest descent as defined by the negative of the gradient. In machine learning, we use gradient descent to update the parameters of our model.
620
+
621
+ Gradient descent is an optimization algorithm used to maximize some function by iteratively moving in the direction of steepest descent as defined by the negative of the gradient. In machine learning, we use gradient descent to update the parameters of our model.
622
+
623
+ All of the above
624
+
625
+
626
+
code/gcp_sample_questions.md ADDED
@@ -0,0 +1,64 @@
+ Professional Machine Learning Engineer Exam Objectives
+ Frame ML problems
+ Architect ML solutions
+ Prepare and process data
+ Develop ML models
+ Automate & orchestrate ML pipelines
+ Monitor, optimize, and maintain ML solutions
+
+ Google Professional Machine Learning Engineer Sample Questions
+ NO.1 You are an ML engineer at a global shoe store. You manage the ML models for the company's website. You are asked to build a model that will recommend new products to the user based on their purchase behavior and similarity with other users. What should you do?
+ A. Build a collaborative-based filtering model
+ B. Build a classification model
+ C. Build a regression model using the features as predictors
+ D. Build a knowledge-based filtering model
+ Answer: A
+
+ NO.2 You have been asked to develop an input pipeline for an ML training model that processes images from disparate sources at low latency. You discover that your input data does not fit in memory. How should you create a dataset following Google-recommended best practices?
+ A. Convert the images to tf.Tensor objects, and then run tf.data.Dataset.from_tensors().
+ B. Convert the images to tf.Tensor objects, and then run tf.data.Dataset.from_tensor_slices().
+ C. Convert the images into TFRecords, store the images in Cloud Storage, and then use the tf.data API to read the images for training
+ D. Create a tf.data.Dataset.prefetch transformation
+ Answer: C
+
+
+ NO.3 You work for an online retail company that is creating a visual search engine. You have set up an end-to-end ML pipeline on Google Cloud to classify whether an image contains your company's product. Expecting the release of new products in the near future, you configured a retraining functionality in the pipeline so that new data can be fed into your ML models. You also want to use AI Platform's continuous evaluation service to ensure that the models have high accuracy on your test data set. What should you do?
+ A. Keep the original test dataset unchanged even if newer products are incorporated into retraining
+ B. Extend your test dataset with images of the newer products when they are introduced to retraining
+ C. Replace your test dataset with images of the newer products when they are introduced to retraining.
+ D. Update your test dataset with images of the newer products when your evaluation metrics drop below a pre-decided threshold.
+ Answer: C
+
+ NO.4 You are developing a Kubeflow pipeline on Google Kubernetes Engine. The first step in the pipeline is to issue a query against BigQuery. You plan to use the results of that query as the input to the next step in your pipeline. You want to achieve this in the easiest way possible. What should you do?
+ A. Use the BigQuery console to execute your query and then save the query results into a new BigQuery table.
+ B. Write a Python script that uses the BigQuery API to execute queries against BigQuery. Execute this script as the first step in your Kubeflow pipeline
+ C. Locate the Kubeflow Pipelines repository on GitHub. Find the BigQuery Query Component, copy that component's URL, and use it to load the component into your pipeline. Use the component to execute queries against BigQuery
+ D. Use the Kubeflow Pipelines domain-specific language to create a custom component that uses the Python BigQuery client library to execute queries
+ Answer: A
+
+ NO.5 You manage a team of data scientists who use a cloud-based backend system to submit training jobs. This system has become very difficult to administer, and you want to use a managed service instead. The data scientists you work with use many different frameworks, including Keras, PyTorch, Theano, scikit-learn, and custom libraries. What should you do?
+ A. Set up Slurm workload manager to receive jobs that can be scheduled to run on your cloud infrastructure.
+ B. Create a library of VM images on Compute Engine, and publish these images on a centralized repository
+ C. Configure Kubeflow to run on Google Kubernetes Engine and receive training jobs through TFJob
+ D. Use the AI Platform custom containers feature to receive training jobs using any framework
+ Answer: A
+
+ def LRUCache(strArr):
+     CACHE_SIZE = 5
+     CACHE_DELIMITER = '-'
+
+     cache = []
+     for element in strArr:
+         if element in cache:
+             # remove the first occurrence so that re-appending the element
+             # moves it to the most-recently-used end of the cache
+             cache.remove(element)
+         cache.append(element)
+
+     cacheString = CACHE_DELIMITER.join(cache[-CACHE_SIZE:])
+     return cacheString
+
+
+ # keep this function call here
+ print(LRUCache(input()))
code/huggingface_vit_beans/.DS_Store ADDED
Binary file (6.15 kB). View file
 
code/huggingface_vit_beans/huggingface_fine_tune_vit.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
code/interview_questions.md ADDED
@@ -0,0 +1,135 @@
+ https://brainstation.io/career-guides/machine-learning-engineer-interview-questions
+
+ ## What’s the trade-off between bias and variance?
+
+ https://machinelearningmastery.com/gentle-introduction-to-the-bias-variance-trade-off-in-machine-learning/
+
+ low bias ML algos: decision trees, k-nearest neighbors and support vector machines
+ high bias ML algos: linear regression, linear discriminant analysis and logistic regression
+
+
+ http://cs229.stanford.edu/summer2020/BiasVarianceAnalysis.pdf
+
+ Andrew Ng talks about this in CS229. (need to double check)
+
+ **bias** is about what the model is assuming about the data
+
+ **variance** is about how much the learned model changes when the training data changes -- not the number of features. High-variance models fit the noise of the particular training set.
+
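+ A tiny sketch of the trade-off using polynomial regression (scikit-learn assumed; the degrees are chosen arbitrarily):
+
+ ```python
+ import numpy as np
+ from sklearn.pipeline import make_pipeline
+ from sklearn.preprocessing import PolynomialFeatures
+ from sklearn.linear_model import LinearRegression
+ from sklearn.model_selection import cross_val_score
+
+ rng = np.random.RandomState(0)
+ X = np.sort(rng.uniform(0, 1, 40))[:, None]
+ y = np.sin(2 * np.pi * X).ravel() + rng.normal(0, 0.2, 40)
+
+ # degree 1 underfits (high bias), degree 15 overfits (high variance)
+ for degree in (1, 4, 15):
+     model = make_pipeline(PolynomialFeatures(degree), LinearRegression())
+     mse = -cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv=5).mean()
+     print(degree, mse)
+ ```
+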
+ ## How is KNN different from k-means clustering?
+
+ Short answer: k-means is unsupervised (it invents clusters from unlabeled data), while k-nearest neighbors is supervised (it predicts a label by voting among the k closest labeled points). See the sketch below.
+
+ k-means
+ https://dzone.com/articles/10-interesting-use-cases-for-the-k-means-algorithm
+ https://www.wikiwand.com/en/K-means_clustering
+ https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
+
+ k-nearest neighbors
+ https://towardsdatascience.com/k-nearest-neighbors-knn-algorithm-23832490e3f4
+ https://www.wikiwand.com/en/K-nearest_neighbors_algorithm
+ https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html
+
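+ A small sketch of the contrast (scikit-learn assumed; the data is a toy example):
+
+ ```python
+ from sklearn.cluster import KMeans
+ from sklearn.neighbors import KNeighborsClassifier
+
+ X = [[0, 0], [0, 1], [5, 5], [5, 6]]
+ y = [0, 0, 1, 1]  # labels only exist for the supervised case
+
+ # k-means: unsupervised -- invents its own groups, never sees y
+ print(KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(X))
+
+ # KNN: supervised -- predicts from the labels of the k nearest training points
+ print(KNeighborsClassifier(n_neighbors=3).fit(X, y).predict([[4, 5]]))
+ ```
+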
+ ## What is cross validation and what are different methods of using it?
+
+ https://towardsdatascience.com/understanding-8-types-of-cross-validation-80c935a4976d
+
+ The main variants (a stratified k-fold sketch follows this list):
+
+ Leave p out cross-validation
+ Leave one out cross-validation
+ Holdout cross-validation
+ Repeated random subsampling validation
+ k-fold cross-validation
+ Stratified k-fold cross-validation
+ Time series cross-validation
+ Nested cross-validation
+
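+ A minimal stratified k-fold sketch (scikit-learn assumed):
+
+ ```python
+ from sklearn.datasets import load_iris
+ from sklearn.linear_model import LogisticRegression
+ from sklearn.model_selection import StratifiedKFold, cross_val_score
+
+ X, y = load_iris(return_X_y=True)
+ clf = LogisticRegression(max_iter=1000)
+
+ # stratified k-fold keeps each fold's class balance close to the full set's
+ cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
+ print(cross_val_score(clf, X, y, cv=cv).mean())
+ ```
+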
+ ## Explain how a ROC curve works.
+
+ Aside: the ROC is computed over all classification thresholds, even though the threshold isn't a parameter of the logistic regression model itself. The answer is that you can just change the threshold after the fact, as in the snippet below.
+
+ https://towardsdatascience.com/classification-metrics-thresholds-explained-caff18ad2747
+
+ ```python
+ # Adjusting the threshold down from 0.5 to 0.25: any data point with a
+ # predicted probability of 0.25 or higher will be classified as 1.
+ # (X_train, y_train, X_test come from your own train/test split.)
+ import numpy as np
+ from sklearn.linear_model import LogisticRegression
+
+ clf = LogisticRegression()
+ clf.fit(X_train, y_train)
+ THRESHOLD = 0.25
+ y_pred = np.where(clf.predict_proba(X_test)[:, 1] >= THRESHOLD, 1, 0)
+ # equivalent formulation:
+ y_pred_new_threshold = (clf.predict_proba(X_test)[:, 1] >= THRESHOLD).astype(int)
+ ```
+
+ The Receiver Operator Characteristic (ROC) curve is an evaluation metric for binary classification problems. It is a probability curve that plots the TPR against FPR at various threshold values and essentially separates the ‘signal’ from the ‘noise’. The Area Under the Curve (AUC) is the measure of the ability of a classifier to distinguish between classes and is used as a summary of the ROC curve.
+
+ https://www.analyticsvidhya.com/blog/2020/06/auc-roc-curve-machine-learning/
+
+ ## What's the difference between "likelihood" and "probability"
+
+ Probability quantifies anticipation (of outcome); likelihood quantifies trust (in model).
+
+ Suppose somebody challenges us to a 'profitable gambling game'. Then, probabilities will serve us to compute things like the expected profile of our gains and losses (mean, mode, median, variance, information ratio, value at risk, gambler's ruin, and so on). In contrast, likelihood will serve us to quantify whether we trust those probabilities in the first place; or whether we 'smell a rat'.
+
+ Incidentally -- since somebody above mentioned the religions of statistics -- I believe the likelihood ratio to be an integral part of the Bayesian world as well as of the frequentist one: in the Bayesian world, Bayes' formula just combines the prior with the likelihood to produce the posterior.
+
+ https://stats.stackexchange.com/questions/2641/what-is-the-difference-between-likelihood-and-probability
+
77
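+ A concrete way to see the distinction with a binomial model (scipy assumed; the numbers are made up):
+
+ ```python
+ from scipy.stats import binom
+
+ # Probability: fix the model (a fair coin, p = 0.5) and ask about outcomes.
+ print(binom.pmf(7, n=10, p=0.5))  # P(7 heads in 10 flips | p = 0.5)
+
+ # Likelihood: fix the observed data (7 heads in 10 flips) and ask how
+ # plausible each candidate parameter p is. Same pmf, different variable.
+ for p in (0.3, 0.5, 0.7):
+     print(p, binom.pmf(7, n=10, p=p))  # maximized at p = 0.7, the MLE
+ ```
+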
+ ## How to prune decision trees?
+
+ 1. pre-pruning (early stopping) - stop growing the tree early, e.g. by limiting max depth or minimum samples per leaf
+ 2. post-pruning - fit the tree to purity first, then prune it back (e.g. cost-complexity pruning)
+
+ https://www.kaggle.com/arunmohan003/pruning-decision-trees-tutorial
+
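+ A sketch of both approaches with scikit-learn's `DecisionTreeClassifier` (the dataset and the particular alpha are just for illustration):
+
+ ```python
+ from sklearn.datasets import load_breast_cancer
+ from sklearn.model_selection import train_test_split
+ from sklearn.tree import DecisionTreeClassifier
+
+ X, y = load_breast_cancer(return_X_y=True)
+ X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+
+ # pre-pruning: constrain growth up front
+ pre = DecisionTreeClassifier(max_depth=3, min_samples_leaf=5).fit(X_train, y_train)
+
+ # post-pruning: compute the cost-complexity path, then refit with an alpha from it
+ path = DecisionTreeClassifier(random_state=0).cost_complexity_pruning_path(X_train, y_train)
+ alpha = path.ccp_alphas[len(path.ccp_alphas) // 2]  # arbitrary middle-of-path pick
+ post = DecisionTreeClassifier(random_state=0, ccp_alpha=alpha).fit(X_train, y_train)
+
+ print(pre.score(X_test, y_test), post.score(X_test, y_test))
+ ```
+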
+ ## How can you choose a classifier based on training set size?
+
+ https://www.researchgate.net/post/How-to-decide-the-best-classifier-based-on-the-data-set-provided
+
+ As far as I know there is no well-defined rule for this task. In general, it depends on the kind of data and the ratio of samples to features. For instance, naive Bayes or a linear SVM work well for text classification/categorization. For datasets with numerical attributes: a linear SVM, a neural network, or logistic regression if the number of features is much greater than the number of samples; a neural network or an SVM with an RBF or polynomial kernel if the number of samples is not too large but exceeds the number of features; and a neural network or a linear SVM if the number of samples is huge. Obviously, there are other options for each scenario beyond these.
+
+ ## What methods for dimensionality reduction do you know and how do they compare with each other?
+
+ https://towardsdatascience.com/11-dimensionality-reduction-techniques-you-should-know-in-2021-dcb9500d388b
+
+ The big ones seem to be:
+
+ Principal Component Analysis (PCA) is a dimension reduction technique that projects the data onto k dimensions chosen to maximize the variance of the projected data:
+
+ https://stanford.edu/~shervine/teaching/cs-229/cheatsheet-unsupervised-learning#dimension-reduction
+
+ Independent Component Analysis (ICA) is a technique meant to find the underlying generating sources.
+
+ LDA
+ https://www.wikiwand.com/en/Linear_discriminant_analysis
+
+ LDA is closely related to PCA and factor analysis in that they all look for linear combinations of variables which best explain the data. LDA explicitly attempts to model the difference between the classes of the data; PCA, in contrast, does not take class labels into account at all, and factor analysis builds the feature combinations based on differences rather than similarities. Discriminant analysis also differs from factor analysis in that it is not an interdependence technique: a distinction between independent variables and dependent (criterion) variables must be made.
+
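+ A minimal PCA sketch in scikit-learn (iris as a stand-in; standardizing first because PCA is scale-sensitive):
+
+ ```python
+ from sklearn.datasets import load_iris
+ from sklearn.decomposition import PCA
+ from sklearn.preprocessing import StandardScaler
+
+ X, _ = load_iris(return_X_y=True)
+ X_scaled = StandardScaler().fit_transform(X)
+
+ pca = PCA(n_components=2)
+ X_2d = pca.fit_transform(X_scaled)  # 4 features -> 2 components
+ print(pca.explained_variance_ratio_)  # variance captured per component
+ ```
+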
+ ## What’s an imbalanced dataset? Can you list some ways to deal with it?
+
+ Any dataset with an unequal class distribution is technically imbalanced.
+
+ Here are some techniques to handle imbalanced data:
+
+ Resample the training set: the two main approaches to making a balanced dataset out of an imbalanced one are under-sampling and over-sampling.
+ Generate synthetic samples: use SMOTE (Synthetic Minority Oversampling Technique) to generate new, synthetic minority-class data to train the model on.
+
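+ A sketch of SMOTE, assuming the imbalanced-learn package (`imblearn`) is installed; the dataset is synthetic, purely for illustration:
+
+ ```python
+ from collections import Counter
+
+ from imblearn.over_sampling import SMOTE
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=1000, weights=[0.95, 0.05], random_state=0)
+ print(Counter(y))  # heavily imbalanced, roughly 95/5
+
+ X_res, y_res = SMOTE(random_state=0).fit_resample(X, y)
+ print(Counter(y_res))  # minority class oversampled to parity
+ ```
+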
+ # More Questions
+
+ https://elitedatascience.com/machine-learning-interview-questions-answers
+
+ ## Explain the Bias-Variance Tradeoff.
+
+ https://elitedatascience.com/bias-variance-tradeoff
+
+ Predictive models have a tradeoff between bias (error from overly simple assumptions about the data) and variance (error from being overly sensitive to the particular training inputs).
+
+ Simpler models are stable (low variance) but they don't get close to the truth (high bias).
+
+ More complex models are more prone to overfitting (high variance) but they are expressive enough to get close to the truth (low bias).
+
+ The best model for a given problem usually lies somewhere in the middle.
+
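+ One way to see the tradeoff numerically: fit polynomial models of increasing degree and watch cross-validated error fall and then rise again (a sketch; the data is synthetic):
+
+ ```python
+ import numpy as np
+ from sklearn.linear_model import LinearRegression
+ from sklearn.model_selection import cross_val_score
+ from sklearn.pipeline import make_pipeline
+ from sklearn.preprocessing import PolynomialFeatures
+
+ rng = np.random.default_rng(0)
+ X = np.sort(rng.uniform(0, 1, 40))[:, None]
+ y = np.sin(2 * np.pi * X).ravel() + rng.normal(0, 0.2, 40)
+
+ for degree in (1, 4, 15):  # underfit, about right, overfit
+     model = make_pipeline(PolynomialFeatures(degree), LinearRegression())
+     mse = -cross_val_score(model, X, y, cv=5, scoring="neg_mean_squared_error").mean()
+     print(degree, round(mse, 3))  # typically U-shaped in the degree
+ ```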
code/leetcode/blind_75.ipynb ADDED
@@ -0,0 +1,768 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# 217. Contains Duplicate\n",
8
+ "\n",
9
+ "Given an integer array nums, return true if any value appears at least twice in the array, and return false if every element is distinct.\n",
10
+ "\n",
11
+ "Example 1:\n",
12
+ "\n",
13
+ "Input: nums = [1,2,3,1]\n",
14
+ "Output: true\n",
15
+ "\n",
16
+ "Example 2:\n",
17
+ "\n",
18
+ "Input: nums = [1,2,3,4]\n",
19
+ "Output: false\n",
20
+ "\n",
21
+ "Example 3:\n",
22
+ "\n",
23
+ "Input: nums = [1,1,1,3,3,4,3,2,4,2]\n",
24
+ "Output: true\n",
25
+ "\n",
26
+ "Constraints:\n",
27
+ "\n",
28
+ " 1 <= nums.length <= 105\n",
29
+ " -109 <= nums[i] <= 109\n"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 9,
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": [
38
+ "from typing import List\n",
39
+ "\n",
40
+ "\n",
41
+ "class Solution:\n",
42
+ " def containsDuplicate(self, nums: List[int]) -> bool:\n",
43
+ " return not (len(set(nums)) == len(nums))"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": null,
49
+ "metadata": {},
50
+ "outputs": [],
51
+ "source": [
52
+ "class SolutionNeetCode:\n",
53
+ " def containsDuplicate(self, nums: List[int]) -> bool:\n",
54
+ " hashset = set()\n",
55
+ "\n",
56
+ " for n in nums:\n",
57
+ " if n in hashset:\n",
58
+ " return True\n",
59
+ " hashset.add(n)\n",
60
+ " return False"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": 7,
66
+ "metadata": {},
67
+ "outputs": [
68
+ {
69
+ "data": {
70
+ "text/plain": [
71
+ "True"
72
+ ]
73
+ },
74
+ "execution_count": 7,
75
+ "metadata": {},
76
+ "output_type": "execute_result"
77
+ }
78
+ ],
79
+ "source": [
80
+ "nums = [1, 2, 3, 1]\n",
81
+ "\n",
82
+ "not (len(set(nums)) == len(nums))"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": 10,
88
+ "metadata": {},
89
+ "outputs": [
90
+ {
91
+ "data": {
92
+ "text/plain": [
93
+ "True"
94
+ ]
95
+ },
96
+ "execution_count": 10,
97
+ "metadata": {},
98
+ "output_type": "execute_result"
99
+ }
100
+ ],
101
+ "source": [
102
+ "nums = [1, 2, 3, 1]\n",
103
+ "\n",
104
+ "Solution().containsDuplicate(nums)"
105
+ ]
106
+ },
107
+ {
108
+ "cell_type": "markdown",
109
+ "metadata": {},
110
+ "source": [
111
+ "# 242. Valid Anagram\n",
112
+ "\n",
113
+ "Given two strings s and t, return true if t is an anagram of s, and false otherwise.\n",
114
+ "\n",
115
+ "An Anagram is a word or phrase formed by rearranging the letters of a different word or phrase, typically using all the original letters exactly once.\n",
116
+ "\n",
117
+ "Example 1:\n",
118
+ "\n",
119
+ "Input: s = \"anagram\", t = \"nagaram\"\n",
120
+ "Output: true\n",
121
+ "\n",
122
+ "Example 2:\n",
123
+ "\n",
124
+ "Input: s = \"rat\", t = \"car\"\n",
125
+ "Output: false\n",
126
+ "\n",
127
+ "Constraints:\n",
128
+ "\n",
129
+ " 1 <= s.length, t.length <= 5 * 104\n",
130
+ " s and t consist of lowercase English letters.\n",
131
+ "\n",
132
+ "Follow up: What if the inputs contain Unicode characters? How would you adapt your solution to such a case?\n"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": 29,
138
+ "metadata": {},
139
+ "outputs": [],
140
+ "source": [
141
+ "s = \"anagram\"\n",
142
+ "t = \"nagaramdsafhjsadlfhj\""
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": 47,
148
+ "metadata": {},
149
+ "outputs": [
150
+ {
151
+ "name": "stdout",
152
+ "output_type": "stream",
153
+ "text": [
154
+ "{'a': 3, 'n': 1, 'g': 1, 'r': 1, 'm': 1}\n",
155
+ "{'n': 1, 'a': 3, 'g': 1, 'r': 1, 'm': 1}\n"
156
+ ]
157
+ },
158
+ {
159
+ "data": {
160
+ "text/plain": [
161
+ "True"
162
+ ]
163
+ },
164
+ "execution_count": 47,
165
+ "metadata": {},
166
+ "output_type": "execute_result"
167
+ }
168
+ ],
169
+ "source": [
170
+ "from itertools import zip_longest\n",
171
+ "\n",
172
+ "s = \"anagram\"\n",
173
+ "t = \"nagaram\"\n",
174
+ "\n",
175
+ "s_dict = dict()\n",
176
+ "t_dict = dict()\n",
177
+ "\n",
178
+ "for s_char, t_char in zip_longest(s, t):\n",
179
+ " s_dict[s_char] = s_dict.get(s_char, 0) + 1\n",
180
+ " t_dict[t_char] = t_dict.get(t_char, 0) + 1\n",
181
+ "\n",
182
+ "print(s_dict)\n",
183
+ "print(t_dict)\n",
184
+ "\n",
185
+ "s_dict == t_dict"
186
+ ]
187
+ },
188
+ {
189
+ "cell_type": "code",
190
+ "execution_count": 23,
191
+ "metadata": {},
192
+ "outputs": [
193
+ {
194
+ "name": "stdout",
195
+ "output_type": "stream",
196
+ "text": [
197
+ "None\n"
198
+ ]
199
+ }
200
+ ],
201
+ "source": [
202
+ "print(s_dict.get(None))"
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "code",
207
+ "execution_count": 42,
208
+ "metadata": {},
209
+ "outputs": [
210
+ {
211
+ "data": {
212
+ "text/plain": [
213
+ "'nagaramdsafhjsadlfhj'"
214
+ ]
215
+ },
216
+ "execution_count": 42,
217
+ "metadata": {},
218
+ "output_type": "execute_result"
219
+ }
220
+ ],
221
+ "source": [
222
+ "t"
223
+ ]
224
+ },
225
+ {
226
+ "cell_type": "markdown",
227
+ "metadata": {},
228
+ "source": [
229
+ "# 1. Two Sum\n",
230
+ "\n",
231
+ "Given an array of integers nums and an integer target, return indices of the two numbers such that they add up to target.\n",
232
+ "\n",
233
+ "You may assume that each input would have exactly one solution, and you may not use the same element twice.\n",
234
+ "\n",
235
+ "You can return the answer in any order.\n",
236
+ "\n",
237
+ "Example 1:\n",
238
+ "\n",
239
+ "Input: nums = [2,7,11,15], target = 9\n",
240
+ "Output: [0,1]\n",
241
+ "Explanation: Because nums[0] + nums[1] == 9, we return [0, 1].\n",
242
+ "\n",
243
+ "Example 2:\n",
244
+ "\n",
245
+ "Input: nums = [3,2,4], target = 6\n",
246
+ "Output: [1,2]\n",
247
+ "\n",
248
+ "Example 3:\n",
249
+ "\n",
250
+ "Input: nums = [3,3], target = 6\n",
251
+ "Output: [0,1]\n"
252
+ ]
253
+ },
254
+ {
255
+ "cell_type": "code",
256
+ "execution_count": 50,
257
+ "metadata": {},
258
+ "outputs": [],
259
+ "source": [
260
+ "from typing import List\n",
261
+ "\n",
262
+ "\n",
263
+ "class Solution:\n",
264
+ " def twoSum(self, nums: List[int], target: int) -> List[int]:\n",
265
+ " prevMap = {} # val -> index\n",
266
+ "\n",
267
+ " for i, n in enumerate(nums):\n",
268
+ " diff = target - n\n",
269
+ " if diff in prevMap:\n",
270
+ " return [prevMap[diff], i]\n",
271
+ " prevMap[n] = i"
272
+ ]
273
+ },
274
+ {
275
+ "cell_type": "code",
276
+ "execution_count": 52,
277
+ "metadata": {},
278
+ "outputs": [
279
+ {
280
+ "data": {
281
+ "text/plain": [
282
+ "[0, 1]"
283
+ ]
284
+ },
285
+ "execution_count": 52,
286
+ "metadata": {},
287
+ "output_type": "execute_result"
288
+ }
289
+ ],
290
+ "source": [
291
+ "nums = [2, 7, 11, 5]\n",
292
+ "\n",
293
+ "target = 9\n",
294
+ "\n",
295
+ "Solution().twoSum(nums, target)"
296
+ ]
297
+ },
298
+ {
299
+ "cell_type": "code",
300
+ "execution_count": 68,
301
+ "metadata": {},
302
+ "outputs": [
303
+ {
304
+ "name": "stdout",
305
+ "output_type": "stream",
306
+ "text": [
307
+ "{3: 0, 2: 1, 4: 2}\n",
308
+ "[1, 2]\n"
309
+ ]
310
+ }
311
+ ],
312
+ "source": [
313
+ "nums_2 = [3, 2, 4]\n",
314
+ "target_2 = 6\n",
315
+ "\n",
316
+ "data = {k: v for v, k in enumerate(nums_2)}\n",
317
+ "print(data)\n",
318
+ "\n",
319
+ "\n",
320
+ "def find_two_sum(data, target):\n",
321
+ " for key, current_index in data.items():\n",
322
+ " difference = target - key\n",
323
+ " try:\n",
324
+ " found_index = data[difference]\n",
325
+ " if found_index == current_index:\n",
326
+ " # no match found -- continue looking\n",
327
+ " continue\n",
328
+ " return [current_index, found_index]\n",
329
+ " except KeyError:\n",
330
+ " # i guess uncessary because we have been told that a solution is guaranteed. \n",
331
+ " print(\"no index found\")\n",
332
+ " except Exception as e:\n",
333
+ " print(\"some other error\")\n",
334
+ "\n",
335
+ "\n",
336
+ "print(find_two_sum(data, target_2))"
337
+ ]
338
+ },
339
+ {
340
+ "cell_type": "markdown",
341
+ "metadata": {},
342
+ "source": [
343
+ "# 49. Group Anagrams\n",
344
+ "\n",
345
+ "Given an array of strings strs, group the anagrams together. You can return the answer in any order.\n",
346
+ "\n",
347
+ "An Anagram is a word or phrase formed by rearranging the letters of a different word or phrase, typically using all the original letters exactly once.\n",
348
+ "\n",
349
+ " \n",
350
+ "\n",
351
+ "Example 1:\n",
352
+ "\n",
353
+ "Input: strs = [\"eat\",\"tea\",\"tan\",\"ate\",\"nat\",\"bat\"]\n",
354
+ "Output: [[\"bat\"],[\"nat\",\"tan\"],[\"ate\",\"eat\",\"tea\"]]\n",
355
+ "\n",
356
+ "Example 2:\n",
357
+ "\n",
358
+ "Input: strs = [\"\"]\n",
359
+ "Output: [[\"\"]]\n",
360
+ "\n",
361
+ "Example 3:\n",
362
+ "\n",
363
+ "Input: strs = [\"a\"]\n",
364
+ "Output: [[\"a\"]]\n"
365
+ ]
366
+ },
367
+ {
368
+ "cell_type": "code",
369
+ "execution_count": 72,
370
+ "metadata": {},
371
+ "outputs": [
372
+ {
373
+ "data": {
374
+ "text/plain": [
375
+ "[['eat'], ['tea'], ['tan'], ['ate'], ['nat'], ['bat']]"
376
+ ]
377
+ },
378
+ "execution_count": 72,
379
+ "metadata": {},
380
+ "output_type": "execute_result"
381
+ }
382
+ ],
383
+ "source": [
384
+ "strs = [\"eat\",\"tea\",\"tan\",\"ate\",\"nat\",\"bat\"]\n",
385
+ "\n",
386
+ "output = []\n",
387
+ "for word in strs:\n",
388
+ " # check if anagram in output and if so append to that array \n",
389
+ " for \n",
390
+ " # can't think of how to do this without doing another double for loop! \n",
391
+ "\n",
392
+ " # if no anagram found then append\n",
393
+ " output.append([word])\n",
394
+ "\n",
395
+ "# [\"eat\"] in output\n",
396
+ "output"
397
+ ]
398
+ },
399
+ {
400
+ "cell_type": "code",
401
+ "execution_count": 76,
402
+ "metadata": {},
403
+ "outputs": [
404
+ {
405
+ "data": {
406
+ "text/plain": [
407
+ "dict_values([['eat', 'tea', 'ate'], ['tan', 'nat'], ['bat']])"
408
+ ]
409
+ },
410
+ "execution_count": 76,
411
+ "metadata": {},
412
+ "output_type": "execute_result"
413
+ }
414
+ ],
415
+ "source": [
416
+ "from collections import defaultdict\n",
417
+ "\n",
418
+ "class Solution:\n",
419
+ " def groupAnagrams(self, strs: List[str]) -> List[List[str]]:\n",
420
+ " ans = defaultdict(list)\n",
421
+ "\n",
422
+ " for s in strs:\n",
423
+ " count = [0] * 26\n",
424
+ " for c in s:\n",
425
+ " count[ord(c) - ord(\"a\")] += 1\n",
426
+ " # this maps the characters to values to check for anagram \n",
427
+ " ans[tuple(count)].append(s)\n",
428
+ " return ans.values()\n",
429
+ "\n",
430
+ "Solution().groupAnagrams(strs)"
431
+ ]
432
+ },
433
+ {
434
+ "cell_type": "markdown",
435
+ "metadata": {},
436
+ "source": [
437
+ "# 347. Top K Frequent Elements\n",
438
+ "\n",
439
+ "Given an integer array nums and an integer k, return the k most frequent elements. You may return the answer in any order.\n",
440
+ "\n",
441
+ " \n",
442
+ "\n",
443
+ "Example 1:\n",
444
+ "\n",
445
+ "Input: nums = [1,1,1,2,2,3], k = 2\n",
446
+ "Output: [1,2]\n",
447
+ "\n",
448
+ "Example 2:\n",
449
+ "\n",
450
+ "Input: nums = [1], k = 1\n",
451
+ "Output: [1]\n",
452
+ "\n",
453
+ " \n",
454
+ "\n",
455
+ "Constraints:\n",
456
+ "\n",
457
+ " 1 <= nums.length <= 105\n",
458
+ " -104 <= nums[i] <= 104\n",
459
+ " k is in the range [1, the number of unique elements in the array].\n",
460
+ " It is guaranteed that the answer is unique.\n",
461
+ "\n",
462
+ " \n",
463
+ "\n",
464
+ "Follow up: Your algorithm's time complexity must be better than O(n log n), where n is the array's size.\n"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": 80,
470
+ "metadata": {},
471
+ "outputs": [],
472
+ "source": [
473
+ "nums = [1,1,1,2,2,3] \n",
474
+ "k = 2\n",
475
+ "\n",
476
+ "from collections import OrderedDict\n",
477
+ "\n",
478
+ "# foo = OrderedDict()\n",
479
+ "foo = {}\n",
480
+ "\n",
481
+ "for number in nums:\n",
482
+ " foo[number] = foo.get(number, 0) + 1\n",
483
+ "\n",
484
+ "# neetcode says using a heap and popping off the top k elements would be k log(n) time too \n",
485
+ " \n"
486
+ ]
487
+ },
488
+ {
489
+ "cell_type": "code",
490
+ "execution_count": 81,
491
+ "metadata": {},
492
+ "outputs": [
493
+ {
494
+ "data": {
495
+ "text/plain": [
496
+ "{1: 3, 2: 2, 3: 1}"
497
+ ]
498
+ },
499
+ "execution_count": 81,
500
+ "metadata": {},
501
+ "output_type": "execute_result"
502
+ }
503
+ ],
504
+ "source": [
505
+ "foo"
506
+ ]
507
+ },
508
+ {
509
+ "cell_type": "markdown",
510
+ "metadata": {},
511
+ "source": [
512
+ "# 238. Product of Array Except Self\n",
513
+ "\n",
514
+ "Given an integer array nums, return an array answer such that answer[i] is equal to the product of all the elements of nums except nums[i].\n",
515
+ "\n",
516
+ "The product of any prefix or suffix of nums is guaranteed to fit in a 32-bit integer.\n",
517
+ "\n",
518
+ "You must write an algorithm that runs in O(n) time and without using the division operation.\n",
519
+ "\n",
520
+ " \n",
521
+ "\n",
522
+ "Example 1:\n",
523
+ "\n",
524
+ "Input: nums = [1,2,3,4]\n",
525
+ "Output: [24,12,8,6]\n",
526
+ "\n",
527
+ "Example 2:\n",
528
+ "\n",
529
+ "Input: nums = [-1,1,0,-3,3]\n",
530
+ "Output: [0,0,9,0,0]\n",
531
+ "\n",
532
+ " \n",
533
+ "\n",
534
+ "Constraints:\n",
535
+ "\n",
536
+ " 2 <= nums.length <= 105\n",
537
+ " -30 <= nums[i] <= 30\n",
538
+ " The product of any prefix or suffix of nums is guaranteed to fit in a 32-bit integer.\n",
539
+ "\n",
540
+ " \n",
541
+ "\n",
542
+ "Follow up: Can you solve the problem in O(1) extra space complexity? (The output array does not count as extra space for space complexity analysis.)\n"
543
+ ]
544
+ },
545
+ {
546
+ "cell_type": "code",
547
+ "execution_count": 88,
548
+ "metadata": {},
549
+ "outputs": [
550
+ {
551
+ "name": "stdout",
552
+ "output_type": "stream",
553
+ "text": [
554
+ "None\n",
555
+ "None\n",
556
+ "None\n",
557
+ "None\n"
558
+ ]
559
+ },
560
+ {
561
+ "data": {
562
+ "text/plain": [
563
+ "[24, 12, 8, 6]"
564
+ ]
565
+ },
566
+ "execution_count": 88,
567
+ "metadata": {},
568
+ "output_type": "execute_result"
569
+ }
570
+ ],
571
+ "source": [
572
+ "import math \n",
573
+ "\n",
574
+ "nums = [1,2,3,4]\n",
575
+ "output = [24,12,8,6]\n",
576
+ "\n",
577
+ "math.prod(nums) \n",
578
+ "\n",
579
+ "output_2 = []\n",
580
+ "for value in nums: \n",
581
+ " print(output_2.append(int(math.prod(nums) * (1.0/value))))\n",
582
+ "\n",
583
+ "output_2"
584
+ ]
585
+ },
586
+ {
587
+ "cell_type": "code",
588
+ "execution_count": 93,
589
+ "metadata": {},
590
+ "outputs": [
591
+ {
592
+ "data": {
593
+ "text/plain": [
594
+ "[1, 3, 4]"
595
+ ]
596
+ },
597
+ "execution_count": 93,
598
+ "metadata": {},
599
+ "output_type": "execute_result"
600
+ }
601
+ ],
602
+ "source": [
603
+ "# could concatenate arrays like this and then add them up? \n",
604
+ "nums[0:1] + nums[2:]\n"
605
+ ]
606
+ },
607
+ {
608
+ "cell_type": "markdown",
609
+ "metadata": {},
610
+ "source": [
611
+ "# MLE Flashcards problem \n",
612
+ "\n",
613
+ "Solve fib(n) using dynamic programming of memoization and tabulation\n"
614
+ ]
615
+ },
616
+ {
617
+ "cell_type": "code",
618
+ "execution_count": 111,
619
+ "metadata": {},
620
+ "outputs": [
621
+ {
622
+ "name": "stdout",
623
+ "output_type": "stream",
624
+ "text": [
625
+ "The 5-th Fibonacci number is: 5\n"
626
+ ]
627
+ }
628
+ ],
629
+ "source": [
630
+ "# def fib_memoized(n, memo={}):\n",
631
+ "# if n == 0 or n == 1:\n",
632
+ "# return n \n",
633
+ " \n",
634
+ "# if n not in memo:\n",
635
+ "# memo[n] = fib_memoized(n-1, memo) + fib_memoized(n-2, memo)\n",
636
+ "\n",
637
+ "# return memo\n",
638
+ " \n",
639
+ "# # memoization = {}\n",
640
+ "# fib_memoized(22)\n",
641
+ "# memoization\n",
642
+ "\n",
643
+ "# this shit is crazyyyy ! \n",
644
+ "def fib_n(n, memo={}):\n",
645
+ " # Check if the result is already memoized\n",
646
+ " if n in memo:\n",
647
+ " return memo[n]\n",
648
+ " \n",
649
+ " # Base cases\n",
650
+ " if n == 0:\n",
651
+ " result = 0\n",
652
+ " elif n == 1:\n",
653
+ " result = 1\n",
654
+ " else:\n",
655
+ " # Recursive calls with memoization\n",
656
+ " result = fib_n(n - 1, memo) + fib_n(n - 2, memo)\n",
657
+ " \n",
658
+ " # Memoize the result before returning\n",
659
+ " memo[n] = result\n",
660
+ " return result\n",
661
+ "\n",
662
+ "# # Example usage\n",
663
+ "n = 5\n",
664
+ "result = fib_n(n)\n",
665
+ "print(f\"The {n}-th Fibonacci number is: {result}\")\n"
666
+ ]
667
+ },
668
+ {
669
+ "cell_type": "code",
670
+ "execution_count": 110,
671
+ "metadata": {},
672
+ "outputs": [
673
+ {
674
+ "name": "stdout",
675
+ "output_type": "stream",
676
+ "text": [
677
+ "The 5-th Fibonacci number is: 5\n"
678
+ ]
679
+ }
680
+ ],
681
+ "source": [
682
+ "def fib_n(n):\n",
683
+ " # Base cases\n",
684
+ " if n == 0:\n",
685
+ " return 0\n",
686
+ " elif n == 1:\n",
687
+ " return 1\n",
688
+ " \n",
689
+ " # Create a table to store Fibonacci numbers\n",
690
+ " fib_table = [0] * (n + 1)\n",
691
+ " \n",
692
+ " # Initialize the base cases\n",
693
+ " fib_table[0] = 0\n",
694
+ " fib_table[1] = 1\n",
695
+ " \n",
696
+ " # Fill in the table using bottom-up approach\n",
697
+ " for i in range(2, n + 1):\n",
698
+ " fib_table[i] = fib_table[i - 1] + fib_table[i - 2]\n",
699
+ " \n",
700
+ " # The result is the value at index n\n",
701
+ " return fib_table[n]\n",
702
+ "\n",
703
+ "# Example usage\n",
704
+ "n = 5\n",
705
+ "result = fib_n(n)\n",
706
+ "print(f\"The {n}-th Fibonacci number is: {result}\")\n"
707
+ ]
708
+ },
709
+ {
710
+ "cell_type": "code",
711
+ "execution_count": 112,
712
+ "metadata": {},
713
+ "outputs": [
714
+ {
715
+ "name": "stdout",
716
+ "output_type": "stream",
717
+ "text": [
718
+ "13 is a prime number.\n"
719
+ ]
720
+ }
721
+ ],
722
+ "source": [
723
+ "def is_prime(n):\n",
724
+ " if n <= 1:\n",
725
+ " return False\n",
726
+ " elif n == 2:\n",
727
+ " return True\n",
728
+ " elif n % 2 == 0:\n",
729
+ " return False\n",
730
+ " else:\n",
731
+ " # Check for factors up to the square root of n\n",
732
+ " for i in range(3, int(n**0.5) + 1, 2):\n",
733
+ " if n % i == 0:\n",
734
+ " return False\n",
735
+ " return True\n",
736
+ "\n",
737
+ "# Example usage\n",
738
+ "number = 13\n",
739
+ "if is_prime(number):\n",
740
+ " print(f\"{number} is a prime number.\")\n",
741
+ "else:\n",
742
+ " print(f\"{number} is not a prime number.\")\n"
743
+ ]
744
+ }
745
+ ],
746
+ "metadata": {
747
+ "kernelspec": {
748
+ "display_name": "pytorch_m1",
749
+ "language": "python",
750
+ "name": "python3"
751
+ },
752
+ "language_info": {
753
+ "codemirror_mode": {
754
+ "name": "ipython",
755
+ "version": 3
756
+ },
757
+ "file_extension": ".py",
758
+ "mimetype": "text/x-python",
759
+ "name": "python",
760
+ "nbconvert_exporter": "python",
761
+ "pygments_lexer": "ipython3",
762
+ "version": "3.8.13"
763
+ },
764
+ "orig_nbformat": 4
765
+ },
766
+ "nbformat": 4,
767
+ "nbformat_minor": 2
768
+ }
code/leetcode/two_sum_grind_75.ipynb ADDED
@@ -0,0 +1,2195 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 13,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "text/plain": [
11
+ "[1, 0]"
12
+ ]
13
+ },
14
+ "execution_count": 13,
15
+ "metadata": {},
16
+ "output_type": "execute_result"
17
+ }
18
+ ],
19
+ "source": [
20
+ "from typing import List\n",
21
+ "# best solution according to LEETCODE\n",
22
+ "class Solution:\n",
23
+ " def twoSum(self, nums: List[int], target: int) -> List[int]:\n",
24
+ " hashmap = {}\n",
25
+ " for i in range(len(nums)):\n",
26
+ " complement = target - nums[i]\n",
27
+ " if complement in hashmap:\n",
28
+ " return [i, hashmap[complement]]\n",
29
+ " hashmap[nums[i]] = i\n",
30
+ " \n",
31
+ "Solution().twoSum(case_1, 9)"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 4,
37
+ "metadata": {},
38
+ "outputs": [
39
+ {
40
+ "data": {
41
+ "text/plain": [
42
+ "[1, 0]"
43
+ ]
44
+ },
45
+ "execution_count": 4,
46
+ "metadata": {},
47
+ "output_type": "execute_result"
48
+ }
49
+ ],
50
+ "source": [
51
+ "case_1 = [2,7,11,15]\n",
52
+ "\n",
53
+ "Solution().twoSum(case_1, 9)"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 6,
59
+ "metadata": {},
60
+ "outputs": [
61
+ {
62
+ "data": {
63
+ "text/plain": [
64
+ "[0, 1]"
65
+ ]
66
+ },
67
+ "execution_count": 6,
68
+ "metadata": {},
69
+ "output_type": "execute_result"
70
+ }
71
+ ],
72
+ "source": [
73
+ "class Solution:\n",
74
+ " def twoSum(self, nums: List[int], target: int) -> List[int]:\n",
75
+ " for i in range(len(nums)):\n",
76
+ " for j in range(i + 1, len(nums)):\n",
77
+ " if nums[j] == target - nums[i]:\n",
78
+ " return [i, j]\n",
79
+ "\n",
80
+ "\n",
81
+ "Solution().twoSum(case_1, 9)"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": 11,
87
+ "metadata": {},
88
+ "outputs": [
89
+ {
90
+ "name": "stdout",
91
+ "output_type": "stream",
92
+ "text": [
93
+ "[2, 7, 11, 15]\n",
94
+ "7\n",
95
+ "2\n"
96
+ ]
97
+ },
98
+ {
99
+ "data": {
100
+ "text/plain": [
101
+ "[0, 1]"
102
+ ]
103
+ },
104
+ "execution_count": 11,
105
+ "metadata": {},
106
+ "output_type": "execute_result"
107
+ }
108
+ ],
109
+ "source": [
110
+ "class Solution:\n",
111
+ " def twoSum(self, nums, target):\n",
112
+ " \"\"\"\n",
113
+ " :type nums: List[int]\n",
114
+ " :type target: int\n",
115
+ " :rtype: List[int]\n",
116
+ " \"\"\"\n",
117
+ " print(nums)\n",
118
+ " hashmap = {}\n",
119
+ " for i, num in enumerate(nums):\n",
120
+ " n = target - num\n",
121
+ " print(n)\n",
122
+ " if n not in hashmap:\n",
123
+ " hashmap[num] = i\n",
124
+ " else:\n",
125
+ " return [hashmap[n], i]\n",
126
+ "\n",
127
+ "Solution().twoSum(case_1, 9)"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 15,
133
+ "metadata": {},
134
+ "outputs": [
135
+ {
136
+ "name": "stdout",
137
+ "output_type": "stream",
138
+ "text": [
139
+ " 1\n",
140
+ " 1 1\n",
141
+ " 1 2 1\n",
142
+ " 1 3 3 1\n",
143
+ " 1 4 6 4 1\n",
144
+ " 1 6 1 0 5 1\n"
145
+ ]
146
+ }
147
+ ],
148
+ "source": [
149
+ "# Print Pascal's Triangle in Python\n",
150
+ "\n",
151
+ "# input n\n",
152
+ "n = 6\n",
153
+ "\n",
154
+ "# iterarte upto n\n",
155
+ "for i in range(n):\n",
156
+ "\t# adjust space\n",
157
+ "\tprint(' '*(n-i), end='')\n",
158
+ "\n",
159
+ "\t# compute power of 11\n",
160
+ "\tprint(' '.join(map(str, str(11**i))))\n"
161
+ ]
162
+ },
163
+ {
164
+ "cell_type": "code",
165
+ "execution_count": 16,
166
+ "metadata": {},
167
+ "outputs": [
168
+ {
169
+ "name": "stdout",
170
+ "output_type": "stream",
171
+ "text": [
172
+ "1, 5, 10, 10, 5, 1"
173
+ ]
174
+ }
175
+ ],
176
+ "source": [
177
+ "# Python3 program to implement the above approach\n",
178
+ "\n",
179
+ "# Print the N-th row of the\n",
180
+ "# Pascal's Triangle\n",
181
+ "def generateNthRow (N):\n",
182
+ "\n",
183
+ "\t# nC0 = 1\n",
184
+ "\tprev = 1\n",
185
+ "\tprint(prev, end = '')\n",
186
+ "\n",
187
+ "\tfor i in range(1, N + 1):\n",
188
+ "\n",
189
+ "\t\t# nCr = (nCr-1 * (n - r + 1))/r\n",
190
+ "\t\tcurr = (prev * (N - i + 1)) // i\n",
191
+ "\t\tprint(\",\", curr, end = '')\n",
192
+ "\t\tprev = curr\n",
193
+ "\n",
194
+ "# Driver code\n",
195
+ "N = 5\n",
196
+ "\n",
197
+ "# Function calling\n",
198
+ "generateNthRow(N)\n",
199
+ "\n",
200
+ "# This code is contributed by himanshu77\n"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": 21,
206
+ "metadata": {},
207
+ "outputs": [
208
+ {
209
+ "data": {
210
+ "text/plain": [
211
+ "22"
212
+ ]
213
+ },
214
+ "execution_count": 21,
215
+ "metadata": {},
216
+ "output_type": "execute_result"
217
+ }
218
+ ],
219
+ "source": [
220
+ "\"foo\".__hash__()\n",
221
+ "\n",
222
+ "bar = 22.0\n",
223
+ "\n",
224
+ "bar.__hash__()"
225
+ ]
226
+ },
227
+ {
228
+ "cell_type": "code",
229
+ "execution_count": 22,
230
+ "metadata": {},
231
+ "outputs": [
232
+ {
233
+ "name": "stdout",
234
+ "output_type": "stream",
235
+ "text": [
236
+ "[][][][][][][][][][][][][][][][][][][][][][][][][][('gfg@example.com', 'some value')][][][][][][][][][][][][][][][][][][][][][][][][]\n",
237
+ "\n",
238
+ "[][][][][][][][][][][][][][][][][][('portal@example.com', 'some other value')][][][][][][][][('gfg@example.com', 'some value')][][][][][][][][][][][][][][][][][][][][][][][][]\n",
239
+ "\n",
240
+ "some other value\n",
241
+ "\n",
242
+ "[][][][][][][][][][][][][][][][][][][][][][][][][][('gfg@example.com', 'some value')][][][][][][][][][][][][][][][][][][][][][][][][]\n"
243
+ ]
244
+ }
245
+ ],
246
+ "source": [
247
+ "class HashTable:\n",
248
+ "\n",
249
+ "\t# Create empty bucket list of given size\n",
250
+ "\tdef __init__(self, size):\n",
251
+ "\t\tself.size = size\n",
252
+ "\t\tself.hash_table = self.create_buckets()\n",
253
+ "\n",
254
+ "\tdef create_buckets(self):\n",
255
+ "\t\treturn [[] for _ in range(self.size)]\n",
256
+ "\n",
257
+ "\t# Insert values into hash map\n",
258
+ "\tdef set_val(self, key, val):\n",
259
+ "\t\t\n",
260
+ "\t\t# Get the index from the key\n",
261
+ "\t\t# using hash function\n",
262
+ "\t\thashed_key = hash(key) % self.size\n",
263
+ "\t\t\n",
264
+ "\t\t# Get the bucket corresponding to index\n",
265
+ "\t\tbucket = self.hash_table[hashed_key]\n",
266
+ "\n",
267
+ "\t\tfound_key = False\n",
268
+ "\t\tfor index, record in enumerate(bucket):\n",
269
+ "\t\t\trecord_key, record_val = record\n",
270
+ "\t\t\t\n",
271
+ "\t\t\t# check if the bucket has same key as\n",
272
+ "\t\t\t# the key to be inserted\n",
273
+ "\t\t\tif record_key == key:\n",
274
+ "\t\t\t\tfound_key = True\n",
275
+ "\t\t\t\tbreak\n",
276
+ "\n",
277
+ "\t\t# If the bucket has same key as the key to be inserted,\n",
278
+ "\t\t# Update the key value\n",
279
+ "\t\t# Otherwise append the new key-value pair to the bucket\n",
280
+ "\t\tif found_key:\n",
281
+ "\t\t\tbucket[index] = (key, val)\n",
282
+ "\t\telse:\n",
283
+ "\t\t\tbucket.append((key, val))\n",
284
+ "\n",
285
+ "\t# Return searched value with specific key\n",
286
+ "\tdef get_val(self, key):\n",
287
+ "\t\t\n",
288
+ "\t\t# Get the index from the key using\n",
289
+ "\t\t# hash function\n",
290
+ "\t\thashed_key = hash(key) % self.size\n",
291
+ "\t\t\n",
292
+ "\t\t# Get the bucket corresponding to index\n",
293
+ "\t\tbucket = self.hash_table[hashed_key]\n",
294
+ "\n",
295
+ "\t\tfound_key = False\n",
296
+ "\t\tfor index, record in enumerate(bucket):\n",
297
+ "\t\t\trecord_key, record_val = record\n",
298
+ "\t\t\t\n",
299
+ "\t\t\t# check if the bucket has same key as\n",
300
+ "\t\t\t# the key being searched\n",
301
+ "\t\t\tif record_key == key:\n",
302
+ "\t\t\t\tfound_key = True\n",
303
+ "\t\t\t\tbreak\n",
304
+ "\n",
305
+ "\t\t# If the bucket has same key as the key being searched,\n",
306
+ "\t\t# Return the value found\n",
307
+ "\t\t# Otherwise indicate there was no record found\n",
308
+ "\t\tif found_key:\n",
309
+ "\t\t\treturn record_val\n",
310
+ "\t\telse:\n",
311
+ "\t\t\treturn \"No record found\"\n",
312
+ "\n",
313
+ "\t# Remove a value with specific key\n",
314
+ "\tdef delete_val(self, key):\n",
315
+ "\t\t\n",
316
+ "\t\t# Get the index from the key using\n",
317
+ "\t\t# hash function\n",
318
+ "\t\thashed_key = hash(key) % self.size\n",
319
+ "\t\t\n",
320
+ "\t\t# Get the bucket corresponding to index\n",
321
+ "\t\tbucket = self.hash_table[hashed_key]\n",
322
+ "\n",
323
+ "\t\tfound_key = False\n",
324
+ "\t\tfor index, record in enumerate(bucket):\n",
325
+ "\t\t\trecord_key, record_val = record\n",
326
+ "\t\t\t\n",
327
+ "\t\t\t# check if the bucket has same key as\n",
328
+ "\t\t\t# the key to be deleted\n",
329
+ "\t\t\tif record_key == key:\n",
330
+ "\t\t\t\tfound_key = True\n",
331
+ "\t\t\t\tbreak\n",
332
+ "\t\tif found_key:\n",
333
+ "\t\t\tbucket.pop(index)\n",
334
+ "\t\treturn\n",
335
+ "\n",
336
+ "\t# To print the items of hash map\n",
337
+ "\tdef __str__(self):\n",
338
+ "\t\treturn \"\".join(str(item) for item in self.hash_table)\n",
339
+ "\n",
340
+ "\n",
341
+ "hash_table = HashTable(50)\n",
342
+ "\n",
343
+ "# insert some values\n",
344
+ "hash_table.set_val('gfg@example.com', 'some value')\n",
345
+ "print(hash_table)\n",
346
+ "print()\n",
347
+ "\n",
348
+ "hash_table.set_val('portal@example.com', 'some other value')\n",
349
+ "print(hash_table)\n",
350
+ "print()\n",
351
+ "\n",
352
+ "# search/access a record with key\n",
353
+ "print(hash_table.get_val('portal@example.com'))\n",
354
+ "print()\n",
355
+ "\n",
356
+ "# delete or remove a value\n",
357
+ "hash_table.delete_val('portal@example.com')\n",
358
+ "print(hash_table)\n"
359
+ ]
360
+ },
361
+ {
362
+ "cell_type": "markdown",
363
+ "metadata": {},
364
+ "source": [
365
+ "# Longest Substring Without Repeating Characters (medium) -- blind 75 \n",
366
+ "\n",
367
+ "Given a string s, find the length of the longest\n",
368
+ "substring\n",
369
+ "without repeating characters.\n",
370
+ "\n",
371
+ "Input: s = \"abcabcbb\"\n",
372
+ "Output: 3\n",
373
+ "Explanation: The answer is \"abc\", with the length of 3."
374
+ ]
375
+ },
376
+ {
377
+ "cell_type": "code",
378
+ "execution_count": 25,
379
+ "metadata": {},
380
+ "outputs": [
381
+ {
382
+ "data": {
383
+ "text/plain": [
384
+ "3"
385
+ ]
386
+ },
387
+ "execution_count": 25,
388
+ "metadata": {},
389
+ "output_type": "execute_result"
390
+ }
391
+ ],
392
+ "source": [
393
+ "class Solution:\n",
394
+ " def lengthOfLongestSubstring(self, s: str) -> int:\n",
395
+ " def check(start, end):\n",
396
+ " chars = set()\n",
397
+ " for i in range(start, end + 1):\n",
398
+ " c = s[i]\n",
399
+ " if c in chars:\n",
400
+ " return False\n",
401
+ " chars.add(c)\n",
402
+ " return True\n",
403
+ "\n",
404
+ " n = len(s)\n",
405
+ "\n",
406
+ " res = 0\n",
407
+ " for i in range(n):\n",
408
+ " for j in range(i, n):\n",
409
+ " if check(i, j):\n",
410
+ " res = max(res, j - i + 1)\n",
411
+ " return res\n",
412
+ "\n",
413
+ "s = \"abcabcbb\"\n",
414
+ "\n",
415
+ "Solution().lengthOfLongestSubstring(s)"
416
+ ]
417
+ },
418
+ {
419
+ "cell_type": "code",
420
+ "execution_count": 27,
421
+ "metadata": {},
422
+ "outputs": [
423
+ {
424
+ "data": {
425
+ "text/plain": [
426
+ "3"
427
+ ]
428
+ },
429
+ "execution_count": 27,
430
+ "metadata": {},
431
+ "output_type": "execute_result"
432
+ }
433
+ ],
434
+ "source": [
435
+ "class Solution:\n",
436
+ " def lengthOfLongestSubstring(self, s: str) -> int:\n",
437
+ " n = len(s)\n",
438
+ " ans = 0\n",
439
+ " # mp stores the current index of a character\n",
440
+ " mp = {}\n",
441
+ "\n",
442
+ " i = 0\n",
443
+ " # try to extend the range [i, j]\n",
444
+ " for j in range(n):\n",
445
+ " if s[j] in mp:\n",
446
+ " i = max(mp[s[j]], i)\n",
447
+ "\n",
448
+ " ans = max(ans, j - i + 1)\n",
449
+ " mp[s[j]] = j + 1\n",
450
+ "\n",
451
+ " return ans\n",
452
+ "\n",
453
+ "s = \"abcabcbb\"\n",
454
+ "\n",
455
+ "Solution().lengthOfLongestSubstring(s)"
456
+ ]
457
+ },
458
+ {
459
+ "cell_type": "markdown",
460
+ "metadata": {},
461
+ "source": [
462
+ "# 5. Longest Palindromic Substring\n",
463
+ "\n",
464
+ "Given a string s, return the longest palindromic substring in s.\n",
465
+ "\n",
466
+ "Input: s = \"babad\"\n",
467
+ "Output: \"bab\"\n",
468
+ "Explanation: \"aba\" is also a valid answer."
469
+ ]
470
+ },
471
+ {
472
+ "cell_type": "code",
473
+ "execution_count": null,
474
+ "metadata": {},
475
+ "outputs": [],
476
+ "source": [
477
+ "class Solution:\n",
478
+ " def longestPalindrome(self, s: str) -> str:\n",
479
+ " m = '' # Memory to remember a palindrome\n",
480
+ " for i in range(len(s)): # i = start, O = n\n",
481
+ " for j in range(len(s), i, -1): # j = end, O = n^2\n",
482
+ " if len(m) >= j-i: # To reduce time\n",
483
+ " break\n",
484
+ " elif s[i:j] == s[i:j][::-1]:\n",
485
+ " m = s[i:j]\n",
486
+ " break\n",
487
+ " return m"
488
+ ]
489
+ },
490
+ {
491
+ "cell_type": "markdown",
492
+ "metadata": {},
493
+ "source": [
494
+ "# 11. Container With Most Water \n",
495
+ "\n",
496
+ "You are given an integer array height of length n. There are n vertical lines drawn such that the two endpoints of the ith line are (i, 0) and (i, height[i]).\n",
497
+ "\n",
498
+ "Find two lines that together with the x-axis form a container, such that the container contains the most water.\n",
499
+ "\n",
500
+ "Return the maximum amount of water a container can store.\n",
501
+ "\n",
502
+ "Notice that you may not slant the container.\n"
503
+ ]
504
+ },
505
+ {
506
+ "cell_type": "code",
507
+ "execution_count": 29,
508
+ "metadata": {},
509
+ "outputs": [
510
+ {
511
+ "data": {
512
+ "text/plain": [
513
+ "49"
514
+ ]
515
+ },
516
+ "execution_count": 29,
517
+ "metadata": {},
518
+ "output_type": "execute_result"
519
+ }
520
+ ],
521
+ "source": [
522
+ "class Solution:\n",
523
+ " def maxArea(self, height: List[int]) -> int:\n",
524
+ " maxarea = 0\n",
525
+ " left = 0\n",
526
+ " right = len(height) - 1\n",
527
+ " \n",
528
+ " while left < right:\n",
529
+ " width = right - left\n",
530
+ " maxarea = max(maxarea, min(height[left], height[right]) * width)\n",
531
+ " if height[left] <= height[right]:\n",
532
+ " left += 1\n",
533
+ " else:\n",
534
+ " right -= 1\n",
535
+ " \n",
536
+ " return maxarea\n",
537
+ "\n",
538
+ "height = [1,8,6,2,5,4,8,3,7]\n",
539
+ "\n",
540
+ "Solution().maxArea(height)"
541
+ ]
542
+ },
543
+ {
544
+ "cell_type": "markdown",
545
+ "metadata": {},
546
+ "source": [
547
+ "# 15. 3Sum \n",
548
+ "\n",
549
+ "Given an integer array nums, return all the triplets [nums[i], nums[j], nums[k]] such that i != j, i != k, and j != k, and nums[i] + nums[j] + nums[k] == 0.\n",
550
+ "\n",
551
+ "Notice that the solution set must not contain duplicate triplets.\n"
552
+ ]
553
+ },
554
+ {
555
+ "cell_type": "code",
556
+ "execution_count": 32,
557
+ "metadata": {},
558
+ "outputs": [
559
+ {
560
+ "data": {
561
+ "text/plain": [
562
+ "[(-1, -1, 2), (-1, 0, 1)]"
563
+ ]
564
+ },
565
+ "execution_count": 32,
566
+ "metadata": {},
567
+ "output_type": "execute_result"
568
+ }
569
+ ],
570
+ "source": [
571
+ "class Solution:\n",
572
+ " def threeSum(self, nums):\n",
573
+ " res = []\n",
574
+ " nums.sort()\n",
575
+ " for i in range(len(nums)-2):\n",
576
+ " if i > 0 and nums[i] == nums[i-1]:\n",
577
+ " continue\n",
578
+ " l, r = i+1, len(nums)-1\n",
579
+ " while l < r:\n",
580
+ " s = nums[i] + nums[l] + nums[r]\n",
581
+ " if s < 0:\n",
582
+ " l +=1 \n",
583
+ " elif s > 0:\n",
584
+ " r -= 1\n",
585
+ " else:\n",
586
+ " res.append((nums[i], nums[l], nums[r]))\n",
587
+ " while l < r and nums[l] == nums[l+1]:\n",
588
+ " l += 1\n",
589
+ " while l < r and nums[r] == nums[r-1]:\n",
590
+ " r -= 1\n",
591
+ " l += 1; r -= 1\n",
592
+ " return res\n",
593
+ "\n",
594
+ "nums = [-1,0,1,2,-1,-4]\n",
595
+ "\n",
596
+ "Solution().threeSum(nums)"
597
+ ]
598
+ },
599
+ {
600
+ "cell_type": "markdown",
601
+ "metadata": {},
602
+ "source": [
603
+ "# 20. Valid Parenthesis \n",
604
+ "\n",
605
+ "Given a string s containing just the characters '(', ')', '{', '}', '[' and ']', determine if the input string is valid.\n",
606
+ "\n",
607
+ "An input string is valid if:\n",
608
+ "\n",
609
+ " Open brackets must be closed by the same type of brackets.\n",
610
+ " Open brackets must be closed in the correct order.\n",
611
+ " Every close bracket has a corresponding open bracket of the same type.\n"
612
+ ]
613
+ },
614
+ {
615
+ "cell_type": "code",
616
+ "execution_count": 118,
617
+ "metadata": {},
618
+ "outputs": [
619
+ {
620
+ "data": {
621
+ "text/plain": [
622
+ "False"
623
+ ]
624
+ },
625
+ "execution_count": 118,
626
+ "metadata": {},
627
+ "output_type": "execute_result"
628
+ }
629
+ ],
630
+ "source": [
631
+ "class Solution:\n",
632
+ " # @return a boolean\n",
633
+ " def isValid(self, s):\n",
634
+ " stack = []\n",
635
+ " dict = {\"]\":\"[\", \"}\":\"{\", \")\":\"(\"}\n",
636
+ " for char in s:\n",
637
+ " if char in dict.values():\n",
638
+ " stack.append(char)\n",
639
+ " elif char in dict.keys():\n",
640
+ " if stack == [] or dict[char] != stack.pop():\n",
641
+ " return False\n",
642
+ " else:\n",
643
+ " return False\n",
644
+ " return stack == []\n",
645
+ "\n",
646
+ "\n",
647
+ "s = \"()[]{}\"\n",
648
+ "s = \"()[]{}\"\n",
649
+ "Solution().isValid(s)\n"
650
+ ]
651
+ },
652
+ {
653
+ "cell_type": "markdown",
654
+ "metadata": {},
655
+ "source": [
656
+ "# 21. Merge Two Sorted List \n",
657
+ "\n",
658
+ "You are given the heads of two sorted linked lists list1 and list2.\n",
659
+ "\n",
660
+ "Merge the two lists in a one sorted list. The list should be made by splicing together the nodes of the first two lists.\n",
661
+ "\n",
662
+ "Return the head of the merged linked list."
663
+ ]
664
+ },
665
+ {
666
+ "cell_type": "code",
667
+ "execution_count": 37,
668
+ "metadata": {},
669
+ "outputs": [],
670
+ "source": [
671
+ "from typing import Optional\n",
672
+ "\n",
673
+ "class ListNode(object):\n",
674
+ " def __init__(self, x):\n",
675
+ " self.val = x\n",
676
+ " self.next = None\n",
677
+ "\n",
678
+ "\n",
679
+ "class Solution:\n",
680
+ " def mergeTwoLists(self, list1: Optional[ListNode], list2: Optional[ListNode]) -> Optional[ListNode]:\n",
681
+ " cur = dummy = ListNode()\n",
682
+ " while list1 and list2: \n",
683
+ " if list1.val < list2.val:\n",
684
+ " cur.next = list1\n",
685
+ " list1, cur = list1.next, list1\n",
686
+ " else:\n",
687
+ " cur.next = list2\n",
688
+ " list2, cur = list2.next, list2\n",
689
+ " \n",
690
+ " if list1 or list2:\n",
691
+ " cur.next = list1 if list1 else list2\n",
692
+ " \n",
693
+ " return dummy.next"
694
+ ]
695
+ },
696
+ {
697
+ "cell_type": "markdown",
698
+ "metadata": {},
699
+ "source": [
700
+ "# 23. Merge k Sorted Lists (hard)\n",
701
+ "\n",
702
+ "You are given an array of k linked-lists lists, each linked-list is sorted in ascending order.\n",
703
+ "\n",
704
+ "Merge all the linked-lists into one sorted linked-list and return it.\n"
705
+ ]
706
+ },
707
+ {
708
+ "cell_type": "code",
709
+ "execution_count": 39,
710
+ "metadata": {},
711
+ "outputs": [],
712
+ "source": [
713
+ "# brute force \n",
714
+ "\n",
715
+ "class Solution(object):\n",
716
+ " def mergeKLists(self, lists):\n",
717
+ " \"\"\"\n",
718
+ " :type lists: List[ListNode]\n",
719
+ " :rtype: ListNode\n",
720
+ " \"\"\"\n",
721
+ " self.nodes = []\n",
722
+ " head = point = ListNode(0)\n",
723
+ " for l in lists:\n",
724
+ " while l:\n",
725
+ " self.nodes.append(l.val)\n",
726
+ " l = l.next\n",
727
+ " for x in sorted(self.nodes):\n",
728
+ " point.next = ListNode(x)\n",
729
+ " point = point.next\n",
730
+ " return head.next"
731
+ ]
732
+ },
733
+ {
734
+ "cell_type": "code",
735
+ "execution_count": 38,
736
+ "metadata": {},
737
+ "outputs": [
738
+ {
739
+ "ename": "ModuleNotFoundError",
740
+ "evalue": "No module named 'Queue'",
741
+ "output_type": "error",
742
+ "traceback": [
743
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
744
+ "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
745
+ "Cell \u001b[0;32mIn[38], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mQueue\u001b[39;00m \u001b[39mimport\u001b[39;00m PriorityQueue\n\u001b[1;32m 3\u001b[0m \u001b[39mclass\u001b[39;00m \u001b[39mSolution\u001b[39;00m(\u001b[39mobject\u001b[39m):\n\u001b[1;32m 4\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mmergeKLists\u001b[39m(\u001b[39mself\u001b[39m, lists):\n",
746
+ "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'Queue'"
747
+ ]
748
+ }
749
+ ],
750
+ "source": [
751
+ "from Queue import PriorityQueue\n",
752
+ "\n",
753
+ "class Solution(object):\n",
754
+ " def mergeKLists(self, lists):\n",
755
+ " \"\"\"\n",
756
+ " :type lists: List[ListNode]\n",
757
+ " :rtype: ListNode\n",
758
+ " \"\"\"\n",
759
+ " head = point = ListNode(0)\n",
760
+ " q = PriorityQueue()\n",
761
+ " for l in lists:\n",
762
+ " if l:\n",
763
+ " q.put((l.val, l))\n",
764
+ " while not q.empty():\n",
765
+ " val, node = q.get()\n",
766
+ " point.next = ListNode(val)\n",
767
+ " point = point.next\n",
768
+ " node = node.next\n",
769
+ " if node:\n",
770
+ " q.put((node.val, node))\n",
771
+ " return head.next\n",
772
+ "\n"
773
+ ]
774
+ },
775
+ {
776
+ "cell_type": "markdown",
777
+ "metadata": {},
778
+ "source": [
779
+ "# 33. Search in Rotated Sorted Array (medium)\n",
780
+ "\n",
781
+ "There is an integer array nums sorted in ascending order (with distinct values).\n",
782
+ "\n",
783
+ "Prior to being passed to your function, nums is possibly rotated at an unknown pivot index k (1 <= k < nums.length) such that the resulting array is [nums[k], nums[k+1], ..., nums[n-1], nums[0], nums[1], ..., nums[k-1]] (0-indexed). For example, [0,1,2,4,5,6,7] might be rotated at pivot index 3 and become [4,5,6,7,0,1,2].\n",
784
+ "\n",
785
+ "Given the array nums after the possible rotation and an integer target, return the index of target if it is in nums, or -1 if it is not in nums.\n",
786
+ "\n",
787
+ "You must write an algorithm with O(log n) runtime complexity."
788
+ ]
789
+ },
790
+ {
791
+ "cell_type": "code",
792
+ "execution_count": 41,
793
+ "metadata": {},
794
+ "outputs": [
795
+ {
796
+ "data": {
797
+ "text/plain": [
798
+ "4"
799
+ ]
800
+ },
801
+ "execution_count": 41,
802
+ "metadata": {},
803
+ "output_type": "execute_result"
804
+ }
805
+ ],
806
+ "source": [
807
+ "class Solution:\n",
808
+ " def search(self, A: List[int], target: int) -> int:\n",
809
+ " n = len(A)\n",
810
+ " left, right = 0, n - 1\n",
811
+ " if n == 0: return -1\n",
812
+ " \n",
813
+ " while left <= right:\n",
814
+ " mid = left + (right - left) // 2\n",
815
+ " if A[mid] == target: return mid\n",
816
+ " \n",
817
+ " # inflection point to the right. Left is strictly increasing\n",
818
+ " if A[mid] >= A[left]:\n",
819
+ " if A[left] <= target < A[mid]:\n",
820
+ " right = mid - 1\n",
821
+ " else:\n",
822
+ " left = mid + 1\n",
823
+ " \n",
824
+ " # inflection point to the left of me. Right is strictly increasing\n",
825
+ " else:\n",
826
+ " if A[mid] < target <= A[right]:\n",
827
+ " left = mid + 1\n",
828
+ " else:\n",
829
+ " right = mid - 1\n",
830
+ " \n",
831
+ " return -1\n",
832
+ "\n",
833
+ "A = [4,5,6,7,0,1,2]\n",
834
+ "target = 0 \n",
835
+ "\n",
836
+ "# return index of pivot \n",
837
+ "Solution().search(A, target)"
838
+ ]
839
+ },
840
+ {
841
+ "cell_type": "markdown",
842
+ "metadata": {},
843
+ "source": [
844
+ "# 39. Combination Sum (medium)\n",
845
+ "\n",
846
+ "Given an array of distinct integers candidates and a target integer target, return a list of all unique combinations of candidates where the chosen numbers sum to target. You may return the combinations in any order.\n",
847
+ "\n",
848
+ "The same number may be chosen from candidates an unlimited number of times. Two combinations are unique if the\n",
849
+ "frequency\n",
850
+ "of at least one of the chosen numbers is different.\n",
851
+ "\n",
852
+ "The test cases are generated such that the number of unique combinations that sum up to target is less than 150 combinations for the given input.\n"
853
+ ]
854
+ },
855
+ {
856
+ "cell_type": "code",
857
+ "execution_count": 42,
858
+ "metadata": {},
859
+ "outputs": [
860
+ {
861
+ "data": {
862
+ "text/plain": [
863
+ "[[2, 2, 3], [7]]"
864
+ ]
865
+ },
866
+ "execution_count": 42,
867
+ "metadata": {},
868
+ "output_type": "execute_result"
869
+ }
870
+ ],
871
+ "source": [
872
+ "class Solution(object):\n",
873
+ " def combinationSum(self, candidates, target):\n",
874
+ " ret = []\n",
875
+ " self.dfs(candidates, target, [], ret)\n",
876
+ " return ret\n",
877
+ " \n",
878
+ " def dfs(self, nums, target, path, ret):\n",
879
+ " if target < 0:\n",
880
+ " return \n",
881
+ " if target == 0:\n",
882
+ " ret.append(path)\n",
883
+ " return \n",
884
+ " for i in range(len(nums)):\n",
885
+ " self.dfs(nums[i:], target-nums[i], path+[nums[i]], ret)\n",
886
+ "\n",
887
+ "candidates = [2,3,6,7]\n",
888
+ "targets = 7 \n",
889
+ "\n",
890
+ "Solution().combinationSum(candidates, targets)\n"
891
+ ]
892
+ },
893
+ {
894
+ "cell_type": "markdown",
895
+ "metadata": {},
896
+ "source": [
897
+ "# 48. Rotate Image (medium)\n",
898
+ "\n",
899
+ "You are given an n x n 2D matrix representing an image, rotate the image by 90 degrees (clockwise).\n",
900
+ "\n",
901
+ "You have to rotate the image in-place, which means you have to modify the input 2D matrix directly. DO NOT allocate another 2D matrix and do the rotation.\n",
902
+ "\n",
903
+ "\n",
904
+ "\n",
905
+ "**Bonus Question:** If you're not too confident with matrices and linear algebra, get some more practice by also coding a method that transposes the matrix on the other diagonal, and another that reflects from top to bottom. You can test your functions by printing out the matrix before and after each operation. Finally, use your functions to find three more solutions to this problem. Each solution uses two matrix operations.\n",
906
+ "\n",
907
+ "\n",
908
+ "**Interview Tip:** Terrified of being asked this question in an interview? Many people are: it can be intimidating due to the fiddly logic. Unfortunately, if you do a lot of interviewing, the probability of seeing it at least once is high, and some people have claimed to have seen it multiple times! This is one of the few questions where I recommend practicing until you can confidently code it and explain it without thinking too much.\n"
909
+ ]
910
+ },
911
+ {
912
+ "cell_type": "code",
913
+ "execution_count": null,
914
+ "metadata": {},
915
+ "outputs": [],
916
+ "source": [
917
+ "class Solution:\n",
918
+ " def rotate(self, matrix: List[List[int]]) -> None:\n",
919
+ " self.transpose(matrix)\n",
920
+ " self.reflect(matrix)\n",
921
+ " \n",
922
+ " def transpose(self, matrix):\n",
923
+ " n = len(matrix)\n",
924
+ " for i in range(n):\n",
925
+ " for j in range(i + 1, n):\n",
926
+ " matrix[j][i], matrix[i][j] = matrix[i][j], matrix[j][i]\n",
927
+ "\n",
928
+ " def reflect(self, matrix):\n",
929
+ " n = len(matrix)\n",
930
+ " for i in range(n):\n",
931
+ " for j in range(n // 2):\n",
932
+ " matrix[i][j], matrix[i][-j - 1] = matrix[i][-j - 1], matrix[i][j]\n",
933
+ "\n",
934
+ "matrix = [[1,2,3],[4,5,6],[7,8,9]]\n",
935
+ "\n"
936
+ ]
937
+ },
938
+ {
939
+ "cell_type": "markdown",
940
+ "metadata": {},
941
+ "source": [
942
+ "49. Group Anagrams\n",
943
+ "\n",
944
+ "Given an array of strings strs, group the anagrams together. You can return the answer in any order.\n",
945
+ "\n",
946
+ "An Anagram is a word or phrase formed by rearranging the letters of a different word or phrase, typically using all the original letters exactly once.\n",
947
+ "\n"
948
+ ]
949
+ },
950
+ {
951
+ "cell_type": "code",
952
+ "execution_count": 45,
953
+ "metadata": {},
954
+ "outputs": [
955
+ {
956
+ "data": {
957
+ "text/plain": [
958
+ "dict_values([['eat', 'tea', 'ate'], ['tan', 'nat'], ['bat']])"
959
+ ]
960
+ },
961
+ "execution_count": 45,
962
+ "metadata": {},
963
+ "output_type": "execute_result"
964
+ }
965
+ ],
966
+ "source": [
967
+ "import collections\n",
968
+ "\n",
969
+ "class Solution:\n",
970
+ " def groupAnagrams(self, strs):\n",
971
+ " ans = collections.defaultdict(list)\n",
972
+ " for s in strs:\n",
973
+ " count = [0] * 26\n",
974
+ " for c in s:\n",
975
+ " count[ord(c) - ord('a')] += 1\n",
976
+ " ans[tuple(count)].append(s)\n",
977
+ " return ans.values()\n",
978
+ "\n",
979
+ "strs = [\"eat\",\"tea\",\"tan\",\"ate\",\"nat\",\"bat\"]\n",
980
+ "\n",
981
+ "Solution().groupAnagrams(strs)\n"
982
+ ]
983
+ },
984
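+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A short alternative sketch (not from the original cell): use the sorted\n",
+ "# string itself as the grouping key. This is O(n * k log k) versus the\n",
+ "# O(n * k) character-count key above, but simpler to write in an interview.\n",
+ "def group_anagrams(strs):\n",
+ "    groups = {}\n",
+ "    for s in strs:\n",
+ "        groups.setdefault(\"\".join(sorted(s)), []).append(s)\n",
+ "    return list(groups.values())\n",
+ "\n",
+ "group_anagrams([\"eat\",\"tea\",\"tan\",\"ate\",\"nat\",\"bat\"])"
+ ]
+ },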
+ {
985
+ "cell_type": "markdown",
986
+ "metadata": {},
987
+ "source": [
988
+ "# 53. Maximum Subarray \n",
989
+ "iven an integer array nums, find the\n",
990
+ "subarray\n",
991
+ "which has the largest sum and return its sum.\n",
992
+ "\n",
993
+ "\n"
994
+ ]
995
+ },
996
+ {
997
+ "cell_type": "code",
998
+ "execution_count": null,
999
+ "metadata": {},
1000
+ "outputs": [],
1001
+ "source": [
1002
+ "# kadane algo \n",
1003
+ "for i in range(1, len(nums)):\n",
1004
+ " if nums[i-1] > 0:\n",
1005
+ " nums[i] += nums[i-1]\n",
1006
+ " return max(nums)\n",
1007
+ "\n",
1008
+ "\n",
1009
+ "from itertools import accumulate\n",
1010
+ "\n",
1011
+ "return max(accumulate(nums, lambda x, y: x+y if x > 0 else y))"
1012
+ ]
1013
+ },
1014
+ {
1015
+ "cell_type": "markdown",
1016
+ "metadata": {},
1017
+ "source": [
1018
+ "# spiral matrix\n",
1019
+ "\n",
1020
+ "Given an m x n matrix, return all elements of the matrix in spiral order.\n",
1021
+ "\n",
1022
+ "matrix = [[1,2,3],[4,5,6],[7,8,9]]\n",
1023
+ "output [1,2,3,6,9,8,7,4,5]\n"
1024
+ ]
1025
+ },
1026
+ {
1027
+ "cell_type": "code",
1028
+ "execution_count": 46,
1029
+ "metadata": {},
1030
+ "outputs": [],
1031
+ "source": [
1032
+ "class Solution:\n",
1033
+ " def spiralOrder(self, matrix: List[List[int]]) -> List[int]:\n",
1034
+ " res = []\n",
1035
+ " if len(matrix) == 0:\n",
1036
+ " return res\n",
1037
+ " row_begin = 0\n",
1038
+ " col_begin = 0\n",
1039
+ " row_end = len(matrix)-1 \n",
1040
+ " col_end = len(matrix[0])-1\n",
1041
+ " while (row_begin <= row_end and col_begin <= col_end):\n",
1042
+ " for i in range(col_begin,col_end+1):\n",
1043
+ " res.append(matrix[row_begin][i])\n",
1044
+ " row_begin += 1\n",
1045
+ " for i in range(row_begin,row_end+1):\n",
1046
+ " res.append(matrix[i][col_end])\n",
1047
+ " col_end -= 1\n",
1048
+ " if (row_begin <= row_end):\n",
1049
+ " for i in range(col_end,col_begin-1,-1):\n",
1050
+ " res.append(matrix[row_end][i])\n",
1051
+ " row_end -= 1\n",
1052
+ " if (col_begin <= col_end):\n",
1053
+ " for i in range(row_end,row_begin-1,-1):\n",
1054
+ " res.append(matrix[i][col_begin])\n",
1055
+ " col_begin += 1\n",
1056
+ " return res\n",
1057
+ " \n",
1058
+ " \n",
1059
+ " "
1060
+ ]
1061
+ },
1062
+ {
1063
+ "cell_type": "markdown",
1064
+ "metadata": {},
1065
+ "source": [
1066
+ "# Jump Game (medium) \n",
1067
+ "\n",
1068
+ "You are given an integer array nums. You are initially positioned at the array's first index, and each element in the array represents your maximum jump length at that position.\n",
1069
+ "\n",
1070
+ "Return true if you can reach the last index, or false otherwise."
1071
+ ]
1072
+ },
1073
+ {
1074
+ "cell_type": "code",
1075
+ "execution_count": 50,
1076
+ "metadata": {},
1077
+ "outputs": [
1078
+ {
1079
+ "data": {
1080
+ "text/plain": [
1081
+ "False"
1082
+ ]
1083
+ },
1084
+ "execution_count": 50,
1085
+ "metadata": {},
1086
+ "output_type": "execute_result"
1087
+ }
1088
+ ],
1089
+ "source": [
1090
+ "class Solution:\n",
1091
+ " def canJump(self, nums: List[int]) -> bool:\n",
1092
+ " m = 0\n",
1093
+ " for i, n in enumerate(nums):\n",
1094
+ " if i > m:\n",
1095
+ " return False\n",
1096
+ " m = max(m, i+n)\n",
1097
+ " return True\n",
1098
+ "\n",
1099
+ "nums = [2,3,1,1,4]\n",
1100
+ "\n",
1101
+ "nums = [3,2,1,0,4]\n",
1102
+ "\n",
1103
+ "Solution().canJump(nums)\n"
1104
+ ]
1105
+ },
1106
+ {
1107
+ "cell_type": "markdown",
1108
+ "metadata": {},
1109
+ "source": [
1110
+ "# 56. Merge Intervals\n",
1111
+ "\n",
1112
+ "Given an array of intervals where intervals[i] = [starti, endi], merge all overlapping intervals, and return an array of the non-overlapping intervals that cover all the intervals in the input.\n"
1113
+ ]
1114
+ },
1115
+ {
1116
+ "cell_type": "code",
1117
+ "execution_count": 54,
1118
+ "metadata": {},
1119
+ "outputs": [
1120
+ {
1121
+ "data": {
1122
+ "text/plain": [
1123
+ "[[1, 6], [8, 10], [15, 18]]"
1124
+ ]
1125
+ },
1126
+ "execution_count": 54,
1127
+ "metadata": {},
1128
+ "output_type": "execute_result"
1129
+ }
1130
+ ],
1131
+ "source": [
1132
+ "# time complexity analysis O(n log n )\n",
1133
+ "class Solution:\n",
1134
+ " def merge(self, intervals: List[List[int]]) -> List[List[int]]:\n",
1135
+ "\n",
1136
+ " intervals.sort(key=lambda x: x[0])\n",
1137
+ "\n",
1138
+ " merged = []\n",
1139
+ " for interval in intervals:\n",
1140
+ " # if the list of merged intervals is empty or if the current\n",
1141
+ " # interval does not overlap with the previous, simply append it.\n",
1142
+ " if not merged or merged[-1][1] < interval[0]:\n",
1143
+ " merged.append(interval)\n",
1144
+ " else:\n",
1145
+ " # otherwise, there is overlap, so we merge the current and previous\n",
1146
+ " # intervals.\n",
1147
+ " merged[-1][1] = max(merged[-1][1], interval[1])\n",
1148
+ "\n",
1149
+ " return merged\n",
1150
+ "\n",
1151
+ "intervals = [[1,3],[2,6],[8,10],[15,18]]\n",
1152
+ "\n",
1153
+ "Solution().merge(intervals)"
1154
+ ]
1155
+ },
1156
+ {
1157
+ "cell_type": "markdown",
1158
+ "metadata": {},
1159
+ "source": [
1160
+ "# 57. Insert Interval\n",
1161
+ "\n",
1162
+ "You are given an array of non-overlapping intervals intervals where intervals[i] = [starti, endi] represent the start and the end of the ith interval and intervals is sorted in ascending order by starti. You are also given an interval newInterval = [start, end] that represents the start and end of another interval.\n",
1163
+ "\n",
1164
+ "Insert newInterval into intervals such that intervals is still sorted in ascending order by starti and intervals still does not have any overlapping intervals (merge overlapping intervals if necessary).\n",
1165
+ "\n",
1166
+ "Return intervals after the insertion."
1167
+ ]
1168
+ },
1169
+ {
1170
+ "cell_type": "code",
1171
+ "execution_count": 59,
1172
+ "metadata": {},
1173
+ "outputs": [
1174
+ {
1175
+ "data": {
1176
+ "text/plain": [
1177
+ "[[1, 5], [6, 9]]"
1178
+ ]
1179
+ },
1180
+ "execution_count": 59,
1181
+ "metadata": {},
1182
+ "output_type": "execute_result"
1183
+ }
1184
+ ],
1185
+ "source": [
1186
+ "# class Solution:\n",
1187
+ "# def insert(self, intervals: List[List[int]], newInterval: List[int]) -> List[List[int]]:\n",
1188
+ "# # s, e = newInterval.start, newInterval.end\n",
1189
+ "# s, e = newInterval[0], newInterval[1]\n",
1190
+ "# left = [i for i in intervals if i.end < s]\n",
1191
+ "# right = [i for i in intervals if i.start > e]\n",
1192
+ "# if left + right != intervals:\n",
1193
+ "# s = min(s, intervals[len(left)].start)\n",
1194
+ "# e = max(e, intervals[~len(right)].end)\n",
1195
+ "# return left + [Interval(s, e)] + right\n",
1196
+ "\n",
1197
+ "\n",
1198
+ "class Solution:\n",
1199
+ " def insert(self, intervals: List[List[int]], newInterval: List[int]) -> List[List[int]]:\n",
1200
+ " s, e = newInterval[0], newInterval[1]\n",
1201
+ " left, right = [], []\n",
1202
+ " for i in intervals:\n",
1203
+ " if i[1] < s:\n",
1204
+ " left += i,\n",
1205
+ " elif i[0] > e:\n",
1206
+ " right += i,\n",
1207
+ " else:\n",
1208
+ " s = min(s, i[0])\n",
1209
+ " e = max(e, i[1])\n",
1210
+ " return left + [[s, e]] + right\n",
1211
+ "\n",
1212
+ "intervals = [[1,3],[6,9]]\n",
1213
+ "newInterval = [2,5]\n",
1214
+ "\n",
1215
+ "Solution().insert(intervals, newInterval)"
1216
+ ]
1217
+ },
1218
+ {
1219
+ "cell_type": "markdown",
1220
+ "metadata": {},
1221
+ "source": [
1222
+ "# 70. Climbing Stairs\n",
1223
+ "\n",
1224
+ "You are climbing a staircase. It takes n steps to reach the top.\n",
1225
+ "\n",
1226
+ "Each time you can either climb 1 or 2 steps. In how many distinct ways can you climb to the top?"
1227
+ ]
1228
+ },
1229
+ {
1230
+ "cell_type": "code",
1231
+ "execution_count": 60,
1232
+ "metadata": {},
1233
+ "outputs": [
1234
+ {
1235
+ "data": {
1236
+ "text/plain": [
1237
+ "2"
1238
+ ]
1239
+ },
1240
+ "execution_count": 60,
1241
+ "metadata": {},
1242
+ "output_type": "execute_result"
1243
+ }
1244
+ ],
1245
+ "source": [
1246
+ "class Solution:\n",
1247
+ " def climbStairs(self, n: int) -> int:\n",
1248
+ " a = b = 1\n",
1249
+ " for _ in range(n):\n",
1250
+ " a, b = b, a + b\n",
1251
+ " return a\n",
1252
+ "\n",
1253
+ "n = 2\n",
1254
+ "Solution().climbStairs(n)\n"
1255
+ ]
1256
+ },
1257
+ {
1258
+ "cell_type": "markdown",
1259
+ "metadata": {},
1260
+ "source": [
1261
+ "# 73. Set Matrix Zeros \n",
1262
+ "\n",
1263
+ "Given an m x n integer matrix matrix, if an element is 0, set its entire row and column to 0's.\n",
1264
+ "\n",
1265
+ "You must do it in place."
1266
+ ]
1267
+ },
1268
+ {
1269
+ "cell_type": "code",
1270
+ "execution_count": 63,
1271
+ "metadata": {},
1272
+ "outputs": [
1273
+ {
1274
+ "name": "stdout",
1275
+ "output_type": "stream",
1276
+ "text": [
1277
+ "[[1, 0, 1], [0, 0, 0], [1, 0, 1]]\n"
1278
+ ]
1279
+ }
1280
+ ],
1281
+ "source": [
1282
+ "class Solution(object):\n",
1283
+ " def setZeroes(self, matrix):\n",
1284
+ " \"\"\"\n",
1285
+ " :type matrix: List[List[int]]\n",
1286
+ " :rtype: void Do not return anything, modify matrix in-place instead.\n",
1287
+ " \"\"\"\n",
1288
+ " is_col = False\n",
1289
+ " R = len(matrix)\n",
1290
+ " C = len(matrix[0])\n",
1291
+ " for i in range(R):\n",
1292
+ " # Since first cell for both first row and first column is the same i.e. matrix[0][0]\n",
1293
+ " # We can use an additional variable for either the first row/column.\n",
1294
+ " # For this solution we are using an additional variable for the first column\n",
1295
+ " # and using matrix[0][0] for the first row.\n",
1296
+ " if matrix[i][0] == 0:\n",
1297
+ " is_col = True\n",
1298
+ " for j in range(1, C):\n",
1299
+ " # If an element is zero, we set the first element of the corresponding row and column to 0\n",
1300
+ " if matrix[i][j] == 0:\n",
1301
+ " matrix[0][j] = 0\n",
1302
+ " matrix[i][0] = 0\n",
1303
+ "\n",
1304
+ " # Iterate over the array once again and using the first row and first column, update the elements.\n",
1305
+ " for i in range(1, R):\n",
1306
+ " for j in range(1, C):\n",
1307
+ " if not matrix[i][0] or not matrix[0][j]:\n",
1308
+ " matrix[i][j] = 0\n",
1309
+ "\n",
1310
+ " # See if the first row needs to be set to zero as well\n",
1311
+ " if matrix[0][0] == 0:\n",
1312
+ " for j in range(C):\n",
1313
+ " matrix[0][j] = 0\n",
1314
+ "\n",
1315
+ " # See if the first column needs to be set to zero as well \n",
1316
+ " if is_col:\n",
1317
+ " for i in range(R):\n",
1318
+ " matrix[i][0] = 0\n",
1319
+ "\n",
1320
+ "matrix = [[1,1,1],[1,0,1],[1,1,1]]\n",
1321
+ "\n",
1322
+ "Solution().setZeroes(matrix)\n",
1323
+ "print(matrix)"
1324
+ ]
1325
+ },
1326
+ {
1327
+ "cell_type": "markdown",
1328
+ "metadata": {},
1329
+ "source": [
1330
+ "# 76. Minimum Window Substring (hard)\n",
1331
+ "\n",
1332
+ "Given two strings s and t of lengths m and n respectively, return the minimum window\n",
1333
+ "substring\n",
1334
+ "of s such that every character in t (including duplicates) is included in the window. If there is no such substring, return the empty string \"\".\n",
1335
+ "\n",
1336
+ "The testcases will be generated such that the answer is unique.\n"
1337
+ ]
1338
+ },
1339
+ {
1340
+ "cell_type": "code",
1341
+ "execution_count": null,
1342
+ "metadata": {},
1343
+ "outputs": [],
1344
+ "source": [
1345
+ "# solution from leetcode \n",
1346
+ "\n",
1347
+ "def minWindow(self, s, t):\n",
1348
+ " \"\"\"\n",
1349
+ " :type s: str\n",
1350
+ " :type t: str\n",
1351
+ " :rtype: str\n",
1352
+ " \"\"\"\n",
1353
+ "\n",
1354
+ " if not t or not s:\n",
1355
+ " return \"\"\n",
1356
+ "\n",
1357
+ " # Dictionary which keeps a count of all the unique characters in t.\n",
1358
+ " dict_t = Counter(t)\n",
1359
+ "\n",
1360
+ " # Number of unique characters in t, which need to be present in the desired window.\n",
1361
+ " required = len(dict_t)\n",
1362
+ "\n",
1363
+ " # left and right pointer\n",
1364
+ " l, r = 0, 0\n",
1365
+ "\n",
1366
+ " # formed is used to keep track of how many unique characters in t are present in the current window in its desired frequency.\n",
1367
+ " # e.g. if t is \"AABC\" then the window must have two A's, one B and one C. Thus formed would be = 3 when all these conditions are met.\n",
1368
+ " formed = 0\n",
1369
+ "\n",
1370
+ " # Dictionary which keeps a count of all the unique characters in the current window.\n",
1371
+ " window_counts = {}\n",
1372
+ "\n",
1373
+ " # ans tuple of the form (window length, left, right)\n",
1374
+ " ans = float(\"inf\"), None, None\n",
1375
+ "\n",
1376
+ " while r < len(s):\n",
1377
+ "\n",
1378
+ " # Add one character from the right to the window\n",
1379
+ " character = s[r]\n",
1380
+ " window_counts[character] = window_counts.get(character, 0) + 1\n",
1381
+ "\n",
1382
+ " # If the frequency of the current character added equals to the desired count in t then increment the formed count by 1.\n",
1383
+ " if character in dict_t and window_counts[character] == dict_t[character]:\n",
1384
+ " formed += 1\n",
1385
+ "\n",
1386
+ " # Try and contract the window till the point where it ceases to be 'desirable'.\n",
1387
+ " while l <= r and formed == required:\n",
1388
+ " character = s[l]\n",
1389
+ "\n",
1390
+ " # Save the smallest window until now.\n",
1391
+ " if r - l + 1 < ans[0]:\n",
1392
+ " ans = (r - l + 1, l, r)\n",
1393
+ "\n",
1394
+ " # The character at the position pointed by the `left` pointer is no longer a part of the window.\n",
1395
+ " window_counts[character] -= 1\n",
1396
+ " if character in dict_t and window_counts[character] < dict_t[character]:\n",
1397
+ " formed -= 1\n",
1398
+ "\n",
1399
+ " # Move the left pointer ahead, this would help to look for a new window.\n",
1400
+ " l += 1 \n",
1401
+ "\n",
1402
+ " # Keep expanding the window once we are done contracting.\n",
1403
+ " r += 1 \n",
1404
+ " return \"\" if ans[0] == float(\"inf\") else s[ans[1] : ans[2] + 1]\n",
1405
+ "\n",
1406
+ "s = \"ADOBECODEBANC\", t = \"ABC\""
1407
+ ]
1408
+ },
1409
+ {
1410
+ "cell_type": "code",
1411
+ "execution_count": null,
1412
+ "metadata": {},
1413
+ "outputs": [],
1414
+ "source": [
1415
+ "# soltuion from comments \n",
1416
+ "\n",
1417
+ "class Solution:\n",
1418
+ " def minWindow(s, t):\n",
1419
+ " need = collections.Counter(t) #hash table to store char frequency\n",
1420
+ " missing = len(t) #total number of chars we care\n",
1421
+ " start, end = 0, 0\n",
1422
+ " i = 0\n",
1423
+ " for j, char in enumerate(s, 1): #index j from 1\n",
1424
+ " if need[char] > 0:\n",
1425
+ " missing -= 1\n",
1426
+ " need[char] -= 1\n",
1427
+ " if missing == 0: #match all chars\n",
1428
+ " while i < j and need[s[i]] < 0: #remove chars to find the real start\n",
1429
+ " need[s[i]] += 1\n",
1430
+ " i += 1\n",
1431
+ " need[s[i]] += 1 #make sure the first appearing char satisfies need[char]>0\n",
1432
+ " missing += 1 #we missed this first char, so add missing by 1\n",
1433
+ " if end == 0 or j-i < end-start: #update window\n",
1434
+ " start, end = i, j\n",
1435
+ " i += 1 #update i to start+1 for next window\n",
1436
+ " return s[start:end]\n",
1437
+ "\n",
1438
+ " "
1439
+ ]
1440
+ },
1441
+ {
1442
+ "cell_type": "markdown",
1443
+ "metadata": {},
1444
+ "source": [
1445
+ "# 79. Word Search\n",
1446
+ "\n",
1447
+ "Given an m x n grid of characters board and a string word, return true if word exists in the grid.\n",
1448
+ "\n",
1449
+ "The word can be constructed from letters of sequentially adjacent cells, where adjacent cells are horizontally or vertically neighboring. The same letter cell may not be used more than once."
1450
+ ]
1451
+ },
1452
+ {
1453
+ "cell_type": "code",
1454
+ "execution_count": 64,
1455
+ "metadata": {},
1456
+ "outputs": [],
1457
+ "source": [
1458
+ "# depth first search \n",
1459
+ "\n",
1460
+ "\n",
1461
+ "def exist(self, board, word):\n",
1462
+ " if not board:\n",
1463
+ " return False\n",
1464
+ " for i in xrange(len(board)):\n",
1465
+ " for j in xrange(len(board[0])):\n",
1466
+ " if self.dfs(board, i, j, word):\n",
1467
+ " return True\n",
1468
+ " return False\n",
1469
+ "\n",
1470
+ "# check whether can find word, start at (i,j) position \n",
1471
+ "def dfs(self, board, i, j, word):\n",
1472
+ " if len(word) == 0: # all the characters are checked\n",
1473
+ " return True\n",
1474
+ " if i<0 or i>=len(board) or j<0 or j>=len(board[0]) or word[0]!=board[i][j]:\n",
1475
+ " return False\n",
1476
+ " tmp = board[i][j] # first character is found, check the remaining part\n",
1477
+ " board[i][j] = \"#\" # avoid visit agian \n",
1478
+ " # check whether can find \"word\" along one direction\n",
1479
+ " res = self.dfs(board, i+1, j, word[1:]) or self.dfs(board, i-1, j, word[1:]) \\\n",
1480
+ " or self.dfs(board, i, j+1, word[1:]) or self.dfs(board, i, j-1, word[1:])\n",
1481
+ " board[i][j] = tmp\n",
1482
+ " return res\n",
1483
+ "\n",
1484
+ "board = [[\"A\",\"B\",\"C\",\"E\"],[\"S\",\"F\",\"C\",\"S\"],[\"A\",\"D\",\"E\",\"E\"]]\n",
1485
+ "word = \"ABCCED\""
1486
+ ]
1487
+ },
1488
+ {
1489
+ "cell_type": "markdown",
1490
+ "metadata": {},
1491
+ "source": [
1492
+ "# 98. Validate Binary Search Tree\n",
1493
+ "\n",
1494
+ "Given the root of a binary tree, determine if it is a valid binary search tree (BST).\n",
1495
+ "\n",
1496
+ "A valid BST is defined as follows:\n",
1497
+ "\n",
1498
+ "The left\n",
1499
+ "subtree\n",
1500
+ "of a node contains only nodes with keys less than the node's key.\n",
1501
+ "The right subtree of a node contains only nodes with keys greater than the node's key.\n",
1502
+ "Both the left and right subtrees must also be binary search trees.\n"
1503
+ ]
1504
+ },
1505
+ {
1506
+ "cell_type": "code",
1507
+ "execution_count": 69,
1508
+ "metadata": {},
1509
+ "outputs": [
1510
+ {
1511
+ "ename": "AttributeError",
1512
+ "evalue": "'list' object has no attribute 'val'",
1513
+ "output_type": "error",
1514
+ "traceback": [
1515
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1516
+ "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
1517
+ "Cell \u001b[0;32mIn[69], line 19\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39misValidBST(root\u001b[39m.\u001b[39mleft, floor, root\u001b[39m.\u001b[39mval) \u001b[39mand\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39misValidBST(root\u001b[39m.\u001b[39mright, root\u001b[39m.\u001b[39mval, ceiling)\n\u001b[1;32m 17\u001b[0m root \u001b[39m=\u001b[39m [\u001b[39m2\u001b[39m,\u001b[39m1\u001b[39m,\u001b[39m3\u001b[39m]\n\u001b[0;32m---> 19\u001b[0m Solution()\u001b[39m.\u001b[39;49misValidBST(root)\n",
1518
+ "Cell \u001b[0;32mIn[69], line 12\u001b[0m, in \u001b[0;36mSolution.isValidBST\u001b[0;34m(self, root, floor, ceiling)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m root: \n\u001b[1;32m 11\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mTrue\u001b[39;00m\n\u001b[0;32m---> 12\u001b[0m \u001b[39mif\u001b[39;00m root\u001b[39m.\u001b[39;49mval \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m floor \u001b[39mor\u001b[39;00m root\u001b[39m.\u001b[39mval \u001b[39m>\u001b[39m\u001b[39m=\u001b[39m ceiling:\n\u001b[1;32m 13\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mFalse\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[39m# in the left branch, root is the new ceiling; contrarily root is the new floor in right branch\u001b[39;00m\n",
1519
+ "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'val'"
1520
+ ]
1521
+ }
1522
+ ],
1523
+ "source": [
1524
+ "# Definition for a binary tree node.\n",
1525
+ "# class TreeNode:\n",
1526
+ "# def __init__(self, val=0, left=None, right=None):\n",
1527
+ "# self.val = val\n",
1528
+ "# self.left = left\n",
1529
+ "# self.right = right\n",
1530
+ "\n",
1531
+ "class Solution:\n",
1532
+ " def isValidBST(self, root, floor=float('-inf'), ceiling=float('inf')):\n",
1533
+ " if not root: \n",
1534
+ " return True\n",
1535
+ " if root.val <= floor or root.val >= ceiling:\n",
1536
+ " return False\n",
1537
+ " # in the left branch, root is the new ceiling; contrarily root is the new floor in right branch\n",
1538
+ " return self.isValidBST(root.left, floor, root.val) and self.isValidBST(root.right, root.val, ceiling)\n",
1539
+ "\n",
1540
+ "root = [2,1,3]\n",
1541
+ "\n",
1542
+ "Solution().isValidBST(root)"
1543
+ ]
1544
+ },
1545
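+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hypothetical helper (not in the original notebook): build a tree from a\n",
+ "# LeetCode-style level-order list, with None marking a missing child. It\n",
+ "# assumes the TreeNode class defined in the cell above, and is a convenient\n",
+ "# alternative to constructing TreeNode objects by hand in the cells below.\n",
+ "from collections import deque\n",
+ "\n",
+ "def build_tree(values):\n",
+ "    if not values or values[0] is None:\n",
+ "        return None\n",
+ "    root = TreeNode(values[0])\n",
+ "    q = deque([root])\n",
+ "    i = 1\n",
+ "    while q and i < len(values):\n",
+ "        node = q.popleft()\n",
+ "        if values[i] is not None:\n",
+ "            node.left = TreeNode(values[i])\n",
+ "            q.append(node.left)\n",
+ "        i += 1\n",
+ "        if i < len(values) and values[i] is not None:\n",
+ "            node.right = TreeNode(values[i])\n",
+ "            q.append(node.right)\n",
+ "        i += 1\n",
+ "    return root\n",
+ "\n",
+ "build_tree([3, 9, 20, None, None, 15, 7]).val  # expected 3"
+ ]
+ },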
+ {
1546
+ "cell_type": "markdown",
1547
+ "metadata": {},
1548
+ "source": [
1549
+ "# 100. Same Tree\n",
1550
+ "\n",
1551
+ "Given the roots of two binary trees p and q, write a function to check if they are the same or not.\n",
1552
+ "\n",
1553
+ "Two binary trees are considered the same if they are structurally identical, and the nodes have the same value."
1554
+ ]
1555
+ },
1556
+ {
1557
+ "cell_type": "code",
1558
+ "execution_count": 70,
1559
+ "metadata": {},
1560
+ "outputs": [],
1561
+ "source": [
1562
+ "from collections import deque\n",
1563
+ "class Solution:\n",
1564
+ " def isSameTree(self, p, q):\n",
1565
+ " \"\"\"\n",
1566
+ " :type p: TreeNode\n",
1567
+ " :type q: TreeNode\n",
1568
+ " :rtype: bool\n",
1569
+ " \"\"\" \n",
1570
+ " def check(p, q):\n",
1571
+ " # if both are None\n",
1572
+ " if not p and not q:\n",
1573
+ " return True\n",
1574
+ " # one of p and q is None\n",
1575
+ " if not q or not p:\n",
1576
+ " return False\n",
1577
+ " if p.val != q.val:\n",
1578
+ " return False\n",
1579
+ " return True\n",
1580
+ " \n",
1581
+ " deq = deque([(p, q),])\n",
1582
+ " while deq:\n",
1583
+ " p, q = deq.popleft()\n",
1584
+ " if not check(p, q):\n",
1585
+ " return False\n",
1586
+ " \n",
1587
+ " if p:\n",
1588
+ " deq.append((p.left, q.left))\n",
1589
+ " deq.append((p.right, q.right))\n",
1590
+ " \n",
1591
+ " return True\n",
1592
+ "\n",
1593
+ " "
1594
+ ]
1595
+ },
1596
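+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A recursive sketch of the same check (an alternative to the BFS version\n",
+ "# above, not from the original cell); uses the build_tree helper sketched\n",
+ "# earlier for the demo call.\n",
+ "def is_same_tree(p, q):\n",
+ "    if not p and not q:\n",
+ "        return True\n",
+ "    if not p or not q or p.val != q.val:\n",
+ "        return False\n",
+ "    return is_same_tree(p.left, q.left) and is_same_tree(p.right, q.right)\n",
+ "\n",
+ "is_same_tree(build_tree([1, 2, 3]), build_tree([1, 2, 3]))  # expected True"
+ ]
+ },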
+ {
1597
+ "cell_type": "markdown",
1598
+ "metadata": {},
1599
+ "source": [
1600
+ "# 102. Binary Tree Level Order Traversal\n",
1601
+ "\n",
1602
+ "Given the root of a binary tree, return the level order traversal of its nodes' values. (i.e., from left to right, level by level)."
1603
+ ]
1604
+ },
1605
+ {
1606
+ "cell_type": "code",
1607
+ "execution_count": 71,
1608
+ "metadata": {},
1609
+ "outputs": [
1610
+ {
1611
+ "ename": "NameError",
1612
+ "evalue": "name 'null' is not defined",
1613
+ "output_type": "error",
1614
+ "traceback": [
1615
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1616
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
1617
+ "Cell \u001b[0;32mIn[71], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m root \u001b[39m=\u001b[39m [\u001b[39m3\u001b[39m,\u001b[39m9\u001b[39m,\u001b[39m20\u001b[39m,null,null,\u001b[39m15\u001b[39m,\u001b[39m7\u001b[39m]\n",
1618
+ "\u001b[0;31mNameError\u001b[0m: name 'null' is not defined"
1619
+ ]
1620
+ }
1621
+ ],
1622
+ "source": [
1623
+ "def levelOrder(self, root):\n",
1624
+ " ans, level = [], [root]\n",
1625
+ " while root and level:\n",
1626
+ " ans.append([node.val for node in level])\n",
1627
+ " LRpair = [(node.left, node.right) for node in level]\n",
1628
+ " level = [leaf for LR in LRpair for leaf in LR if leaf]\n",
1629
+ " return ans\n",
1630
+ "\n",
1631
+ "root = [3,9,20,null,null,15,7]"
1632
+ ]
1633
+ },
1634
+ {
1635
+ "cell_type": "code",
1636
+ "execution_count": 72,
1637
+ "metadata": {},
1638
+ "outputs": [],
1639
+ "source": [
1640
+ "from collections import deque\n",
1641
+ "def levelOrder(self, root):\n",
1642
+ " # traverse in order level, keeping track of (row number, current node)\n",
1643
+ " queue = deque([(0, root)])\n",
1644
+ " # keep track of the nodes in each row\n",
1645
+ " d = {}\n",
1646
+ "\n",
1647
+ " while queue:\n",
1648
+ " row, node = queue.popleft()\n",
1649
+ " if node:\n",
1650
+ " d[row] = d.get(row, []) + [node.val]\n",
1651
+ " queue += (row+1, node.left), (row+1, node.right)\n",
1652
+ "\n",
1653
+ " # return a list of lists containing node values in increasing order with respect to the row number\n",
1654
+ " return [d[row] for row in sorted(d.keys())]"
1655
+ ]
1656
+ },
1657
+ {
1658
+ "cell_type": "markdown",
1659
+ "metadata": {},
1660
+ "source": [
1661
+ "# 104. Maximum Depth of Binary Tree\n",
1662
+ "\n",
1663
+ "Given the root of a binary tree, return its maximum depth.\n",
1664
+ "\n",
1665
+ "A binary tree's maximum depth is the number of nodes along the longest path from the root node down to the farthest leaf node."
1666
+ ]
1667
+ },
1668
+ {
1669
+ "cell_type": "code",
1670
+ "execution_count": 76,
1671
+ "metadata": {},
1672
+ "outputs": [
1673
+ {
1674
+ "ename": "NameError",
1675
+ "evalue": "name 'null' is not defined",
1676
+ "output_type": "error",
1677
+ "traceback": [
1678
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1679
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
1680
+ "Cell \u001b[0;32mIn[76], line 17\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mmax\u001b[39m(dfs(root\u001b[39m.\u001b[39mleft, depth \u001b[39m+\u001b[39m \u001b[39m1\u001b[39m), dfs(root\u001b[39m.\u001b[39mright, depth \u001b[39m+\u001b[39m \u001b[39m1\u001b[39m))\n\u001b[1;32m 14\u001b[0m \u001b[39mreturn\u001b[39;00m dfs(root, \u001b[39m0\u001b[39m)\n\u001b[0;32m---> 17\u001b[0m root \u001b[39m=\u001b[39m [\u001b[39m3\u001b[39m,\u001b[39m9\u001b[39m,\u001b[39m20\u001b[39m,null,null,\u001b[39m15\u001b[39m,\u001b[39m7\u001b[39m]\n\u001b[1;32m 19\u001b[0m Solution()\u001b[39m.\u001b[39mmaxDepth(root)\n",
1681
+ "\u001b[0;31mNameError\u001b[0m: name 'null' is not defined"
1682
+ ]
1683
+ }
1684
+ ],
1685
+ "source": [
1686
+ "# Definition for a binary tree node.\n",
1687
+ "\n",
1688
+ "# https://leetcode.com/problems/maximum-depth-of-binary-tree/solutions/1769367/python3-recursive-dfs-explained/?orderBy=most_votes\n",
1689
+ "class TreeNode:\n",
1690
+ " def __init__(self, val=0, left=None, right=None):\n",
1691
+ " self.val = val\n",
1692
+ " self.left = left\n",
1693
+ " self.right = right\n",
1694
+ "\n",
1695
+ "class Solution:\n",
1696
+ " def maxDepth(self, root: Optional[TreeNode]) -> int:\n",
1697
+ " def dfs(root, depth):\n",
1698
+ " if not root: return depth\n",
1699
+ " return max(dfs(root.left, depth + 1), dfs(root.right, depth + 1))\n",
1700
+ " \n",
1701
+ " return dfs(root, 0)\n",
1702
+ "\n",
1703
+ "\n",
1704
+ "root = [3,9,20,null,null,15,7]\n",
1705
+ "\n",
1706
+ "Solution().maxDepth(root)\n",
1707
+ "\n"
1708
+ ]
1709
+ },
1710
+ {
1711
+ "cell_type": "markdown",
1712
+ "metadata": {},
1713
+ "source": [
1714
+ "### Here, \"Object references are passed by value.\""
1715
+ ]
1716
+ },
1717
+ {
1718
+ "cell_type": "code",
1719
+ "execution_count": 78,
1720
+ "metadata": {},
1721
+ "outputs": [
1722
+ {
1723
+ "name": "stdout",
1724
+ "output_type": "stream",
1725
+ "text": [
1726
+ "[0, 1]\n",
1727
+ "[0, 1]\n"
1728
+ ]
1729
+ }
1730
+ ],
1731
+ "source": [
1732
+ "listA = [0]\n",
1733
+ "listB = listA\n",
1734
+ "listB.append(1)\n",
1735
+ "print (listA)\n",
1736
+ "print(listB)"
1737
+ ]
1738
+ },
1739
+ {
1740
+ "cell_type": "code",
1741
+ "execution_count": 83,
1742
+ "metadata": {},
1743
+ "outputs": [
1744
+ {
1745
+ "name": "stdout",
1746
+ "output_type": "stream",
1747
+ "text": [
1748
+ "[0]\n",
1749
+ "[0, 1]\n",
1750
+ "[0]\n",
1751
+ "[0, 1]\n",
1752
+ "[0, 1]\n"
1753
+ ]
1754
+ }
1755
+ ],
1756
+ "source": [
1757
+ "def reassign(list):\n",
1758
+ " print(list)\n",
1759
+ " list = [0, 1]\n",
1760
+ " print(list)\n",
1761
+ "\n",
1762
+ "def append(list):\n",
1763
+ " print(list)\n",
1764
+ " list.append(1)\n",
1765
+ " print(list)\n",
1766
+ "\n",
1767
+ "list = [0]\n",
1768
+ "reassign(list)\n",
1769
+ "append(list)\n",
1770
+ "\n",
1771
+ "print(list)"
1772
+ ]
1773
+ },
1774
+ {
1775
+ "cell_type": "code",
1776
+ "execution_count": 85,
1777
+ "metadata": {},
1778
+ "outputs": [
1779
+ {
1780
+ "name": "stdout",
1781
+ "output_type": "stream",
1782
+ "text": [
1783
+ "some value\n"
1784
+ ]
1785
+ }
1786
+ ],
1787
+ "source": [
1788
+ "def foo(x):\n",
1789
+ " print(x)\n",
1790
+ "\n",
1791
+ "bar = 'some value'\n",
1792
+ "foo(bar)"
1793
+ ]
1794
+ },
1795
+ {
1796
+ "cell_type": "code",
1797
+ "execution_count": 88,
1798
+ "metadata": {},
1799
+ "outputs": [
1800
+ {
1801
+ "name": "stdout",
1802
+ "output_type": "stream",
1803
+ "text": [
1804
+ "another value\n",
1805
+ "some value\n"
1806
+ ]
1807
+ }
1808
+ ],
1809
+ "source": [
1810
+ "def foo(x):\n",
1811
+ " x = 'another value'\n",
1812
+ " print (x)\n",
1813
+ "\n",
1814
+ "bar = 'some value'\n",
1815
+ "foo(bar)\n",
1816
+ "print(bar)"
1817
+ ]
1818
+ },
1819
+ {
1820
+ "cell_type": "code",
1821
+ "execution_count": 93,
1822
+ "metadata": {},
1823
+ "outputs": [
1824
+ {
1825
+ "name": "stdout",
1826
+ "output_type": "stream",
1827
+ "text": [
1828
+ "[0]\n",
1829
+ "[0, 1]\n",
1830
+ "[0, 1]\n"
1831
+ ]
1832
+ }
1833
+ ],
1834
+ "source": [
1835
+ "def append_one(li):\n",
1836
+ " print(li)\n",
1837
+ " li.append(1)\n",
1838
+ " print(li)\n",
1839
+ " return li\n",
1840
+ "\n",
1841
+ "x = [0]\n",
1842
+ "x = append_one(x)\n",
1843
+ "print(x)"
1844
+ ]
1845
+ },
1846
+ {
1847
+ "cell_type": "markdown",
1848
+ "metadata": {},
1849
+ "source": [
1850
+ "# 105. Construct Binary Tree From Preorder and Inorder Traversal \n",
1851
+ "\n",
1852
+ "Given two integer arrays preorder and inorder where preorder is the preorder traversal of a binary tree and inorder is the inorder traversal of the same tree, construct and return the binary tree."
1853
+ ]
1854
+ },
1855
+ {
1856
+ "cell_type": "code",
1857
+ "execution_count": 102,
1858
+ "metadata": {},
1859
+ "outputs": [
1860
+ {
1861
+ "name": "stdout",
1862
+ "output_type": "stream",
1863
+ "text": [
1864
+ "3\n"
1865
+ ]
1866
+ }
1867
+ ],
1868
+ "source": [
1869
+ "class TreeNode:\n",
1870
+ " def __init__(self, val=0, left=None, right=None):\n",
1871
+ " self.val = val\n",
1872
+ " self.left = left\n",
1873
+ " self.right = right\n",
1874
+ "\n",
1875
+ "class Solution:\n",
1876
+ " def buildTree(self, preorder: List[int], inorder: List[int]) -> TreeNode:\n",
1877
+ "\n",
1878
+ " def array_to_tree(left, right):\n",
1879
+ " nonlocal preorder_index\n",
1880
+ " # if there are no elements to construct the tree\n",
1881
+ " if left > right: return None\n",
1882
+ "\n",
1883
+ " # select the preorder_index element as the root and increment it\n",
1884
+ " root_value = preorder[preorder_index]\n",
1885
+ " root = TreeNode(root_value)\n",
1886
+ "\n",
1887
+ "\n",
1888
+ " preorder_index += 1\n",
1889
+ "\n",
1890
+ " # build left and right subtree\n",
1891
+ " # excluding inorder_index_map[root_value] element because it's the root\n",
1892
+ " root.left = array_to_tree(left, inorder_index_map[root_value] - 1)\n",
1893
+ " root.right = array_to_tree(inorder_index_map[root_value] + 1, right)\n",
1894
+ "\n",
1895
+ " return root\n",
1896
+ "\n",
1897
+ " preorder_index = 0\n",
1898
+ "\n",
1899
+ " # build a hashmap to store value -> its index relations\n",
1900
+ " inorder_index_map = {}\n",
1901
+ " for index, value in enumerate(inorder):\n",
1902
+ " inorder_index_map[value] = index\n",
1903
+ "\n",
1904
+ " return array_to_tree(0, len(preorder) - 1)\n",
1905
+ "\n",
1906
+ "preorder = [3,9,20,15,7]\n",
1907
+ "inorder = [9,3,15,20,7]\n",
1908
+ "\n",
1909
+ "# preorder = [-1]\n",
1910
+ "# inorder = [-1]\n",
1911
+ "\n",
1912
+ "foo = Solution().buildTree(preorder, inorder)\n",
1913
+ "print(foo.val)"
1914
+ ]
1915
+ },
1916
+ {
1917
+ "cell_type": "markdown",
1918
+ "metadata": {},
1919
+ "source": [
1920
+ "# 121. Best Time to Buy and Sell Stock\n",
1921
+ "\n",
1922
+ "You are given an array prices where prices[i] is the price of a given stock on the ith day.\n",
1923
+ "\n",
1924
+ "You want to maximize your profit by choosing a single day to buy one stock and choosing a different day in the future to sell that stock.\n",
1925
+ "\n",
1926
+ "Return the maximum profit you can achieve from this transaction. If you cannot achieve any profit, return 0."
1927
+ ]
1928
+ },
1929
+ {
1930
+ "cell_type": "code",
1931
+ "execution_count": 109,
1932
+ "metadata": {},
1933
+ "outputs": [
1934
+ {
1935
+ "name": "stdout",
1936
+ "output_type": "stream",
1937
+ "text": [
1938
+ "inf\n"
1939
+ ]
1940
+ },
1941
+ {
1942
+ "data": {
1943
+ "text/plain": [
1944
+ "5"
1945
+ ]
1946
+ },
1947
+ "execution_count": 109,
1948
+ "metadata": {},
1949
+ "output_type": "execute_result"
1950
+ }
1951
+ ],
1952
+ "source": [
1953
+ "# could brute force with O(n^2) \n",
1954
+ "class Solution:\n",
1955
+ " def maxProfit(self, prices: List[int]) -> int:\n",
1956
+ " min_price = float('inf')\n",
1957
+ " print(min_price)\n",
1958
+ " max_profit = 0\n",
1959
+ " # for i in range(len(prices)):\n",
1960
+ " for price in prices:\n",
1961
+ " diff = price - min_price\n",
1962
+ "\n",
1963
+ " if price < min_price:\n",
1964
+ " min_price = price\n",
1965
+ " elif diff > max_profit:\n",
1966
+ " max_profit = diff\n",
1967
+ " \n",
1968
+ " return max_profit\n",
1969
+ "\n",
1970
+ "prices = [7,1,5,3,6,4]\n",
1971
+ "\n",
1972
+ "min(prices)\n",
1973
+ "\n",
1974
+ "Solution().maxProfit(prices)\n",
1975
+ "# Buy on day 2 (price = 1) and sell on day 5 (price = 6), profit = 6-1 = 5.\n",
1976
+ "# Note that buying on day 2 and selling on day 1 is not allowed because you must buy before you sell."
1977
+ ]
1978
+ },
1979
+ {
1980
+ "cell_type": "code",
1981
+ "execution_count": 114,
1982
+ "metadata": {},
1983
+ "outputs": [
1984
+ {
1985
+ "data": {
1986
+ "text/plain": [
1987
+ "1"
1988
+ ]
1989
+ },
1990
+ "execution_count": 114,
1991
+ "metadata": {},
1992
+ "output_type": "execute_result"
1993
+ }
1994
+ ],
1995
+ "source": [
1996
+ "prices = [7,1,5,3,6,4]\n",
1997
+ "\n",
1998
+ "# using the builtins but doesn't really work very nicely. \n",
1999
+ "min(prices)\n",
2000
+ "max(prices)\n",
2001
+ "prices.index(max(prices))\n",
2002
+ "prices.index(min(prices))"
2003
+ ]
2004
+ },
2005
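+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A quick illustration (not in the original cell) of why the builtin approach\n",
+ "# fails: here the minimum comes after the maximum, so \"buy at min, sell at\n",
+ "# max\" is not a valid trade, while the single-pass solution above still works.\n",
+ "prices = [2, 10, 1]\n",
+ "print(prices.index(min(prices)), prices.index(max(prices)))  # 2 1: min is after max\n",
+ "print(Solution().maxProfit(prices))  # 8, from buying at 2 and selling at 10"
+ ]
+ },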
+ {
2006
+ "cell_type": "markdown",
2007
+ "metadata": {},
2008
+ "source": [
2009
+ "# 124. Binary Tree Maximum Path Sum (hard)\n",
2010
+ "\n",
2011
+ "A path in a binary tree is a sequence of nodes where each pair of adjacent nodes in the sequence has an edge connecting them. A node can only appear in the sequence at most once. Note that the path does not need to pass through the root.\n",
2012
+ "\n",
2013
+ "The path sum of a path is the sum of the node's values in the path.\n",
2014
+ "\n",
2015
+ "Given the root of a binary tree, return the maximum path sum of any non-empty path."
2016
+ ]
2017
+ },
2018
+ {
2019
+ "cell_type": "code",
2020
+ "execution_count": null,
2021
+ "metadata": {},
2022
+ "outputs": [],
2023
+ "source": []
2024
+ },
2025
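+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The cell above was left empty; this is a sketch of the standard DFS\n",
+ "# solution, assuming the TreeNode class defined earlier in the notebook.\n",
+ "class Solution:\n",
+ "    def maxPathSum(self, root):\n",
+ "        self.best = float('-inf')\n",
+ "\n",
+ "        def gain(node):\n",
+ "            # best downward path sum starting at node, clamped at zero\n",
+ "            if not node:\n",
+ "                return 0\n",
+ "            left = max(gain(node.left), 0)\n",
+ "            right = max(gain(node.right), 0)\n",
+ "            # a path may bend at this node, joining both subtrees\n",
+ "            self.best = max(self.best, node.val + left + right)\n",
+ "            return node.val + max(left, right)\n",
+ "\n",
+ "        gain(root)\n",
+ "        return self.best\n",
+ "\n",
+ "root = TreeNode(-10, TreeNode(9), TreeNode(20, TreeNode(15), TreeNode(7)))\n",
+ "Solution().maxPathSum(root)  # expected 42, the path 15 -> 20 -> 7"
+ ]
+ },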
+ {
2026
+ "cell_type": "markdown",
2027
+ "metadata": {},
2028
+ "source": [
2029
+ "# 125. Valid Palindrome (easy)\n",
2030
+ "\n",
2031
+ "A phrase is a palindrome if, after converting all uppercase letters into lowercase letters and removing all non-alphanumeric characters, it reads the same forward and backward. Alphanumeric characters include letters and numbers.\n",
2032
+ "\n",
2033
+ "Given a string s, return true if it is a palindrome, or false otherwise."
2034
+ ]
2035
+ },
2036
+ {
2037
+ "cell_type": "code",
2038
+ "execution_count": 116,
2039
+ "metadata": {},
2040
+ "outputs": [
2041
+ {
2042
+ "name": "stdout",
2043
+ "output_type": "stream",
2044
+ "text": [
2045
+ "amanaplanacanalpanama\n",
2046
+ "amanaplanacanalpanama\n"
2047
+ ]
2048
+ },
2049
+ {
2050
+ "data": {
2051
+ "text/plain": [
2052
+ "True"
2053
+ ]
2054
+ },
2055
+ "execution_count": 116,
2056
+ "metadata": {},
2057
+ "output_type": "execute_result"
2058
+ }
2059
+ ],
2060
+ "source": [
2061
+ "def isPalindrome(self, s):\n",
2062
+ " l, r = 0, len(s)-1\n",
2063
+ " while l < r:\n",
2064
+ " while l < r and not s[l].isalnum():\n",
2065
+ " l += 1\n",
2066
+ " while l <r and not s[r].isalnum():\n",
2067
+ " r -= 1\n",
2068
+ " if s[l].lower() != s[r].lower():\n",
2069
+ " return False\n",
2070
+ " l +=1; r -= 1\n",
2071
+ " return True\n",
2072
+ "\n",
2073
+ "# so dope bro \n",
2074
+ "class Solution:\n",
2075
+ " def isPalindrome(self, s):\n",
2076
+ " s = ''.join(e for e in s if e.isalnum()).lower()\n",
2077
+ " print(s)\n",
2078
+ " print(s[::-1])\n",
2079
+ " return s==s[::-1]\n",
2080
+ "\n",
2081
+ "s = \"A man, a plan, a canal: Panama\"\n",
2082
+ "\n",
2083
+ "Solution().isPalindrome(s)"
2084
+ ]
2085
+ },
2086
+ {
2087
+ "cell_type": "code",
2088
+ "execution_count": 5,
2089
+ "metadata": {},
2090
+ "outputs": [
2091
+ {
2092
+ "data": {
2093
+ "text/plain": [
2094
+ "4.2"
2095
+ ]
2096
+ },
2097
+ "execution_count": 5,
2098
+ "metadata": {},
2099
+ "output_type": "execute_result"
2100
+ }
2101
+ ],
2102
+ "source": [
2103
+ "float(\"4.2/5\".split('/')[0].strip())\n",
2104
+ "\n",
2105
+ "float(\"4.2/5\".split(\"/\")[0].strip())"
2106
+ ]
2107
+ },
2108
+ {
2109
+ "attachments": {},
2110
+ "cell_type": "markdown",
2111
+ "metadata": {},
2112
+ "source": [
2113
+ "# Balanced sum \n",
2114
+ "\n",
2115
+ "Given an array of integers nums, calculate the pivot index of this array.\n",
2116
+ "\n",
2117
+ "The pivot index is the index where the sum of all the numbers strictly to the left of the index is equal to the sum of all the numbers strictly to the index's right.\n",
2118
+ "\n",
2119
+ "If the index is on the left edge of the array, then the left sum is 0 because there are no elements to the left. This also applies to the right edge of the array.\n",
2120
+ "\n",
2121
+ "Return the leftmost pivot index. If no such index exists, return -1."
2122
+ ]
2123
+ },
2124
+ {
2125
+ "cell_type": "code",
2126
+ "execution_count": 1,
2127
+ "metadata": {},
2128
+ "outputs": [],
2129
+ "source": [
2130
+ "# Time Complexity : O(n)\n",
2131
+ "# Space Complexity : O(1)\n",
2132
+ "class Solution(object):\n",
2133
+ " def pivotIndex(self, nums):\n",
2134
+ " # Initialize leftSum & rightSum to store the sum of all the numbers strictly to the index's left & right respectively...\n",
2135
+ " leftSum, rightSum = 0, sum(nums)\n",
2136
+ " # Traverse elements through the loop...\n",
2137
+ " for idx, ele in enumerate(nums):\n",
2138
+ " rightSum -= ele\n",
2139
+ " # If the sum of all the numbers strictly to the left of the index is equal to the sum of all the numbers strictly to the index's right...\n",
2140
+ " if leftSum == rightSum:\n",
2141
+ " return idx # Return the pivot index...\n",
2142
+ " leftSum += ele\n",
2143
+ " return -1 # If there is no index that satisfies the conditions in the problem statement..."
2144
+ ]
2145
+ },
2146
+ {
2147
+ "cell_type": "code",
2148
+ "execution_count": 2,
2149
+ "metadata": {},
2150
+ "outputs": [
2151
+ {
2152
+ "data": {
2153
+ "text/plain": [
2154
+ "3"
2155
+ ]
2156
+ },
2157
+ "execution_count": 2,
2158
+ "metadata": {},
2159
+ "output_type": "execute_result"
2160
+ }
2161
+ ],
2162
+ "source": [
2163
+ "nums = [1,7,3,6,5,6]\n",
2164
+ "Solution().pivotIndex(nums)"
2165
+ ]
2166
+ }
2167
+ ],
2168
+ "metadata": {
2169
+ "kernelspec": {
2170
+ "display_name": "Python 3",
2171
+ "language": "python",
2172
+ "name": "python3"
2173
+ },
2174
+ "language_info": {
2175
+ "codemirror_mode": {
2176
+ "name": "ipython",
2177
+ "version": 3
2178
+ },
2179
+ "file_extension": ".py",
2180
+ "mimetype": "text/x-python",
2181
+ "name": "python",
2182
+ "nbconvert_exporter": "python",
2183
+ "pygments_lexer": "ipython3",
2184
+ "version": "3.11.4"
2185
+ },
2186
+ "orig_nbformat": 4,
2187
+ "vscode": {
2188
+ "interpreter": {
2189
+ "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
2190
+ }
2191
+ }
2192
+ },
2193
+ "nbformat": 4,
2194
+ "nbformat_minor": 2
2195
+ }
code/lru_cache_leetcode.ipynb ADDED
@@ -0,0 +1,286 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "OrderedDict([(1, 1)])\n",
13
+ "OrderedDict([(1, 1), (2, 2)])\n",
14
+ "OrderedDict([(2, 2), (1, 1)])\n",
15
+ "OrderedDict([(1, 1), (3, 3)])\n",
16
+ "OrderedDict([(1, 1), (3, 3)])\n",
17
+ "OrderedDict([(3, 3), (4, 4)])\n",
18
+ "OrderedDict([(3, 3), (4, 4)])\n",
19
+ "OrderedDict([(4, 4), (3, 3)])\n",
20
+ "OrderedDict([(3, 3), (4, 4)])\n"
21
+ ]
22
+ }
23
+ ],
24
+ "source": [
25
+ "from collections import OrderedDict\n",
26
+ "\n",
27
+ "class LRUCache:\n",
28
+ "\n",
29
+ " # initialising capacity\n",
30
+ " def __init__(self, capacity: int):\n",
31
+ " self.cache = OrderedDict()\n",
32
+ " self.capacity = capacity\n",
33
+ "\n",
34
+ " # we return the value of the key\n",
35
+ " # that is queried in O(1) and return -1 if we\n",
36
+ " # don't find the key in out dict / cache.\n",
37
+ " # And also move the key to the end\n",
38
+ " # to show that it was recently used.\n",
39
+ " def get(self, key: int) -> int:\n",
40
+ " if key not in self.cache:\n",
41
+ " return -1\n",
42
+ " else:\n",
43
+ " self.cache.move_to_end(key)\n",
44
+ " return self.cache[key]\n",
45
+ "\n",
46
+ " # first, we add / update the key by conventional methods.\n",
47
+ " # And also move the key to the end to show that it was recently used.\n",
48
+ " # But here we will also check whether the length of our\n",
49
+ " # ordered dictionary has exceeded our capacity,\n",
50
+ " # If so we remove the first key (least recently used)\n",
51
+ " def put(self, key: int, value: int) -> None:\n",
52
+ " self.cache[key] = value\n",
53
+ " self.cache.move_to_end(key)\n",
54
+ " if len(self.cache) > self.capacity:\n",
55
+ " self.cache.popitem(last = False)\n",
56
+ "\n",
57
+ "\n",
58
+ "# RUNNER\n",
59
+ "# initializing our cache with the capacity of 2\n",
60
+ "cache = LRUCache(2)\n",
61
+ "\n",
62
+ "\n",
63
+ "cache.put(1, 1)\n",
64
+ "print(cache.cache)\n",
65
+ "cache.put(2, 2)\n",
66
+ "print(cache.cache)\n",
67
+ "cache.get(1)\n",
68
+ "print(cache.cache)\n",
69
+ "cache.put(3, 3)\n",
70
+ "print(cache.cache)\n",
71
+ "cache.get(2)\n",
72
+ "print(cache.cache)\n",
73
+ "cache.put(4, 4)\n",
74
+ "print(cache.cache)\n",
75
+ "cache.get(1)\n",
76
+ "print(cache.cache)\n",
77
+ "cache.get(3)\n",
78
+ "print(cache.cache)\n",
79
+ "cache.get(4)\n",
80
+ "print(cache.cache)\n",
81
+ "\n",
82
+ "#This code was contributed by Sachin Negi\n"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "markdown",
87
+ "metadata": {},
88
+ "source": [
89
+ "# Stop Words (EASY) - 15 minutes\n",
90
+ "\n",
91
+ "Implement a function that takes a string text and an integer k, and returns the list of words that occur in the text at least k times. The words must be returned in the order of their first occurrence in the text."
92
+ ]
93
+ },
94
+ {
95
+ "cell_type": "code",
96
+ "execution_count": 5,
97
+ "metadata": {},
98
+ "outputs": [
99
+ {
100
+ "name": "stdout",
101
+ "output_type": "stream",
102
+ "text": [
103
+ "{'the': [2, 0], 'brown': [3, 1], 'fox': [1, 2], 'jumps': [1, 3], 'over': [1, 4], 'dog': [1, 7], 'and': [1, 8], 'runs': [1, 9], 'away': [1, 10], 'to': [1, 11], 'a': [1, 12], 'house': [1, 14]}\n"
104
+ ]
105
+ },
106
+ {
107
+ "data": {
108
+ "text/plain": [
109
+ "['the',\n",
110
+ " 'brown',\n",
111
+ " 'fox',\n",
112
+ " 'jumps',\n",
113
+ " 'over',\n",
114
+ " 'dog',\n",
115
+ " 'and',\n",
116
+ " 'runs',\n",
117
+ " 'away',\n",
118
+ " 'to',\n",
119
+ " 'a',\n",
120
+ " 'house']"
121
+ ]
122
+ },
123
+ "execution_count": 5,
124
+ "metadata": {},
125
+ "output_type": "execute_result"
126
+ }
127
+ ],
128
+ "source": [
129
+ "text = \"the brown fox jumps over the brown dog and runs away to a brown house\"\n",
130
+ "k = 2 \n",
131
+ "\n",
132
+ "def stop_words(text, k):\n",
133
+ " cnt = {}\n",
134
+ " for i, w in enumerate(text.split(\" \")):\n",
135
+ " if w not in cnt:\n",
136
+ " cnt[w] = [0, i]\n",
137
+ " cnt[w][0] += 1\n",
138
+ " print(cnt)\n",
139
+ " stop_words = [(i, w) for w, (c, i) in cnt.items() if c >= k]\n",
140
+ " res = [w for _, w in sorted(stop_words)]\n",
141
+ " return res\n",
142
+ "\n",
143
+ "\n",
144
+ "stop_words(text, 1)"
145
+ ]
146
+ },
147
+ {
148
+ "cell_type": "markdown",
149
+ "metadata": {},
150
+ "source": [
151
+ "# DPI Software Traffic Deviation Report - Medium SQL - 35 minutes\n",
152
+ "\n",
153
+ "As part of HackerSniff's DPI (Deep Packet Inspection) software analytics, a team needs a list of all the clients and traffic protocols they have used.\n",
154
+ "\n",
155
+ " \n",
156
+ "\n",
157
+ "The result should be in the following format: client, protocol.\n",
158
+ "\n",
159
+ " protocol is a comma-separated list of all the protocols for particular client, ordered descending by total traffic, which calculated as sum of traffic_in and traffic_out.\n",
160
+ " Results should be sorted ascending by client.\n"
161
+ ]
162
+ },
163
+ {
164
+ "cell_type": "code",
165
+ "execution_count": 7,
166
+ "metadata": {},
167
+ "outputs": [],
168
+ "source": [
169
+ "sql = \"\"\"\n",
170
+ "WITH _data AS (\n",
171
+ "SELECT client,\n",
172
+ " protocol,\n",
173
+ " SUM(traffic_in) + SUM(traffic_out) traffic\n",
174
+ "FROM traffic\n",
175
+ "GROUP BY client, protocol\n",
176
+ ")\n",
177
+ "\n",
178
+ "SELECT client,\n",
179
+ " GROUP_CONCAT(protocol ORDER BY traffic DESC) AS protocol\n",
180
+ "FROM _data\n",
181
+ "GROUP BY client\n",
182
+ "ORDER BY client\n",
183
+ "\"\"\"\n"
184
+ ]
185
+ },
186
+ {
187
+ "cell_type": "markdown",
188
+ "metadata": {},
189
+ "source": [
190
+ "# Balanced Sales Array (EASY) \n",
191
+ "\n",
192
+ "Given has an array of sales numbers, what is the index of the smallest index element for which the sums of all elements to the left and to the right are equal. The array may not be reordered.\n",
193
+ "For example, given the array sales = [1, 2, 3, 4, 6],we see that 1+2+3=6,Using zero based indexing,sales[3] = 4 is the value sought.The index is 3."
194
+ ]
195
+ },
196
+ {
197
+ "cell_type": "code",
198
+ "execution_count": 12,
199
+ "metadata": {},
200
+ "outputs": [
201
+ {
202
+ "name": "stdout",
203
+ "output_type": "stream",
204
+ "text": [
205
+ "16\n",
206
+ "16\n",
207
+ "[1, 3, 6, 10, 16]\n",
208
+ "[6, 10, 13, 15, 16]\n"
209
+ ]
210
+ },
211
+ {
212
+ "data": {
213
+ "text/plain": [
214
+ "3"
215
+ ]
216
+ },
217
+ "execution_count": 12,
218
+ "metadata": {},
219
+ "output_type": "execute_result"
220
+ }
221
+ ],
222
+ "source": [
223
+ "data = [1, 2, 3, 4, 6]\n",
224
+ "\n",
225
+ "class Solution:\n",
226
+ " \"\"\"\n",
227
+ " @param sales: a integer array\n",
228
+ " @return: return a Integer\n",
229
+ " \"\"\"\n",
230
+ " import itertools\n",
231
+ " def BalancedSalesArray(self, sales):\n",
232
+ " # write your code here\n",
233
+ " left,right = [],[]\n",
234
+ " leftsum = rightsum = 0\n",
235
+ " n = len(sales)\n",
236
+ " for i in range(n):\n",
237
+ " leftsum += sales[i] \n",
238
+ " left.append(leftsum)\n",
239
+ " rightsum += sales[n-1-i]\n",
240
+ " right.append(rightsum)\n",
241
+ " \n",
242
+ " print(leftsum)\n",
243
+ " print(rightsum)\n",
244
+ "\n",
245
+ " print(left)\n",
246
+ " print(right)\n",
247
+ "\n",
248
+ " n = len(sales)\n",
249
+ " for i in range(n):\n",
250
+ " if left[i] == right[n-1-i]:\n",
251
+ " return i \n",
252
+ " return -1\n",
253
+ " \n",
254
+ " \n",
255
+ "Solution().BalancedSalesArray(data)"
256
+ ]
257
+ }
258
+ ],
259
+ "metadata": {
260
+ "kernelspec": {
261
+ "display_name": "Python 3.10.5 ('pytorch-nightly')",
262
+ "language": "python",
263
+ "name": "python3"
264
+ },
265
+ "language_info": {
266
+ "codemirror_mode": {
267
+ "name": "ipython",
268
+ "version": 3
269
+ },
270
+ "file_extension": ".py",
271
+ "mimetype": "text/x-python",
272
+ "name": "python",
273
+ "nbconvert_exporter": "python",
274
+ "pygments_lexer": "ipython3",
275
+ "version": "3.10.5"
276
+ },
277
+ "orig_nbformat": 4,
278
+ "vscode": {
279
+ "interpreter": {
280
+ "hash": "8a8bcccfb183d1298694efece6cf41240378bc61621e95c864629a40c5876542"
281
+ }
282
+ }
283
+ },
284
+ "nbformat": 4,
285
+ "nbformat_minor": 2
286
+ }
code/m1_gpu_pytorch.ipynb ADDED
@@ -0,0 +1,472 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "ename": "ModuleNotFoundError",
10
+ "evalue": "No module named 'torch'",
11
+ "output_type": "error",
12
+ "traceback": [
13
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
14
+ "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
15
+ "\u001b[1;32m/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb Cell 1'\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000000?line=0'>1</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mtorch\u001b[39;00m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000000?line=1'>2</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtorch\u001b[39;00m \u001b[39mimport\u001b[39;00m nn\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000000?line=2'>3</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtorch\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mutils\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mdata\u001b[39;00m \u001b[39mimport\u001b[39;00m DataLoader\n",
16
+ "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'torch'"
17
+ ]
18
+ }
19
+ ],
20
+ "source": [
21
+ "import torch\n",
22
+ "from torch import nn\n",
23
+ "from torch.utils.data import DataLoader\n",
24
+ "from torchvision import datasets\n",
25
+ "from torchvision.transforms import ToTensor"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 7,
31
+ "metadata": {},
32
+ "outputs": [
33
+ {
34
+ "name": "stdout",
35
+ "output_type": "stream",
36
+ "text": [
37
+ "Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/nightly/cpu\n",
38
+ "Collecting torch\n",
39
+ " Downloading https://download.pytorch.org/whl/nightly/cpu/torch-1.13.0.dev20220703-cp310-none-macosx_11_0_arm64.whl (50.1 MB)\n",
40
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.1/50.1 MB\u001b[0m \u001b[31m30.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
41
+ "\u001b[?25hCollecting torchvision\n",
42
+ " Downloading https://download.pytorch.org/whl/nightly/cpu/torchvision-0.14.0.dev20220703-cp310-cp310-macosx_11_0_arm64.whl (1.4 MB)\n",
43
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
44
+ "\u001b[?25hCollecting torchaudio\n",
45
+ " Downloading https://download.pytorch.org/whl/nightly/cpu/torchaudio-0.14.0.dev20220603-cp310-cp310-macosx_11_0_arm64.whl (2.7 MB)\n",
46
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.7/2.7 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n",
47
+ "\u001b[?25hCollecting typing-extensions\n",
48
+ " Using cached typing_extensions-4.3.0-py3-none-any.whl (25 kB)\n",
49
+ "Collecting pillow!=8.3.*,>=5.3.0\n",
50
+ " Downloading Pillow-9.2.0-cp310-cp310-macosx_11_0_arm64.whl (2.8 MB)\n",
51
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m14.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
52
+ "\u001b[?25hCollecting requests\n",
53
+ " Using cached requests-2.28.1-py3-none-any.whl (62 kB)\n",
54
+ "Collecting numpy\n",
55
+ " Downloading numpy-1.23.0-cp310-cp310-macosx_11_0_arm64.whl (13.3 MB)\n",
56
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.3/13.3 MB\u001b[0m \u001b[31m44.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
57
+ "\u001b[?25hCollecting certifi>=2017.4.17\n",
58
+ " Downloading certifi-2022.6.15-py3-none-any.whl (160 kB)\n",
59
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m160.2/160.2 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
60
+ "\u001b[?25hCollecting idna<4,>=2.5\n",
61
+ " Using cached idna-3.3-py3-none-any.whl (61 kB)\n",
62
+ "Collecting charset-normalizer<3,>=2\n",
63
+ " Using cached charset_normalizer-2.1.0-py3-none-any.whl (39 kB)\n",
64
+ "Collecting urllib3<1.27,>=1.21.1\n",
65
+ " Using cached urllib3-1.26.9-py2.py3-none-any.whl (138 kB)\n",
66
+ "Installing collected packages: urllib3, typing-extensions, pillow, numpy, idna, charset-normalizer, certifi, torch, requests, torchvision, torchaudio\n",
67
+ "Successfully installed certifi-2022.6.15 charset-normalizer-2.1.0 idna-3.3 numpy-1.23.0 pillow-9.2.0 requests-2.28.1 torch-1.13.0.dev20220703 torchaudio-0.14.0.dev20220603 torchvision-0.14.0.dev20220703 typing-extensions-4.3.0 urllib3-1.26.9\n"
68
+ ]
69
+ }
70
+ ],
71
+ "source": [
72
+ "! pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": 9,
78
+ "metadata": {},
79
+ "outputs": [
80
+ {
81
+ "name": "stdout",
82
+ "output_type": "stream",
83
+ "text": [
84
+ "torch 1.13.0.dev20220703\n",
85
+ "device mps\n",
86
+ "Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz\n"
87
+ ]
88
+ },
89
+ {
90
+ "name": "stderr",
91
+ "output_type": "stream",
92
+ "text": [
93
+ "100.0%\n"
94
+ ]
95
+ },
96
+ {
97
+ "name": "stdout",
98
+ "output_type": "stream",
99
+ "text": [
100
+ "Extracting data/cifar-10-python.tar.gz to data\n"
101
+ ]
102
+ },
103
+ {
104
+ "name": "stderr",
105
+ "output_type": "stream",
106
+ "text": [
107
+ "Downloading: \"https://github.com/pytorch/vision/zipball/v0.11.0\" to /Users/johnnydevriese/.cache/torch/hub/v0.11.0.zip\n",
108
+ "/Users/johnnydevriese/miniforge3/envs/pytorch-nightly/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and will be removed in 0.15, please use 'weights' instead.\n",
109
+ " warnings.warn(\n",
110
+ "/Users/johnnydevriese/miniforge3/envs/pytorch-nightly/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=None`.\n",
111
+ " warnings.warn(msg)\n"
112
+ ]
113
+ },
114
+ {
115
+ "name": "stdout",
116
+ "output_type": "stream",
117
+ "text": [
118
+ "Epoch: 001/001 | Batch 0000/1406 | Loss: 2.5887\n",
119
+ "Epoch: 001/001 | Batch 0100/1406 | Loss: 2.4339\n",
120
+ "Epoch: 001/001 | Batch 0200/1406 | Loss: 2.0386\n",
121
+ "Epoch: 001/001 | Batch 0300/1406 | Loss: 2.0561\n",
122
+ "Epoch: 001/001 | Batch 0400/1406 | Loss: 2.1730\n",
123
+ "Epoch: 001/001 | Batch 0500/1406 | Loss: 2.1067\n"
124
+ ]
125
+ },
126
+ {
127
+ "ename": "KeyboardInterrupt",
128
+ "evalue": "",
129
+ "output_type": "error",
130
+ "traceback": [
131
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
132
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
133
+ "\u001b[1;32m/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb Cell 3'\u001b[0m in \u001b[0;36m<cell line: 205>\u001b[0;34m()\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=256'>257</a>\u001b[0m model \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39mto(DEVICE)\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=258'>259</a>\u001b[0m optimizer \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39moptim\u001b[39m.\u001b[39mAdam(model\u001b[39m.\u001b[39mparameters(), lr\u001b[39m=\u001b[39m\u001b[39m0.0005\u001b[39m)\n\u001b[0;32m--> <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=260'>261</a>\u001b[0m minibatch_loss_list, train_acc_list, valid_acc_list \u001b[39m=\u001b[39m train_classifier_simple_v2(\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=261'>262</a>\u001b[0m model\u001b[39m=\u001b[39;49mmodel,\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=262'>263</a>\u001b[0m num_epochs\u001b[39m=\u001b[39;49mNUM_EPOCHS,\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=263'>264</a>\u001b[0m train_loader\u001b[39m=\u001b[39;49mtrain_loader,\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=264'>265</a>\u001b[0m valid_loader\u001b[39m=\u001b[39;49mvalid_loader,\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=265'>266</a>\u001b[0m test_loader\u001b[39m=\u001b[39;49mtest_loader,\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=266'>267</a>\u001b[0m optimizer\u001b[39m=\u001b[39;49moptimizer,\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=267'>268</a>\u001b[0m best_model_save_path\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m,\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=268'>269</a>\u001b[0m device\u001b[39m=\u001b[39;49mDEVICE,\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=269'>270</a>\u001b[0m scheduler_on\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mvalid_acc\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=270'>271</a>\u001b[0m logging_interval\u001b[39m=\u001b[39;49m\u001b[39m100\u001b[39;49m,\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=271'>272</a>\u001b[0m )\n",
134
+ "\u001b[1;32m/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb Cell 3'\u001b[0m in \u001b[0;36mtrain_classifier_simple_v2\u001b[0;34m(model, num_epochs, train_loader, valid_loader, test_loader, optimizer, device, logging_interval, best_model_save_path, scheduler, skip_train_acc, scheduler_on)\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=66'>67</a>\u001b[0m targets \u001b[39m=\u001b[39m targets\u001b[39m.\u001b[39mto(device)\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=68'>69</a>\u001b[0m \u001b[39m# ## FORWARD AND BACK PROP\u001b[39;00m\n\u001b[0;32m---> <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=69'>70</a>\u001b[0m logits \u001b[39m=\u001b[39m model(features)\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=70'>71</a>\u001b[0m loss \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mnn\u001b[39m.\u001b[39mfunctional\u001b[39m.\u001b[39mcross_entropy(logits, targets)\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/johnnydevriese/projects/jupyter/m1_gpu_pytorch.ipynb#ch0000001?line=71'>72</a>\u001b[0m optimizer\u001b[39m.\u001b[39mzero_grad()\n",
135
+ "File \u001b[0;32m~/miniforge3/envs/pytorch-nightly/lib/python3.10/site-packages/torch/nn/modules/module.py:1186\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1182\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1183\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1184\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1185\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1186\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49m\u001b[39minput\u001b[39;49m, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1187\u001b[0m \u001b[39m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1188\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[39m=\u001b[39m [], []\n",
136
+ "File \u001b[0;32m~/miniforge3/envs/pytorch-nightly/lib/python3.10/site-packages/torchvision/models/vgg.py:66\u001b[0m, in \u001b[0;36mVGG.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mforward\u001b[39m(\u001b[39mself\u001b[39m, x: torch\u001b[39m.\u001b[39mTensor) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m torch\u001b[39m.\u001b[39mTensor:\n\u001b[0;32m---> 66\u001b[0m x \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfeatures(x)\n\u001b[1;32m 67\u001b[0m x \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mavgpool(x)\n\u001b[1;32m 68\u001b[0m x \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mflatten(x, \u001b[39m1\u001b[39m)\n",
137
+ "File \u001b[0;32m~/miniforge3/envs/pytorch-nightly/lib/python3.10/site-packages/torch/nn/modules/module.py:1186\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1182\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1183\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1184\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1185\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1186\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49m\u001b[39minput\u001b[39;49m, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1187\u001b[0m \u001b[39m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1188\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[39m=\u001b[39m [], []\n",
138
+ "File \u001b[0;32m~/miniforge3/envs/pytorch-nightly/lib/python3.10/site-packages/torch/nn/modules/container.py:141\u001b[0m, in \u001b[0;36mSequential.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mforward\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39minput\u001b[39m):\n\u001b[1;32m 140\u001b[0m \u001b[39mfor\u001b[39;00m module \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m:\n\u001b[0;32m--> 141\u001b[0m \u001b[39minput\u001b[39m \u001b[39m=\u001b[39m module(\u001b[39minput\u001b[39;49m)\n\u001b[1;32m 142\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39minput\u001b[39m\n",
139
+ "File \u001b[0;32m~/miniforge3/envs/pytorch-nightly/lib/python3.10/site-packages/torch/nn/modules/module.py:1186\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1182\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1183\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1184\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1185\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1186\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49m\u001b[39minput\u001b[39;49m, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1187\u001b[0m \u001b[39m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1188\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[39m=\u001b[39m [], []\n",
140
+ "File \u001b[0;32m~/miniforge3/envs/pytorch-nightly/lib/python3.10/site-packages/torch/nn/modules/batchnorm.py:150\u001b[0m, in \u001b[0;36m_BatchNorm.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtraining \u001b[39mand\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtrack_running_stats:\n\u001b[1;32m 148\u001b[0m \u001b[39m# TODO: if statement only here to tell the jit to skip emitting this when it is None\u001b[39;00m\n\u001b[1;32m 149\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnum_batches_tracked \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m: \u001b[39m# type: ignore[has-type]\u001b[39;00m\n\u001b[0;32m--> 150\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mnum_batches_tracked\u001b[39m.\u001b[39;49madd_(\u001b[39m1\u001b[39;49m) \u001b[39m# type: ignore[has-type]\u001b[39;00m\n\u001b[1;32m 151\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmomentum \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m: \u001b[39m# use cumulative moving average\u001b[39;00m\n\u001b[1;32m 152\u001b[0m exponential_average_factor \u001b[39m=\u001b[39m \u001b[39m1.0\u001b[39m \u001b[39m/\u001b[39m \u001b[39mfloat\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnum_batches_tracked)\n",
141
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
142
+ ]
143
+ }
144
+ ],
145
+ "source": [
146
+ "#!/usr/bin/env python\n",
147
+ "# coding: utf-8\n",
148
+ "\n",
149
+ "import argparse\n",
150
+ "import os\n",
151
+ "import random\n",
152
+ "import time\n",
153
+ "\n",
154
+ "import numpy as np\n",
155
+ "import torch\n",
156
+ "import torchvision\n",
157
+ "from torch.utils.data import DataLoader\n",
158
+ "from torch.utils.data import SubsetRandomSampler\n",
159
+ "from torchvision import datasets, transforms\n",
160
+ "\n",
161
+ "\n",
162
+ "def set_all_seeds(seed):\n",
163
+ " os.environ[\"PL_GLOBAL_SEED\"] = str(seed)\n",
164
+ " random.seed(seed)\n",
165
+ " np.random.seed(seed)\n",
166
+ " torch.manual_seed(seed)\n",
167
+ " torch.cuda.manual_seed_all(seed)\n",
168
+ "\n",
169
+ "\n",
170
+ "def compute_accuracy(model, data_loader, device):\n",
171
+ " model.eval()\n",
172
+ " with torch.no_grad():\n",
173
+ " correct_pred, num_examples = 0, 0\n",
174
+ " for i, (features, targets) in enumerate(data_loader):\n",
175
+ "\n",
176
+ " features = features.to(device)\n",
177
+ " targets = targets.to(device)\n",
178
+ "\n",
179
+ " logits = model(features)\n",
180
+ " _, predicted_labels = torch.max(logits, 1)\n",
181
+ " num_examples += targets.size(0)\n",
182
+ " correct_pred += (predicted_labels.cpu() == targets.cpu()).sum()\n",
183
+ " return correct_pred.float() / num_examples * 100\n",
184
+ "\n",
185
+ "\n",
186
+ "def train_classifier_simple_v2(\n",
187
+ " model,\n",
188
+ " num_epochs,\n",
189
+ " train_loader,\n",
190
+ " valid_loader,\n",
191
+ " test_loader,\n",
192
+ " optimizer,\n",
193
+ " device,\n",
194
+ " logging_interval=50,\n",
195
+ " best_model_save_path=None,\n",
196
+ " scheduler=None,\n",
197
+ " skip_train_acc=False,\n",
198
+ " scheduler_on=\"valid_acc\",\n",
199
+ "):\n",
200
+ "\n",
201
+ " start_time = time.time()\n",
202
+ " minibatch_loss_list, train_acc_list, valid_acc_list = [], [], []\n",
203
+ " best_valid_acc, best_epoch = -float(\"inf\"), 0\n",
204
+ "\n",
205
+ " for epoch in range(num_epochs):\n",
206
+ "\n",
207
+ " epoch_start_time = time.time()\n",
208
+ " model.train()\n",
209
+ " for batch_idx, (features, targets) in enumerate(train_loader):\n",
210
+ "\n",
211
+ " features = features.to(device)\n",
212
+ " targets = targets.to(device)\n",
213
+ "\n",
214
+ " # ## FORWARD AND BACK PROP\n",
215
+ " logits = model(features)\n",
216
+ " loss = torch.nn.functional.cross_entropy(logits, targets)\n",
217
+ " optimizer.zero_grad()\n",
218
+ "\n",
219
+ " loss.backward()\n",
220
+ "\n",
221
+ " # ## UPDATE MODEL PARAMETERS\n",
222
+ " optimizer.step()\n",
223
+ "\n",
224
+ " # ## LOGGING\n",
225
+ " minibatch_loss_list.append(loss.item())\n",
226
+ " if not batch_idx % logging_interval:\n",
227
+ " print(\n",
228
+ " f\"Epoch: {epoch+1:03d}/{num_epochs:03d} \"\n",
229
+ " f\"| Batch {batch_idx:04d}/{len(train_loader):04d} \"\n",
230
+ " f\"| Loss: {loss:.4f}\"\n",
231
+ " )\n",
232
+ "\n",
233
+ " model.eval()\n",
234
+ "\n",
235
+ " elapsed = (time.time() - epoch_start_time) / 60\n",
236
+ " print(f\"Time / epoch without evaluation: {elapsed:.2f} min\")\n",
237
+ " with torch.no_grad(): # save memory during inference\n",
238
+ " if not skip_train_acc:\n",
239
+ " train_acc = compute_accuracy(model, train_loader, device=device).item()\n",
240
+ " else:\n",
241
+ " train_acc = float(\"nan\")\n",
242
+ " valid_acc = compute_accuracy(model, valid_loader, device=device).item()\n",
243
+ " train_acc_list.append(train_acc)\n",
244
+ " valid_acc_list.append(valid_acc)\n",
245
+ "\n",
246
+ " if valid_acc > best_valid_acc:\n",
247
+ " best_valid_acc, best_epoch = valid_acc, epoch + 1\n",
248
+ " if best_model_save_path:\n",
249
+ " torch.save(model.state_dict(), best_model_save_path)\n",
250
+ "\n",
251
+ " print(\n",
252
+ " f\"Epoch: {epoch+1:03d}/{num_epochs:03d} \"\n",
253
+ " f\"| Train: {train_acc :.2f}% \"\n",
254
+ " f\"| Validation: {valid_acc :.2f}% \"\n",
255
+ " f\"| Best Validation \"\n",
256
+ " f\"(Ep. {best_epoch:03d}): {best_valid_acc :.2f}%\"\n",
257
+ " )\n",
258
+ "\n",
259
+ " elapsed = (time.time() - start_time) / 60\n",
260
+ " print(f\"Time elapsed: {elapsed:.2f} min\")\n",
261
+ "\n",
262
+ " if scheduler is not None:\n",
263
+ "\n",
264
+ " if scheduler_on == \"valid_acc\":\n",
265
+ " scheduler.step(valid_acc_list[-1])\n",
266
+ " elif scheduler_on == \"minibatch_loss\":\n",
267
+ " scheduler.step(minibatch_loss_list[-1])\n",
268
+ " else:\n",
269
+ " raise ValueError(\"Invalid `scheduler_on` choice.\")\n",
270
+ "\n",
271
+ " elapsed = (time.time() - start_time) / 60\n",
272
+ " print(f\"Total Training Time: {elapsed:.2f} min\")\n",
273
+ "\n",
274
+ " test_acc = compute_accuracy(model, test_loader, device=device)\n",
275
+ " print(f\"Test accuracy {test_acc :.2f}%\")\n",
276
+ "\n",
277
+ " elapsed = (time.time() - start_time) / 60\n",
278
+ " print(f\"Total Time: {elapsed:.2f} min\")\n",
279
+ "\n",
280
+ " return minibatch_loss_list, train_acc_list, valid_acc_list\n",
281
+ "\n",
282
+ "\n",
283
+ "def get_dataloaders_cifar10(\n",
284
+ " batch_size,\n",
285
+ " num_workers=0,\n",
286
+ " validation_fraction=None,\n",
287
+ " train_transforms=None,\n",
288
+ " test_transforms=None,\n",
289
+ "):\n",
290
+ "\n",
291
+ " if train_transforms is None:\n",
292
+ " train_transforms = transforms.ToTensor()\n",
293
+ "\n",
294
+ " if test_transforms is None:\n",
295
+ " test_transforms = transforms.ToTensor()\n",
296
+ "\n",
297
+ " train_dataset = datasets.CIFAR10(\n",
298
+ " root=\"data\", train=True, transform=train_transforms, download=True\n",
299
+ " )\n",
300
+ "\n",
301
+ " valid_dataset = datasets.CIFAR10(root=\"data\", train=True, transform=test_transforms)\n",
302
+ "\n",
303
+ " test_dataset = datasets.CIFAR10(root=\"data\", train=False, transform=test_transforms)\n",
304
+ "\n",
305
+ " if validation_fraction is not None:\n",
306
+ " num = int(validation_fraction * 50000)\n",
307
+ " train_indices = torch.arange(0, 50000 - num)\n",
308
+ " valid_indices = torch.arange(50000 - num, 50000)\n",
309
+ "\n",
310
+ " train_sampler = SubsetRandomSampler(train_indices)\n",
311
+ " valid_sampler = SubsetRandomSampler(valid_indices)\n",
312
+ "\n",
313
+ " valid_loader = DataLoader(\n",
314
+ " dataset=valid_dataset,\n",
315
+ " batch_size=batch_size,\n",
316
+ " num_workers=num_workers,\n",
317
+ " sampler=valid_sampler,\n",
318
+ " )\n",
319
+ "\n",
320
+ " train_loader = DataLoader(\n",
321
+ " dataset=train_dataset,\n",
322
+ " batch_size=batch_size,\n",
323
+ " num_workers=num_workers,\n",
324
+ " drop_last=True,\n",
325
+ " sampler=train_sampler,\n",
326
+ " )\n",
327
+ "\n",
328
+ " else:\n",
329
+ " train_loader = DataLoader(\n",
330
+ " dataset=train_dataset,\n",
331
+ " batch_size=batch_size,\n",
332
+ " num_workers=num_workers,\n",
333
+ " drop_last=True,\n",
334
+ " shuffle=True,\n",
335
+ " )\n",
336
+ "\n",
337
+ " test_loader = DataLoader(\n",
338
+ " dataset=test_dataset,\n",
339
+ " batch_size=batch_size,\n",
340
+ " num_workers=num_workers,\n",
341
+ " shuffle=False,\n",
342
+ " )\n",
343
+ "\n",
344
+ " if validation_fraction is None:\n",
345
+ " return train_loader, test_loader\n",
346
+ " else:\n",
347
+ " return train_loader, valid_loader, test_loader\n",
348
+ "\n",
349
+ "\n",
350
+ "if __name__ == \"__main__\":\n",
351
+ "\n",
352
+ " # parser = argparse.ArgumentParser()\n",
353
+ " # parser.add_argument(\n",
354
+ " # \"--device\", type=str, required=True, help=\"Which GPU device to use.\"\n",
355
+ " # )\n",
356
+ "\n",
357
+ " # args = parser.parse_args()\n",
358
+ "\n",
359
+ "\n",
360
+ " RANDOM_SEED = 123\n",
361
+ " BATCH_SIZE = 32\n",
362
+ " NUM_EPOCHS = 1\n",
363
+ " # DEVICE = torch.device(args.device)\n",
364
+ " # Apple’s Metal Performance Shaders (MPS)\n",
365
+ " DEVICE = \"mps\"\n",
366
+ "\n",
367
+ " print('torch', torch.__version__)\n",
368
+ " print('device', DEVICE)\n",
369
+ "\n",
370
+ " train_transforms = torchvision.transforms.Compose(\n",
371
+ " [\n",
372
+ " torchvision.transforms.Resize((256, 256)),\n",
373
+ " torchvision.transforms.RandomCrop((224, 224)),\n",
374
+ " torchvision.transforms.ToTensor(),\n",
375
+ " torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),\n",
376
+ " ]\n",
377
+ " )\n",
378
+ "\n",
379
+ " test_transforms = torchvision.transforms.Compose(\n",
380
+ " [\n",
381
+ " torchvision.transforms.Resize((256, 256)),\n",
382
+ " torchvision.transforms.CenterCrop((224, 224)),\n",
383
+ " torchvision.transforms.ToTensor(),\n",
384
+ " torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),\n",
385
+ " ]\n",
386
+ " )\n",
387
+ "\n",
388
+ " train_loader, valid_loader, test_loader = get_dataloaders_cifar10(\n",
389
+ " batch_size=BATCH_SIZE,\n",
390
+ " validation_fraction=0.1,\n",
391
+ " train_transforms=train_transforms,\n",
392
+ " test_transforms=test_transforms,\n",
393
+ " num_workers=2,\n",
394
+ " )\n",
395
+ "\n",
396
+ " model = torch.hub.load(\n",
397
+ " \"pytorch/vision:v0.11.0\", \"vgg16_bn\", pretrained=False\n",
398
+ " )\n",
399
+ "\n",
400
+ " model.classifier[-1] = torch.nn.Linear(\n",
401
+ " in_features=4096, out_features=10 # as in original\n",
402
+ " ) # number of class labels in Cifar-10)\n",
403
+ "\n",
404
+ " model = model.to(DEVICE)\n",
405
+ "\n",
406
+ " optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)\n",
407
+ "\n",
408
+ " minibatch_loss_list, train_acc_list, valid_acc_list = train_classifier_simple_v2(\n",
409
+ " model=model,\n",
410
+ " num_epochs=NUM_EPOCHS,\n",
411
+ " train_loader=train_loader,\n",
412
+ " valid_loader=valid_loader,\n",
413
+ " test_loader=test_loader,\n",
414
+ " optimizer=optimizer,\n",
415
+ " best_model_save_path=None,\n",
416
+ " device=DEVICE,\n",
417
+ " scheduler_on=\"valid_acc\",\n",
418
+ " logging_interval=100,\n",
419
+ " )"
420
+ ]
421
+ },
422
+ {
423
+ "cell_type": "code",
424
+ "execution_count": 10,
425
+ "metadata": {},
426
+ "outputs": [
427
+ {
428
+ "data": {
429
+ "text/plain": [
430
+ "True"
431
+ ]
432
+ },
433
+ "execution_count": 10,
434
+ "metadata": {},
435
+ "output_type": "execute_result"
436
+ }
437
+ ],
438
+ "source": [
439
+ "import torch\n",
440
+ "\n",
441
+ "torch.has_mps"
442
+ ]
443
+ }
444
+ ],
445
+ "metadata": {
446
+ "kernelspec": {
447
+ "display_name": "Python 3.10.5 ('pytorch-nightly')",
448
+ "language": "python",
449
+ "name": "python3"
450
+ },
451
+ "language_info": {
452
+ "codemirror_mode": {
453
+ "name": "ipython",
454
+ "version": 3
455
+ },
456
+ "file_extension": ".py",
457
+ "mimetype": "text/x-python",
458
+ "name": "python",
459
+ "nbconvert_exporter": "python",
460
+ "pygments_lexer": "ipython3",
461
+ "version": "3.10.5"
462
+ },
463
+ "orig_nbformat": 4,
464
+ "vscode": {
465
+ "interpreter": {
466
+ "hash": "8a8bcccfb183d1298694efece6cf41240378bc61621e95c864629a40c5876542"
467
+ }
468
+ }
469
+ },
470
+ "nbformat": 4,
471
+ "nbformat_minor": 2
472
+ }
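Note on the cell above: it hardcodes DEVICE = "mps", which raises at model.to(DEVICE) on machines without the Metal backend. A minimal sketch of a safer device pick, assuming a PyTorch build with MPS support (1.12+); the pick_device helper name is ours, the queried APIs are stock:

import torch

def pick_device() -> torch.device:
    # Prefer Apple's Metal backend, then CUDA, then plain CPU.
    if torch.backends.mps.is_available():
        return torch.device("mps")
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

DEVICE = pick_device()
print("device", DEVICE)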
code/main.py ADDED
@@ -0,0 +1,137 @@
1
+ from __future__ import print_function
2
+ import argparse
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+ import torch.optim as optim
7
+ from torchvision import datasets, transforms
8
+ from torch.optim.lr_scheduler import StepLR
9
+
10
+
11
+ class Net(nn.Module):
12
+ def __init__(self):
13
+ super(Net, self).__init__()
14
+ self.conv1 = nn.Conv2d(1, 32, 3, 1)
15
+ self.conv2 = nn.Conv2d(32, 64, 3, 1)
16
+ self.dropout1 = nn.Dropout(0.25)
17
+ self.dropout2 = nn.Dropout(0.5)
18
+ self.fc1 = nn.Linear(9216, 128)
19
+ self.fc2 = nn.Linear(128, 10)
20
+
21
+ def forward(self, x):
22
+ x = self.conv1(x)
23
+ x = F.relu(x)
24
+ x = self.conv2(x)
25
+ x = F.relu(x)
26
+ x = F.max_pool2d(x, 2)
27
+ x = self.dropout1(x)
28
+ x = torch.flatten(x, 1)
29
+ x = self.fc1(x)
30
+ x = F.relu(x)
31
+ x = self.dropout2(x)
32
+ x = self.fc2(x)
33
+ output = F.log_softmax(x, dim=1)
34
+ return output
35
+
36
+
37
+ def train(args, model, device, train_loader, optimizer, epoch):
38
+ model.train()
39
+ for batch_idx, (data, target) in enumerate(train_loader):
40
+ data, target = data.to(device), target.to(device)
41
+ optimizer.zero_grad()
42
+ output = model(data)
43
+ loss = F.nll_loss(output, target)
44
+ loss.backward()
45
+ optimizer.step()
46
+ if batch_idx % args.log_interval == 0:
47
+ print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
48
+ epoch, batch_idx * len(data), len(train_loader.dataset),
49
+ 100. * batch_idx / len(train_loader), loss.item()))
50
+ if args.dry_run:
51
+ break
52
+
53
+
54
+ def test(model, device, test_loader):
55
+ model.eval()
56
+ test_loss = 0
57
+ correct = 0
58
+ with torch.no_grad():
59
+ for data, target in test_loader:
60
+ data, target = data.to(device), target.to(device)
61
+ output = model(data)
62
+ test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
63
+ pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
64
+ correct += pred.eq(target.view_as(pred)).sum().item()
65
+
66
+ test_loss /= len(test_loader.dataset)
67
+
68
+ print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
69
+ test_loss, correct, len(test_loader.dataset),
70
+ 100. * correct / len(test_loader.dataset)))
71
+
72
+
73
+ def main():
74
+ # Training settings
75
+ parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
76
+ parser.add_argument('--batch-size', type=int, default=64, metavar='N',
77
+ help='input batch size for training (default: 64)')
78
+ parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
79
+ help='input batch size for testing (default: 1000)')
80
+ parser.add_argument('--epochs', type=int, default=14, metavar='N',
81
+ help='number of epochs to train (default: 14)')
82
+ parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
83
+ help='learning rate (default: 1.0)')
84
+ parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
85
+ help='Learning rate step gamma (default: 0.7)')
86
+ parser.add_argument('--no-cuda', action='store_true', default=False,
87
+ help='disables CUDA training')
88
+ parser.add_argument('--dry-run', action='store_true', default=False,
89
+ help='quickly check a single pass')
90
+ parser.add_argument('--seed', type=int, default=1, metavar='S',
91
+ help='random seed (default: 1)')
92
+ parser.add_argument('--log-interval', type=int, default=10, metavar='N',
93
+ help='how many batches to wait before logging training status')
94
+ parser.add_argument('--save-model', action='store_true', default=False,
95
+ help='For Saving the current Model')
96
+ args = parser.parse_args()
97
+ use_cuda = not args.no_cuda and torch.cuda.is_available()
98
+
99
+ torch.manual_seed(args.seed)
100
+
101
+ device = torch.device("cuda" if use_cuda else "cpu")
102
+
103
+ train_kwargs = {'batch_size': args.batch_size}
104
+ test_kwargs = {'batch_size': args.test_batch_size}
105
+ if use_cuda:
106
+ cuda_kwargs = {'num_workers': 1,
107
+ 'pin_memory': True,
108
+ 'shuffle': True}
109
+ train_kwargs.update(cuda_kwargs)
110
+ test_kwargs.update(cuda_kwargs)
111
+
112
+ transform=transforms.Compose([
113
+ transforms.ToTensor(),
114
+ transforms.Normalize((0.1307,), (0.3081,))
115
+ ])
116
+ dataset1 = datasets.MNIST('../data', train=True, download=True,
117
+ transform=transform)
118
+ dataset2 = datasets.MNIST('../data', train=False,
119
+ transform=transform)
120
+ train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
121
+ test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
122
+
123
+ model = Net().to(device)
124
+ optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
125
+
126
+ scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
127
+ for epoch in range(1, args.epochs + 1):
128
+ train(args, model, device, train_loader, optimizer, epoch)
129
+ test(model, device, test_loader)
130
+ scheduler.step()
131
+
132
+ if args.save_model:
133
+ torch.save(model.state_dict(), "mnist_cnn.pt")
134
+
135
+
136
+ if __name__ == '__main__':
137
+ main()
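main.py above is the stock PyTorch MNIST example; every flag below is defined in its argparse block, so a quick smoke test and a short run look like:

python main.py --dry-run                # stop after the first training batch
python main.py --epochs 1 --save-model  # short run, writes mnist_cnn.pt

--dry-run only breaks out of the training loop; test() still evaluates the (barely trained) model once per epoch.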
code/netflix.jpg ADDED
code/netflix.png ADDED
code/netflix_price_increase.ipynb ADDED
@@ -0,0 +1,63 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 34,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAESCAYAAAD+GW7gAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAABEkUlEQVR4nO3dd2BUZb7/8feZMy2ZzCSZEBAFFFBBBRVpCkgTBBE2tADrqov7W+td14IICoi7lrWgd13cqyvXVS+yAqE3EQQRpShFUbEhKt2QZFJmJpl25vz+mGSkJKTOkPJ9/aOcnJnvc4B8ODnzPN9H0XVdRwghRKNnONsDEEIIER8S+EII0URI4AshRBMhgS+EEE2EBL4QQjQREvhCCNFESOA3IYcPH6ZDhw5kZWWddPz1119n6tSplb4+KyuLefPmAeDxeJgwYQI33ngjb7zxBl26dAHgnXfe4bXXXqvWuDRN48477yQ3N7fCcw4dOsS9995brfetTJcuXTh8+HCt3uPOO+9kyZIlADz00EPs37//tHO2bt1KRkYGGRkZ9O7dm6uvvjr666VLlzJhwoRajSFWBg4cyJAhQ8jIyGDkyJHceOONPP3004TD4dPOzc7OrrfXIX5lPNsDEPFlMBh49tln6dq1K+3atavWa3ft2sVFF10EwDfffENeXh7r16/n8OHD/OMf/wDgt7/9bbXH9O9//5sePXrQrFmzCs85evQoP/30U7XfO57uu+8+Jk2axIIFC1AUJXq8V69eLF++HIDZs2eTn5/PY489Fv36qFGj4j7Wqpo1axadO3cGIBAIcMstt/Cf//yHm2+++aTzWrRowfz588/GEEU1SOA3MVarldtuu42HHnqI+fPnYzabT/p6IBBg1qxZ7NixA03TuPTSS5k+fTrbtm1j48aNbNmyBaPRyNtvv012djYZGRm8+OKL0deXBdo999zDyJEjeeqpp+jXrx9///vf2bNnD6+//joGw68/WJaUlPDWW2+xcuVKAPbv38+0adMIBALous7YsWOZMGEC06dPJzs7m//3//4fr7/+Oq+++iobNmzA5/NRUlLClClTGDx4MLNnz+bIkSPk5ORw5MgRWrRowfPPP0/z5s3ZuXMnTzzxBIqi0Llz5+idajgc5umnn2bPnj14vV50XefJJ5+ka9euTJ06lYKCAg4dOkT//v259dZbmTp1KsePH+fcc88lLy8vei2tW7fGbrezYcMGBg0aVKU/j8OHDzNixAg+++wzZs+ezcGDB8nOziYnJ4fLLruMnj17smzZMg4fPszkyZMZPnw4AK+88grr1q0jHA5z3nnnMXPmTFq0aHHSe0+YMIHbbruNIUOGAPD8888DMHHiRKZMmUJ+fj4A/fr14/777690rGazma5du/Ljjz9y+PBhfve739G+fXuOHDnCM888wx/+8Ac+++wzQqEQzz//PJs2bUJVVbp06cLMmTMxm81VGreIHXmk0wTdfffdJCYm8t///d+nfe21115DVVWWLFnCihUraN68ObNmzWLw4MEMHDiQiRMn8vvf/54nn3ySNm3asHz5ciwWy2nv06xZM5555hlmzJjB+vXrWbZsGS+88MJJYQ+wfft22rZtS2pqKhB5vDRw4ECWLFnCa6+9xs6dO1EUJVrv9ddf58iRI2zdupW5c+eycuVKHnjggehPGAA7d+7kpZdeYu3atSQkJDB//nwCgQD33XcfU6dOZdmyZfTs2ROfzwfAnj17OH78OAsWLGDNmjWMGjWKOXPmRN/P5/OxevVqJk+ezF//+leuuOIKVq9ezfTp00/7qaNPnz6sX7++xn82u3bt4p///CdLly5l8+bN7N+/n3nz5jFjxgxmz54NwLJly/j+++/Jyspi+fLl9OvXj+nTp5/2XpmZmdHHTZqmsWLFCjIzM1m4cCGtWrVi6dKlzJs3jwMHDuB2uysdW3Z2Nh988AE9e/YE4JdffuGee+7hvffeIz09PXref/7zH/bu3cvy5ctZtWoVXq+XNWvWVHncInbkDr8JMhgMPP/884wcOZI+ffqc9LVNmzbhdrvZunUrAMFgkLS0tBrV6dOnD8OGDePee+/l7bffxul0nnbOjz/+SJs2baK/Hjx4MFOmTOGLL77gmmuuYfr06af9I3Heeefx3HPPsXLlSg4cOBC9My/To0cPkpKSALj00kspLCzk+++/x2g0cs011wAwfPjw6GOVLl26kJyczPz58zl06BCffPIJNpst+n5du3aN/v/WrVuZMmUKAOeff340/Mq0atWKd999t0a/XxB5/GO32wFo3rw51157LQBt2rShoKAAgA8++IAvv/ySMWPGAJGfUEpKSk57r2HDhvHcc8+Rk5PD119/zQUXXMAFF1zAtddeyx133MGxY8fo1asXkyZNitY81UMPPYTVaiUcDmMymcjMzGTIkCEcPnwYo9HIlVdeedpryj6zsFqtAPz9738HIo+8qjJuETsS+E1Uy5Yt+ctf/sKUKVMYOXJk9Hg4HObRRx+lX79+AHi9Xvx+f41q6LrO/v37adasGZ9//jndunU77RxFUU76EHDAgAG89957bN26lW3btvHPf/4zepdaZu/evdxzzz1MnDiR3r170717d/7yl79Ev14WNGXvX9Yu6tS2UUZj5K//pk2beOqpp7jtttu47rrraNeuHStWrIiel5iYWO77nfgeJ/761H+gquPUR2ynvj9E/oz++Mc/ctNNNwGRx3CFhYWnnZeQkMCQIUNYtWoVn332GZmZmQBcfvnlbNiwgW3btrF9+3YyMzOZM2cOnTp1Ou09TnyGX95Yyxvfqcdyc3MJh8NVHreIHXmk04QNHTqUvn378tZbb0WP9enTh3nz5hEIBAiHw8yYMSP6jF5VVUKhUJXf/80336S4uJjFixfz5ptv8sUXX5x2Ttu2bTl06FD015MmTWLNmjXceOONzJw5k6SkJA4ePIiqqgSDQQB27NhBp06duO222+jRowcbNmxA07QzjqVDhw7ous6HH34IwIYNG6Jhs2XLFgYMGMBNN91Ep06deP/99yt8v2uvvZYFCxYAkQ+SP/nkk5O+fvjw4Wp/GF5dffr0YdGiRXg8HgBeeuklHn744XLPHTduHEuXLmX37t3RZ/mzZs3if/7nfxg0aBDTpk3jwgsvZN++fXU2vmuuuYZVq1ZF/w49/vjjrF69ulrjFrEhd/hN3PTp09m1a1f01/fccw/PPvsso0aNQtM0LrnkkuiUzb59+/LMM88AlPuj/Im+/vprXn31VRYtWkSLFi149NFHmTRpEkuXLo0+boHII4xp06ZRVFSEw+HgnnvuYdq0aSxYsABVVRk0aBDdu3ensLAQi8XC2LFjefXVV1m3bh033HAD4XCYAQMGUFhYGA2S8phMJv75z3/y+OOP8+KLL3LJJZdEH1VNmDCBSZMmMWLECEKhEL17945+sHiqmTNn8sgjj3DDDTdwzjnn0LFjx5O+/tFHH502g6WuZWZmkp2dzbhx41AUhZYtW0b/XE7VqVMnVFVl6NCh0c9afv/
73zN16lSGDx+O2WymQ4cO3HjjjXU2vgkTJnDkyBFGjx6Nruv06NGDW265BYPBUOVxi9hQpD2yONteffVVVFXl9ttvP9tDqZWDBw/y0EMPnTYtU4j6Qh7piLPuD3/4A9u3bycnJ+dsD6VW/v73v/Pkk09K2It6S+7whRCiiZA7fCGEaCJiFvh79uzhlltuASAvL4+7776b3/3ud0yYMIGDBw/GqqwQQogKxGSWzpw5c1ixYgUJCQlAZEn3iBEjGDZsGNu3bz9tsU15wuEwmlbzp02qqtTq9fW1VmOv15ivLd71GvO1xbteQ7o2k0mt8GsxCfw2bdowe/bs6Bzb3bt306FDByZOnMh5553HtGnTKn0PTdMpKCiu8RhSUhJr9fr6Wqux12vM1xbveo352uJdryFdW3p6+aumIUaBX7b0usyRI0dwOBy8+eabvPzyy8yZM4f77rvvjO+hqgopKYlnPOfMrzfU6vX1tVZjr9eYry3e9RrztcW7XmO5trgsvEpJSWHgwIFApMd2eU27TiV3+E2zXmO+tnjXa8zXFu96DenaznSHH5dZOl27do0uad+xYwcXXnhhPMoKIYQ4QVwCf8qUKSxfvpwJEybw0Ucfcdddd8WjrBBCiBPE7JFOq1atWLhwIRBpZ/vGG2/EqpQQQogqkIVXQgjRREjgCyFEEyGBL4QQTYT0wxdCiHrCvXMH+SuWcrDYi2624Bydib1b9zp7f7nDF0KIesC9cwe5895C0cOY0tIIuVxkv/oy7p076qyGBL4QQtQD7g/WY1CNBPLzUdAxWK0oRhOuJVl1VkMe6QghxFmk6GG0oiJK9u1DV40nbaCjWCwEs3+ps1pyhy+EEGeBogC+EoLHjxP0eDDYHRAInHSO7vdjanFOndWUwBdCiDhTtBCay4Xf5UILhQBIHjwUXQuh+/3oQNjnQw8FcY7OrLO68khHCCHiRkcvLiZYVERY0076iq1zZ+BmCtevJewtxuh01vksHQl8IYSIh1CQUGEhms9X4SmJl16KXlKMo2U6pqv71vkQJPCFECKWdB292EOwyI0eDld4WvFXX5K3aCHBY0c5Dlw8/yrUpKQ6HYoEvhBCxErAT6ioEM0fqPiUI4fJW7SQkq/3Rg4oCi1GZmCw2ep8OBL4QghRxxQ9jOZ2E/J60MPl700bKiwkf8Uy3Fs+Aj1yjrVDR9Iyx3Nu7x54NKXc19WGBL4QQtQRRQG9pIRgYWF09s2pwoEAhe+vp2DtanS/HwBTi3Nwjskk8fIrTpqHX9ck8IUQog4oWgjNXUSwuCR6x34iPRzGu/NT8pYsRst3AWCwJZE6/Dc4+vVDUWMfxxL4QghRKzq610vQ7T5tqmUZ3w/7yMtagP/nnyIHVJXkgYNIGTYcNTF+m6NL4AshRE0FA5GplqWPZk77ck4OriVZeHfvih6zXdUV5+ixmNKbx2uUURL4QghRXbqO7nUTdHvKnWqpFRdTsGYlhR9shNJn+ZYL2pKWOR7rhRfFe7RREvhCCFEdZ5hqqWshijZ/SP7KFYS9HgDUVCfOUWNI6t4DxXB2u9lI4AshRBVEp1p6POinfCir6zrFX36Ba9HCaHdLxWIh5YYbSb5uMAaz+WwM+TQS+EIIcQaKEnlEEzx+vNyplv5DB8nLWoDvu2+jL7D36UvqbzIwOpLjPNozk8AXQogKlE21DKj6aWEfKsjHtXwpnm1bo9MwEy65lLTM8ZjPa3U2hlspCXwhhDiNDiXFBAojXS0T7NboV8J+P4Xr36PgvXfRS/vXm1qeS9rYcSRc1immC6dqSwJfCCFOpIXQCgsJ+UrghEf1ejiM55PtuJYtQSvIB8Bgt+McMRJ7n2tRVPUsDbjqJPCFEALO2NXS/fXXHPm/twkcPACAYjSSPOh6UoYOw5CQcDZGWyMS+EIIUcECqmB2NnlLsij+/LPoMVv3HjhHjsHUrFm8R1lrEvhCiCZL0cNontKplid0tdS8HvJXraRo0wcQjrRLsLRrH1k41a792RpurUngCyGaHEUB3eeLdLUMBqPH9VCIok0fkL96BeHiYgCMzZpx3k2/xXBpbDtZxoMEvhCiSSmvq6Wu6xR//hl5S7IIHT8eOc+aQOqw4TgGXkeK006Ru+KtCRsKCXwhRJOhez2ndbX0H/g5snBq3/eRAwYDjr79SR3+G1S7/SyNNDYk8IUQjV85H8qG8l24li3Bs31b9Fhi58txjsnE3PLcszHKmJPAF0I0WuV9KBv2+ShYt5bCde+hByMLp8ytWuEcO57ESy49m8ONOQl8IUSjU96Hsno4jHvrFvKXL0UrKgRAdThIzRiFvVefs97JsoxqNGIwmUArfy/c2ohZ4O/Zs4dZs2Yxd+7c6LGVK1fy9ttvs2DBgliVFUI0cu6dO3AtyeLHnOOo6c1xjs7E3q179OvlfShb8s3X5C1aQODw4cg5JhPJg4eQMmQoBms9WDilKKhmE8akJLAkRMbkK67zMjEJ/Dlz5rBixQoSTliB9s0337Bo0aLT2ooKIURVuXfuIPvVl1GMJox2OyGXi+xXX4a7/oS9azf04pO3GgwcO4prcRbFX34RfY+kq6/BmTEao9N5ti4jSjEoqBYLalISmC1AbKd9xuRnmDZt2jB79uzor/Pz85k1axaPPvpoLMoJIZoI15IsFKMJg9WKoigYrFbUJDueTe8TysshUFBAWNPQ3G5y35nH4b/OjIa99cKLOO+RGTS/7Y9nPewVgwGTzYa5WTrGtGZgthLrsIcY3eEPGTKEw6U/OmmaxrRp03j00UexWCxVfg9VVUhJqfnmvqpqqNXr62utxl6vMV9bvOs1xmv7Mec4Rrs9EvZGFWNKCno4TMm+fdjMBsKKSu7a9/hl2TLCJSUAmFu04Nzf/pbk7t1qvHBKVQ04TuiYWWOqitGWiGqzYTBVvClKrH4vY/6h7d69ezlw4ACPP/44fr+fH374gaeeeopp06ad8XWaplNQUPNnWCkpibV6fX2t1djrNeZri3e9xnhtanpzQvn5mNLSMCZY8eUXEMrPx5CczNFNH+FasohQbi4AhsREUoYNJ3nAdShGI25P+RuNV4XDbq3VwivVaERNsmFItOHHAN4QcPpmKmVq83uZnl7x2oGYB/7ll1/O6tWrATh8+DAPPvhgpWEvhBDlaTbut+Qt/A96SQklhQVoxSWEfSWgKBx/7dXISQYVR/8BpN44IvJs/Gwp+yDWZgNrIigKZ/sTTJmWKYRoGEJBrG1aY+8/iIL31hAqLCQcDBF2F6EVRqZZJl5xZWThVItzztow4/1BbHXELPBbtWrFwoULKz0mhBBn8uviKS96OIy1XTusbdtR+ON+9NI59ubWbUjLHE9Ch45nb5wGA8bERAyJiShmM/VxQqLc4Qsh6ikdfCUEC4vQQiF0TcO95SNcK5YRdrsBUFNScI4cTVLPa87awilVLX0+n5CArkYitT6GPUjgCyHqo1AQraiQkM8HOhR/9SV5ixcSPHoUAMVspsWI4Vj7DcJQjdl/deak5/MJoBjO+vP5qpDAF0LUG4oeJuz1EHR70MNhAk
cOk7coi5Kvvyo9QSHpml44M0bhbN0y7i2LFYOh9Pm8rd49n68KCXwhxFmnKKCXlER634RChIoKyV+5HPdHm6PPR6wdOpI2dhyWNufHf4Cqislhx5BoA7XhxmbDHbkQosFTFND9foJuN5rfT9jvp3DDegrWrkH3Re7eTS1a4BwzjsTL47/jVNn8eWuLNPyeQFxrx4IEvhDi7ChtchYqLiGsaXh37sC1dBEhlwsAg81G6vAMHP36ocTzrloB1VTayKz0+bxiNAIS+EIIUS1KWEPzeqLTLH0/7CMvawH+n3+KnKCqJA+8jpQbhqPabHEcmIJqMWNMsoOlDtoo1EMS+EKI+NDDpd0sPYQ1jWBODq4lWXh374qeYruqK87RYzGlN4/bsCILpayoSUkoFku9nVJZFyTwhRAxFplPHypyowWDaMXFFKxZReEHGyAU6SdjuaAtaZnjsV54UdxGFV0oZbOhmEzoev2dP19XJPCFEDER+UDWFwn6gB89FKJo82byVy4n7PUAoKY6SRs9Blu3HnFbOGVQVYxJNgwJifV+oVRdk8AXQtQ9LYRWVESwpAQ9HKb4yy9wLVpIMPsXABSLhZShw0gedD0Gc8VtguvSiR0r9QayUKquSeALIerMqR/I+g8dwrVoASXfflN6goK997Wk/mYkxuTkOAyo/nWsPJsk8IUQtaeHCRUVEjieS1jTCBUWkL98Ke6tW6LPSxIuuZS0zPGYz2sV+/GcNOOm4a2IjRUJfCFELeh4d+/CtWIZ/kMHMTiSMTrTKP58N7o/suGIqWVL0saOJ+GyTjFfONWUZtzUhAS+EKLaylbIFn26neNv/hs9rKOENfz7f8C/73sADHY7zhEZ2Pv0RVHV2I7HYMCYkBCZcVPamljC/nQS+EKI6gkGCLndhHw+8pYuIewPEPZ60AO/rkRVnU5aP/YXDAmx3eNWUQ0YbbaTetxI0FdMAl8IUTVaiLDHTai4GD2sE8zOxv/TjycFvSExEUNyCgT8MQ17VVUxJjuwJKnohtj+9NCYSOALIc7o1Jk3mtdD/upVFH2wEcJa5ByzGVNaGpjM6H4/arP0mIxFNZlQbTaUxERMyUnocdykvTGQwBdClO+UVgh6KETRhx+Qv2oF4eJI0Brs9sh/ExIxWCxoJT50LUTy4KF1Nw5FQTWbI83MLNbIBwiiRiTwhRAn03X0kmI0tzuytaCuU7znM/IWZxE6fhwAxZpA6rAbcQwcRMm331K4fi2aKw/VmUby4KHYOneu9TBkxk3dk8AXQpTSwe8n5C5C80eey/sP/EzeooX4vv8ucorBgOPavqSOGIlaendv69wZW+fOOOzWOtmBqrzNwCXs64YEvhBNXNkUy2jQ6zqhfBeuZUvxbN8aPS+hU2fSxozDfO65MRlHU+5xEy8S+EI0ZSdsQqLrOmGfj4J1aylc9x56MHKXbz6vFc6xmSRe2ikmQ4j0uEnCkJjYZHvcxIsEvhBNkKKF0LxeQt7IzBs9HMa9bQv5y5aiFRUCoDocpGaMwt6rT913sizrcZOUBJaEJt/jJl4k8IVoSk6ZeQNQ8s3X5C1aQODwYQAUk4nkwUNIGTIUgzWhTstHPoi1oCYlgVl63MSbBL4QTcEpM28AAr8cw7Uoi+Iv90RPS+pxNc5RYzA6nXVavrzWByL+JPCFaNR08PkiH8gGggBoHjf5K1dQtHkThMMAWC+8CGfmeKwXtK3T6mUfxCoJidL6oB6QwBeiEVIUCJeUEMrLRfP7QQc9GKTwgw0UrFlFuKQEAGN6c9JGjyWxy1V12sny1M1GRP0ggS9EY1Pa3MxvAs3nR9d1vLt34lqyiFBuLhDpeZMybDjJ/QeimEx1Vjr6Qaw1AWTGTb0jgS9EY6GFCHs8hIq96GEdjFZ8P/1IXtYC/Pt/iJxjUHH070/qjb+JfHBaF0o3GzE3a4Y/UPnp4uyRwBeigTu1uRlAyJXHgbeWkb/114VTiVdciXNMJuYW59RN3VNaH6iJiRCQZmb1mQS+EA1VOVMswyUlFKxdQ+H769BLZ+OYW7cmLXMCCR061knZaOsDmw3FZJLWBw2IBL4QDY4OvhJChUXRKZa6puHe8hH5K5ajuYsAMKakkJoxiqSre9XJwinFYIi0PpDNRhosCXwhGohIzxsfoaJfm5sBFO/9irxFCwgePRo5z2wm+fqhtB6dgTdYB3VVA6akpJN63IiGKWZ/env27GHWrFnMnTuXb775hieeeAJVVTGbzTz77LM0a9YsVqWFaHxO2Faw7LY6cPQIeYsWUrL3q8g5ikLS1b1wZozCmJqKarVCsObdK6PNzBJt6AZVZtw0AjEJ/Dlz5rBixQoSEiLLsp966ilmzJjBJZdcwvz585kzZw6PPPJILEoL0bhoIcLuIkIlJZGZN4BWVIRr5TLcH22Ohr+1Q0fSxo7D0ub8WpdUVRXVnhSdQy9B33jEJPDbtGnD7NmzefjhhwF48cUXad68OQCapmGxWGJRVojGIzrFsjg68yYcCFC4YT0Fa9eg+yJ37qYWLXCOGUfi5VfUeuHUqYulJOgbnzMGvq7rbNq0iU8//ZSCggKcTifXXHMNvXv3PuNfriFDhnC4tBETEA373bt38/bbbzNv3rxKB6aqCikpNd8EWVUNtXp9fa3V2Os15murSr1wKITmdhMq8YCig82MrusUbNvG0fnzCebmRd4nKYlzxoym2XXXoRjL/zZWVQMOu7XSMSlGI8akpMhesWrNNwSvb7+XDbVWLOtVGPjbtm3jlVde4bLLLqNDhw6kp6dTWFjIxx9/zJw5c7jzzjvp1atXlQutWbOGV155hddeew1nFRozaZpOQS02KE5JSazV6+trrcZerzFf2xnrlU6xDJ0wxRLA98O+yMKpn3+KHFBVkgdcR8qw4ag2G+6SEBAqt9YZd6Aqa09ss4E1EcIKuP2xubYYkb+X5UtPt1f4tQoD/8CBA7zxxhuop/yLP2zYMDRNY8GCBVUO/OXLl7NgwQLmzp1LSkpK1UYtRFNQThdLgGBODq6li/Du2hk9ltjlKtLGZGJKb17jctKeuGmrMPAnTJgAQCAQwO12k5aWFv2aqqrcdNNNVSqgaRpPPfUULVu25N577wWge/fu/PnPf67NuIVo0BQF9LIulidMsQyXFJO/ZjWFG9+H0n8ALOdfgDNzPAkXXVzzeuXsEyuangoDv7CwkBkzZrB3714cDge5ublcc801PPbYYyRVoQdHq1atWLhwIQCffvpp3Y1YiIYu4Cfodpd2sYwkr66FKNq8mfxVywl7PACoqak4R44hqUfPGi+ckvbE4kQVBv5TTz3F4MGD+cc//hE9lpWVxV//+leee+65uAxOiEZFCxHIzSWQm49eFvS6TslXX5C3KIvgL8cAUCwWUoYOI3nQYAzmms1oU00mTCkpmO0GaU8soioM/EOHDjFixIiTjmVmZrJy5cqYD0qIRuWEKZaW0lk3AP5Dh3AtWkDJt99EzlMU7L2vJfU3IzEmJ1e/jqKgms2l+8RaMTps6HH8oFHUfxUGvqmCHtl1uUmCEI2ZooXQir0nd
bEECBUWkL98Ge6tH0efryRccinOseOwtGpd/TrRrpU2FItVHtmIClUY+D6fj59//jl6N1KmpHSnHCFE+RQ9TNjrJeg5eYplOODnlyXvkr1yJbo/MgXS1LIlaWPHk3BZp2rfTCmqAWOiLfJBrHStFFVQYeBbLBZmzJhR7nEhRDn0MHpxMSG3G+2EoNfDYTyfbMe1fAlafj4ABrsd54gM7H36Vnuxk6oaUe22SDMzQ+S1EvSiKioM/Llz58ZzHEI0bCXeSNAHT14EVfL9d+RlLSBw8AAQWdXquG4wqTcMw5BQvZWUsn2gqK0KP74/dOgQ//Vf/0UoFGLHjh307t2bwYMH8/nnn8dxeELUZ6V96XOP43flnxT2wexsfnnlnxx74blo2Nu6dafjrOdJGz22WmGvWsxY0pwY01tAgg1k1o2ooQrv8J9++mnGjh2L0WjkmWee4bnnnuPCCy/koYcekrt/0aRF+tL7I33pA35OvNXWvF4KVq+kcNNGKH2sY2nbjrTM8VjbX4jFbsVfUbuDU6hGI8ZkR6T1gRB1oMLADwQCXHfddeTn5/PLL7/Qu3dvAMInzDYQoskppy89gB4KUfThB+SvWkG4ODIV0piWhnPUWGzdulfrA1nFoESamSXZZQ69qFOVtkfetm0bV199NRAJe7fbHfNBCVHvaCHCbjehkuJoX3qILJwq3vMZrsWLCB7PBkCxWkm94UYc1w3GUMH05vIoBgWjNQFDUhKYzPKMXtS5CgP/oosu4sEHH2Tv3r088cQTHD9+nBdffDEa/kI0CVqIsNdLyHvyXHoA/8ED5GUtwPf9d5EDioKjbz9Sh2egOhxVLqEYlMj0SpsNjFX/B0KI6qow8KdMmcLmzZu56667uPjii/nuu+/o2LEjt9xySzzHJ8RZUdGiKYBQfj6u5UvwbN/268KpTp1JG5OJ+dzzqlFEwWi1otrtYDLX5fCFKFeFgX/s2DEuuuii6P+np6czceLEeI1LiLNCCWuEi70EPB507eSgD/t8FKxbS+G699CDkQ6XpnPPIy1zHImXdqpGkdIWCA67rIwVcVVh4D/wwAMoihJdaVtcXEwgEOC5557jiiuuiNsAhYiHilbHQmThlHvbFvKXL0UrLARAdThI/c1I7L2vrVInS++XX1L0wftkh4KYWrbEft0QkrpcJWEv4qrCwF+wYMFpxw4ePMgjjzxSpS0KhWgI9FAI3esm6D496AFKvv2GvEULCBw6BIBiMpE86HpSht6AwZpQpRrFe/dStG5NZAtBk5HiH/bj/fYlWtz1J+zdutfp9QhxJtXaxLxNmzbSPE00DqVbCvq9IQIF3tO+HPjlGK7FWRR/sSd6LKnH1ThHjcboTDvt/HIpoJrN+L74DB2FkNuDqhowWK2EfeBakiWBL+KqWoGvaZpMyxQNm66jF3vRPB60UAjrKZt8ax43+StXULR5E5R+WGu98CKcmeOxXtC2ajVKg95od4DFgvfbbzDYkk66WVIsFoLZv9TVVQlRJVV+pBMIBNi4cSODBw+O+aCEqHMV7B0b/XIwSOEHGyhYs4pwaUdYY7N0nGPGYuvStco/2f66OjaBsv1iTc1bEHK5UKy//uOi+/2YWpxT++sSohoqDPycnJyTfm2xWLj99turvHG5EPVGiZeQ24MWDJ72JV3X8ezaiWvJIkK5kb/zhoQEUm4cQXL/gShVXDilGAyY7EkYbEmnrY51js4k+9WXCfvAkJhA2OdDDwVxjs6s/bUJUQ0VBn5mZiYtWrSo8IXZ2dln/LoQZ5cOZZuEB04PegDfTz+SvSQL7/ffRw4YVBz9+pM6fARqkr3KlVSrBWNyChhN5a6OtXfrDnf9CdeSLLSc4xjTm+McnSnP70XcVRj4//u//4vRaGTEiBFcdNFFmEwmdF1n7969LF++nHA4XG6/fCHOLh38/tKgD1BeAodcebiWLsbz6SfRY4lXXIlzdCbmc6r+mMWgqhgdDpTERMoe31TE3q079m7dSUlJpEC2HRRnSYWBP23aNPbs2cPrr7/Ojh07CIfDWK1WrrrqKm666SauvPLKOA5TiDOLdLD0ESpyn9bBsky4pISCtWso3LAevfTxjvX8NqSOHkdCx0uqUUvBaEuMNDdTqzXvQYiz6ox/W6+44gpeeOGFeI1FiGora1UcdBeh+QPlbv2kaxruLR+Tv2IZmrsIADU5GWfGaM69fiBub6DK9VSLGaMjGcwWaW4mGhy5PRENVwWtik9U/PVX5GUtJHj0CACKyUzy9UNIuX4oBqu1SqtkofRDWYcdJTEp8q+MEA2QBL5oeCpoVXyiwNEj5C3OouSrLyMHFIWkq6/BmTEaY2pqtcoZE6yoySkgj29EA1fp3+APPviAAQMGRH+9Zs0ahg0bFtNBCVEeRQuhVdCquIxWVIRr5XLcH30Yveu3XtyBtMzxWNqcX6161flQVoiGoMLA/+CDD9i9ezerV6/ms88+AyIrbTdu3CiBL+KqrLFZwOM+rYNlmXAwSNGG9eS/uxrdF9lC0NS8Bc4xmSRecWX1WoIoCqaEBFSHQz6UFY1KhX+bO3bsSEFBARaLhbZtI0vKFUVh+PDhcRucaOJK+91U1NgMIgunvDs+xbVsMaG8PAAMiYmkjsjA0bc/irF6ga0ajRhTUlCs0rZYND4Vfje0bNmSUaNGkZGRAUS2N/z8889p37593AYnmqhT+t1UxLf/B/KyFuD/6cfIAVUluf9AUm4cjmpLqlbJU/eRlbAXjVGltz8vvPACrVu35ujRo+zdu5dmzZrx7LPPxmNsoqnRdUIeD6GcnHLbIJQJ5ubgWrIY764d0WOJXa4ibfRYTM2rv/pbtVgwJifLPrKi0as08Hft2sXkyZO55ZZbmDt3Lr///e/jMS7R1PiKCbndBC1qhWEfLikmf81qCje+D6V3/uY255OWOZ6EiztUu6RiMGBKScYYln1kRdNQaeCHw2G++OILWrVqRSAQwOVyxWNcoinQ9UjQezy/9ruxqKefpmm4P96Ma8Uywh4PAGpqKs6RY0jq0bPKc+lPVDbV0uhwgLQ6EE1EpYGfkZHBE088wdNPP83zzz/PrbfeGo9xicasLOgr6GD562k6JV99Sd7ihQSPHQMifeRThtxA8uDrMZgt1aurgGo0oSYlyVRL0SRVGvi/+93vGDZsGIcOHeLuu+/G6XTGY1yiMapi0AP4Dx/CtWghJd98HTmgKNh79SE1Y2SkM2V1KEqkJUKSHSzWys8XopGqNPDXrFnDSy+9RPv27dm3bx9/+tOfojN3zmTPnj3MmjWLuXPncuDAAaZOnYqiKFx00UXMnDkTQw1+DBcNVeWtir1ffknh+rUczM1BD4XQioqiC6cSOl6Cc+x4LK1bV7uyajZhdCTLNEshqELgv/XWWyxZsgSbzYbH4+H3v/99pYE/Z84cVqxYQUJCZJPnv/3tb9x///307NmTxx57jA0bNsjOWU1AVTpYQiTsc9+Zi+7zEy72RoNeTU0l/Xe3kNDp8mrvpWxQVYz2pGjvGwl7IaDS22xFUbDZbAAkJSVhsVT+3LRNmzbM
nj07+uu9e/fSo0cPAPr27cvWrVtrOl7RACgKEPARzM3Bn5uH5q847PVwGNeiBWj5+YS9nkjYGwwY7A6M6c1J7HxFtcLeoKqYU5Ixt2iBYrNLozMhTlDpHX6bNm145pln6NatGzt37qRNmzaVvumQIUM4fPhw9Ne6rke/aW02W5U2QldVhZSUxErPq/j1hlq9vr7Wqu/1wj4fIbcbze/DYlLAVPENguebbzgy7z8EfzkWPWZMTsaYkgKKgpbvwmGv4jN3RcFoT8KYZK/W6tr6/HvZkGo19nqN5doq/c54+umnWbBgAVu3bqV9+/ZMmjSp2kVOfF7v9XpxOByVvkbT9FrtDBTPnYXivYtRvaxXhQ6WZYLHs8lbsojiz3ZHjykWK8Y0J0aLBU0Lo/t8qM40ity+M9dVFIxWK6rDgR8TeAJA1fvb18vfywZYq7HXa0jXlp5e8faclT7S2bt3L5qm8dhjj7F792727dtX7QFceumlfPJJZDu5zZs3061bt2q/h6intBDhwgICx48T9HrPGPaa10vewvkcenxGNOwtbduROmYcqj0JtDC6rqP7/ehaiOTBQ89YWjUasTidGNPSwCiLp4SoTKV3+E888QTPPPMMAPfffz9Tp05l3rx51SoyZcoUZsyYwYsvvki7du0YMmRIzUYr6g8tRNjjIVRcXGGr4jK6FqLow03kr1pB2OsFwOh04hw9Flu3HiiKgrnluRSuX4vmykN1ppE8eCi2zp3LfT/FYMBkT0KxJYH0vRGiyioNfKPRyIUXXghA69atqzydslWrVixcuBCAtm3b8vbbb9dimKLe0EKEK+lJX0bXdYr3fI5rSRbB7GwAFKuV1BtuxDFwEAazOXqurXNnbJ0747Bbz/gY58S+N0KI6qk08M8991xefPFFrrzySr744guaN28ej3GJekbRQmjFXkKeyoMewH/wAHlZC/B9/13pGyjYr+2Lc8RI1Cp8hnOqyDRLe+SuXghRI5UG/t/+9jfeeecdPvzwQ9q3b88999wTj3GJekIJawQL8vHn5FW4+ciJQvn5uJYvxbN9668Lpy7rRNrYcZjPPa/69Q0KxkQbalKSbEYiRC1V+h1ksViYOHFiHIYi6hMlrBEuLibo8WBONFUa9mG/n4J1aylctxY9EJklYzr3PNLGjiPxsk41GACoFmukuZm0LRaiTsgtkzhJ2XaCQc+Ju0xVPANGD4fxbNuKa/kStMJCAFS7g9SMkdh79UFRT+9+WRlVVTEmOyBBGpwJUZck8AVw8h19RdsJnqrk22/IW7SAwKFDkfcwmUgedD0pQ2/AYE2owSAUVJsNU5IZ3VD9fyiEEGcmgd/ElX9Hf2aBX37BtSSL4j2fR48l9eiJc9QYjM60Go1DNZkwpiRjTnNSLP3phYgJCfwmStHDhCvZIPxUmsdD/qoVFH24CcKR11jaX0ha5nisbdvVbByle8kakuygSAdVIWJJAr+JqUnQh4NBCta/R8GaVYSLI3ffxmbNcI7OxHZV12p3siwjc+qFiC8J/CaiLOhDbg9aFYNe13W8u3dxeNliAsePA2BISCDlxhEk9x+IYqpZOwPFYMDksEe6WQoh4kYCv7HTw+jVDHoA308/kpe1AP/+HyIHDAYcffuTOuI3qEk1D+qyvWSROfVCxJ181zVWpUGvub1oWqjKLwu58nAtXYLn0+3RY44uXXCMHIP5nJY1Ho5BVTE6HLKXrBBnkQR+Y6Pr6CXFaG43WqjqQR/2lVCw9l0K31+HXrrfrLlVa9LGjqNFj6sqb1NcEUXBlJCA6nDISlkhzjL5DmxMfMWR7QQr2SD8RHo4jHvLR+QvX4bmLgJATU4mNWMU9mt6o9Ri72HVaMSYnIySkCAdLYWoByTwGwNfcWSXqQo2CK9I8ddf4VqUReBIZHcyxWQm+fohpFw/FIPVGt1Y/FAVWhafRFEw2WyRu3ppXyxEvSGB32Dp4PMR8rjR/FXf4QkgcPQoeYsXUvLVl5EDikLS1dfgzBiFMdUJRDYWz5v/NopqRLXZ0AoKyJv/NnDzGUNfNZswJqeA2SL9b4SoZyTwGxwdzesllJuDFghUuDl4eTS3m/yVyyj6aDOUtji2XtyBtMzxWNqcf9K5hevXoqhGFIsFRVFQLBbwR46XF/iKQcFU1r5YFlAJUS9J4DcYOvj9hNxFBMyGat3Vh4NBijasJ//dNei+EgBMzVvgHDOWxCu6lLtwKpSbg5JoO/mg2UwoN+e0c2UBlRANgwR+PacooPt9kQ9jA4FIj3mztUqv1XUd784duJYuJpSXC4AhMZHU4b/B0W8AirHiP35js3S0ggKwWH49GAhgbJb+69jKFlAlJkUGKoSo1yTw66myoA+6S5/RV/OTT9/+HyILp376MXLAoOIYMJDUG4ejVmHXqOTBQyPP7P2gJ1hP3lhcAaM1ATU5WRZQCdGAyHdrPRMJej9Bd1GNgj6Ym4tr6SK8O3dEjyVeeRVpo8diatGiyu8TeU5/82kbi9uvvBKT9KoXokGSwK8nahv04ZJi8t9dQ9GG9eilC67Mbc4nLXM8CRd3qNGYTtpY3OPHlJiAapcFVEI0VPKde5bVNuh1TcP98WZcK5YR9ngAUFNScY4cTVLPq2u1cCo6RqMRi9MmC6iEaOAk8M+S2j6j13Wdkq++JG/xQoLHjkXe02IhZcgNJA++HoPZUsk7VGGMBgWjLQlL83R8noCEvRANnAR+nNU26AFKDh7kl7fmUvLN19E3tffqQ2rGyMiipzqgWswYHclgtpTO5qne4i4hRP0jgR8ntX10AxAqLCR/xTLcWz6Kvj6h4yU4x47H0rp13YxTploK0WhJ4MdYXdzRhwMBCt9fR8HaNeh+PwCmc1qSNjaThE6X13jHqVNJr3ohGjf5zo6Rsjv6kMdNyOevUdDr4TCeTz/BtWwJWr4LAIMtiZZjx2Du2QuljoJZetUL0TRI4Nexugh6gJJ93+NatBD/zz9FDhiNJA8cRMoNN5Lawlnz/vSnDFamWgrRdMh3eR1RFNADgUibYp8PvYZBH8w5jmvJIry7d0WP2bp2xzl6DKYT2hrUlvSqF6LpkcCvJUWBsM+H5nIRKimpcdBrXi8Fa1ZR+MEGKN171tK2HWmZ47G2v7AOx6tglF71QjRJEvg1VHZHr3k8+I06weKSGr2ProUo+vBD8lctJ+z1AmB0OnGOGoute486+0AWpFe9EE2dBH5NBHyEPF40vw89rJNgr1r3yhPpuk7xF3twLc4imP0LAIrVSsrQYSRfNxiDue5aDUuveiEESOBXnR5GLylB83rQgqEafxgL4D94gLxFC/F9923kgKJgv7YvqSMyIoud6pD0qhdClJHAr4weRi8uRvN40EqbktVUKD8f1/KleLZv/XXh1GWdSBuTifm8VnUx2ihZQCWEOFWjC3z3zh24lmTxY85x1PTmOEdnYu/Wvdrvo+hhwsVeNLcXTatd0If9fgrXraVg3Vr0QKRFgencc0kbM47ETlXYFLxUVTcVV61WjCkpsoBKCHGSuCVCMBhk6tSpHDlyBIPBwBNPPEH79u3
rtIZ75w6yX30ZxWjCaLcTcrnIfvVluOtPVQ99LYReUkzQ4yVcOlumpvRwGM/2rbiWL43sHgWodgepv8nA3vtaFFWt8ntVZVNxWUAlhDiTuAX+hx9+SCgUYv78+WzZsoW///3vzJ49u05ruJZkoRhNGKxWFEXBYLUS9kWOnynwy2bchL3eyNTK0g2+a6Pku2/Jy1pA4NDBSA2jkeRB15MydBiGhIRqv98ZNxW//HJZQCWEqFTc0qFt27ZomkY4HMbj8WA8w36qAKqqkJKSWK0aP+Ycx2i3RwJRAVU1YEhMQMs5XuF7hUtKCHncaD4/GHSwVf/DTVU14CidqeM7doyj/3mHol2/LpxK7dWLluPHYU6v+cKpQ648VJvtpGvTE6wYQgGanX8uqs1W+ZvUkKoaqv1n0RBqNfZ6jfna4l2vsVxb3AI/MTGRI0eOcMMNN5Cfn8+rr756xvM1TaegoLhaNdT05oRcLgxWK6pqQNPChH0+jOnNT3qvaPuDWnSuPJHDbiX/WC75q1dQtGkThEsXTrW/MLJwqm07fICvFu0QVGcaWkEBisUSCXvViNGWiMFuxx1UoJq/V9WRkpJY7T+LhlCrsddrzNcW73oN6drS0+0Vfi1uk7LffPNN+vTpw3vvvcfy5cuZOnUq/tLOj3XFOToTPRQkXNraIOzzoYeCOEdnlp6hQ8BPMDcHf25u5K6+lmGvB4McX72GQzMeoWjjBghrGJs1o/kdd3Pu5KlY27ar/YUR2VRc10LowSBGhwOjPYlgQQGOQUPr5P2FEI1f3O7wHQ4HJpMJgOTkZEKhEFotPxQ9lb1bd7jrT7iWZKHlHMeY3py0seOxX3EF4WIPmtdb6zn0ZXRdx/vZLlyLFxHKzQHAkJBAyrDhJA+4DqX0WuuKrXNnFONEfLs/JZCdDdYE0m+7vUYzkIQQTVPcAn/ixIk8+uij3HTTTQSDQR544AESE+v+GZW9W3fs3bpHfiTKKyLs9eI/frxOPogt4/v5J1xZC/D9sC9ywGDA0bc/qSN+g5pU8Y9TtaFaLaT26weDBsf9x0shROMQt8C32Wy89NJL8SpHqKiQwPGcOg36kMuFa9liPJ9sjx5L7HwFbW69mYDDWWd1TmRQ1cgH0TYbMtVSCFEbjXIOn6KAVuKrs7AP+0ooWPsuhe+vQw8GATC3aoVz7HgSL7kUq91KoC76059IUTAlJGBwOGQBlRCiTkiSnIEeDuPe8hH5K5ahFRUBoDqSSR05Cvs1vVEMsfnMW3rVCyFiQQK/AsVf78W1aCGBI4cBUExmkq8fQsr1QzFYq98dsyqkV70QIpYk8E8ROHqUvMULKfnqy+ixpKt74Rw5CmNqbJ7Tg/SqF0LEngR+Kc3tJn/lMoo+2gylz/6tF11M2tjxWC64IGZ1FYMBkz1JetULIWKuyQd+OBikaOP75K9Zje6L7FplbN6ctDGZJF7RpU53nDqVarVE+t9Lr3ohRBw02cDXdR3vzh24li4mlJcLgCExkdThv8HRbwBKJb1+akOmWgohzoYmGfi+/T+Qt2gh/h/3Rw4YVBz9B5A6fASqLSl2hWWqpRDiLGpSqRPMzcW1dBHenTuixxKv7IJz9FjMLc6JaW2ZaimEONuaROCHS4rJf3cNRRvWo5duU2hucz5pY8eR0KFjTGvLVEshRH3RqANf1zTcH3+Ea+Uywm43AGpKKs6Ro0nqeXXMFk6VkamWQoj6pNEGfuHu3Rz+95sEjx0FQDGbSRlyA8mDh2CwWGJaW6ZaCiHqo0YX+FpxMUeefQrvrp2RA4pC0jW9cGaMwpiSGvP6qtUSuas31m17ZCGEqK1GF/ie7VujYW/t0JG0zPFYWreJeV2ZaimEqO8aXeDbr+lN+i3HcbQ+j3CbdjFdOAWAoqAmJGKymWWqpRCiXmt0CWVISCB9wk1YfW4KcgtjWqtsqqU5PY1i2ZBECFHPNbrAj4dTp1oKIURDIIFfTarFHOl/I1MthRANjAR+Ff061dIe2VJLCCEaGAn8KlAtFozJ0tVSCNGwSeCfgWIwYHLYI3f1QgjRwEngV8CYYEVNTpGplkKIRkPS7BQGVcXocKAkJiILqIQQjYkEfpnSXvWqw4Eud/VCiEZIko2yBVQOlIREaV8shGi0mnbgKwomWyKq3YFuUCXshRCNWpMNfNVkwpiSDGarLKASQjQJTS7wFYOCMSkJNckubRGEEE1Kkwp8aYsghGjKmkTgS1sEIYRoAoEvbRGEECKi0Qa+YjBgTkmWtghCCFGqUQa+roMxNRXFEzjbQxFCiHqj0U5TMRgb5b9lQghRY3FNxX/9619s3LiRYDDIb3/7WzIzM+NZXgghmrS4Bf4nn3zCZ599xjvvvENJSQn//ve/41VaCCEEoOh6fBoKvPDCCyiKwr59+/B4PDz88MN07ty5wvPD4TCaVvOhqaoBTQvX+PX1tVZjr9eYry3e9RrztcW7XkO6NpNJrfBrcbvDz8/P5+jRo7z66qscPnyYu+++m7Vr16JUMC9e03QKCoprXC8lJbFWr6+vtRp7vcZ8bfGu15ivLd71GtK1padXPDMxboGfkpJCu3btMJvNtGvXDovFgsvlIi0tLV5DEEKIJi1us3S6du3KRx99hK7rZGdnU1JSQkpKSrzKCyFEkxe3O/wBAwawY8cOxo4di67rPPbYY6hqxc+ahBBC1K24Tst8+OGH41lOCCHECeI2S0cIIcTZ1WhX2gohhDiZBL4QQjQREvhCCNFESOALIUQTIYEvhBBNhAS+EEI0ERL4QgjRRDSoXUKCwSCPPvooR44cIRAIcPfdd3PhhRcydepUFEXhoosuYubMmRgMkX/HXC4XEyZMYOXKlVgsFgAOHDjAf/3Xf7Fq1aqY1nK73UyePBmPx0MwGGTq1Kl06dIlZvWKi4uZNGkShYWFJCQk8Pzzz+N0OmP6ewmwf/9+xo0bx9atW086Xpe1dF2nb9++XHDBBQBceeWVTJo0KWbXpmkaf/vb3/jqq68IBALce++9DBgwIGb1XnvtNT766CMAioqKyM3NZcuWLTGp5Xa7eeCBBygpKcFkMvH888+Tnp4es2srKCiIfh+kpKTw5JNPnrF/VnXqvfnmm6xevRqAfv368ac//Qmfz8fkyZPJy8vDZrPx7LPPVvh9UNtaZdavX8/atWt54YUXKryuuqhX3Uwpl96ALFq0SH/yySd1Xdd1l8ul9+vXT7/zzjv17du367qu6zNmzNDXrVun67qub968Wc/IyNC7dOmi+3w+Xdd1fenSpfqoUaP0Xr16xbzWSy+9pL/xxhu6ruv6/v379ZEjR8a03htvvKHPnj1b13VdX7x4sf7EE0/EtJ6u67rb7dZvv/12/eqrrz7peF3X+vnnn/U777zzjNdTl/UWL16sz5w5U9d1Xf/ll1+if46xqneiO+64Q9+8eXPMar355pv6s88+q+u6ri9YsED/29/+FtNre+aZZ/RXXnlF13Vd37Jli/7oo4/WSb2DBw/qo0aN0kOhkK5pmj5+/Hj9m2++0f/973/r//jHP3Rd1/VVq1ad8fugtrV0Xd
efeOIJfciQIfr9999/xuuqi3rVzZTyNKhHOkOHDuW+++6L/lpVVfbu3UuPHj0A6Nu3L1u3bgXAYDDwxhtvnNSgLTk5mbfffjsutSZOnMiECRMA0DStwrvfuqx39913A3D06FGaNWsW03q6rjNjxgwefPBBEhISYlpr7969ZGdnc8stt3D77bfz448/xrTexx9/zDnnnMMdd9zB9OnTGThwYEzrlVm3bh0Oh4Nrr702ZrUuvvhivF4vAB6PB2MlW4HWtt4PP/xA3759AbjqqqvYtWtXndQ755xz+N///V9UVcVgMBAKhbBYLOzatSv6+9e3b1+2bdsWs1pl1/T444+f8Zrqql51M6U8DSrwbTYbSUlJeDwe/vznP3P//fej63q0p77NZsPtdgPQu3dvUlNTT3r9gAEDSExMjEsth8OB1WolJyeHyZMn8+CDD8a0HkT+At166628/fbb9OvXL6b1Xn75Zfr160fHjh3PWKcuaqWnp3PHHXcwd+5c7rzzTiZPnhzTevn5+Rw4cIB//etf3H777TzyyCMxrVfmX//610mPCmJRKzU1lS1btjBs2DBef/11xo4dG9N6l1xyCRs3bgRg48aN+Hy+OqlnMplwOp3ous6zzz7LpZdeStu2bfF4PNjt9tPGFotaAMOGDatwT4+6rlfdTClPgwp8gGPHjnHrrbeSkZHBiBEjos8OAbxeLw6Ho97U+u6775g4cSIPPPBA9F/xWNYD+L//+z/mzZvHvffeG9N6K1asYPHixdxyyy3k5OTwhz/8IWa1OnXqxHXXXQdAt27dyM7ORq+kBVRt6qWkpNC/f38URaFHjx78/PPPZ6xV23oQuRN2OBycf/75Ma318ssv88c//pE1a9bw+uuvx/zvyR133MGRI0eYOHEix44d45xzzqmzen6/n4ceegiv18vMmTMBSEpKiv4EU5Xf99rUqona1qtuppyqQQV+bm4uf/jDH5g8eXL0zuTSSy/lk08+AWDz5s1069atXtT64YcfuO+++3jhhRcqvduui3r/+te/WLZsGQCJiYmVtp6ubb3169czd+5c5s6dS3p6+hn3KK5trZdffpm33noLgG+//ZZzzz33jHdVta3XtWtXPvzww2i9li1bVnhuXdQD2Lp1a/TRRyxrORyO6B1wWlpaNBxjVW/nzp1kZGTw5ptv0qpVK6666qo6qafrOvfccw8dOnTgr3/9a/Tv+1VXXRX9s9u8eTNdu3aNWa3qqm296mZKeRpUt8wnn3ySd999l3bt2kWPTZs2jSeffJJgMEi7du148sknT/oDGThwIO++++5Jz7t69+5d4SyIuqp19913891333HeeecBkTuPV155JWb1cnNzmTJlCoFAAE3TmDRp0hn/stfV7+WZjtdVrcLCQiZPnkxxcTGqqvLYY4/Rvn37mF1bIBBg5syZ7N+/H13Xefzxx7nsssti+nv5l7/8hd69ezNo0KAK69RFrezsbKZPn05xcTGhUIg///nP9O7dO2b1Dhw4wJQpUwBo3rw5Tz/9NElJSbWut3HjRh588EGuvPLK6HkPPvggHTt2ZMqUKeTk5GAymXjhhRcqnIVU21plM2Q++eQT5s+fz3//939XeF11Ue+1116rVqaUp0EFvhBCiJprUI90hBBC1JwEvhBCNBES+EII0URI4AshRBMhgS+EEE2EBL4QQjQREvhCCNFENKj2yELE2qRJkxgxYgT9+/dn//79PPvsszRr1owDBw4QDoe5//776dmzJ2vXrmXevHnR17300kvs27ePWbNmYTKZGDduHCNHjjx7FyJEOSTwhThBZmYm77zzDv3792fRokV06dIFj8fD008/TX5+PjfffDOrV6/m559/5rXXXiMhIYHHHnuMjz/+mBYtWuD3+8nKyjrblyFEuSTwhThBz549eeqpp8jLy2PLli106dKF3bt388UXXwAQCoXIz88nLS2NKVOmYLPZ+PHHH6PL4Mu6KApRH0ngC3ECRVEYMWIETz31FL1796Zly5a0bNmSu+66C5/PxyuvvILRaOQf//gHmzZtAuC2226Ldu88sfuhEPWNBL4Qpxg9ejT9+/dn+fLltG7dmunTp3PzzTfj8Xi46aabSEpK4qqrrmLUqFEkJibicDg4fvw4rVq1OttDF+KMpHmaEKfIzs7m4YcfjrZkFqKxkJ8/hTjBe++9xx//+MczbpIuREMld/hCCNFEyB2+EEI0ERL4QgjRREjgCyFEEyGBL4QQTYQEvhBCNBH/H20WFJO/rltTAAAAAElFTkSuQmCC",
11
+ "text/plain": [
12
+ "<Figure size 432x288 with 1 Axes>"
13
+ ]
14
+ },
15
+ "metadata": {},
16
+ "output_type": "display_data"
17
+ }
18
+ ],
19
+ "source": [
20
+ "import matplotlib.pyplot as plt\n",
21
+ "import seaborn as sns\n",
22
+ "# import matplotlib \n",
23
+ "\n",
24
+ "\n",
25
+ "dates = [2011, 2013, 2014, 2015, 2017, 2019, 2020, 2022]\n",
26
+ "values = [7.99, 7.99, 8.99, 9.99, 10.99, 12.99, 13.99, 15.49]\n",
27
+ "\n",
28
+ "# plt.scatter(dates,values)\n",
29
+ "# plt.scatter(dates, values)\n",
30
+ "# sns.set_style('darkgrid')\n",
31
+ "sns.set_palette(\"RdBu\")\n",
32
+ "# sns.color_palette(\"flare\", as_cmap=True)\n",
33
+ "sns.regplot(x=dates, y=values)\n",
34
+ "# plt.plot(dates, values)\n",
35
+ "plt.xlabel('year')\n",
36
+ "plt.ylabel('cost (USD)') \n",
37
+ "plt.xticks(range(2011, 2023))\n",
38
+ "plt.title('Netflix (standard) Time vs Price')\n",
39
+ "# plt.legend()\n",
40
+ "\n",
41
+ "plt.savefig('netflix.jpg')\n",
42
+ "plt.show()\n"
43
+ ]
44
+ }
45
+ ],
46
+ "metadata": {
47
+ "interpreter": {
48
+ "hash": "b7e818f66e33c31ac0526ee7f8556503ff93918b8b22809241939dc19e90de0b"
49
+ },
50
+ "kernelspec": {
51
+ "display_name": "Python 3.8.12 64-bit ('pytorch_m1': conda)",
52
+ "language": "python",
53
+ "name": "python3"
54
+ },
55
+ "language_info": {
56
+ "name": "python",
57
+ "version": "3.8.12"
58
+ },
59
+ "orig_nbformat": 4
60
+ },
61
+ "nbformat": 4,
62
+ "nbformat_minor": 2
63
+ }
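The regplot call above already draws a least-squares line; to put a number on the trend, a small sketch reusing the same dates/values lists from the cell (numpy only, no new data):

import numpy as np

dates = [2011, 2013, 2014, 2015, 2017, 2019, 2020, 2022]
values = [7.99, 7.99, 8.99, 9.99, 10.99, 12.99, 13.99, 15.49]

# Degree-1 least-squares fit, i.e. the line regplot draws.
slope, intercept = np.polyfit(dates, values, 1)
print(f"standard plan rises about ${slope:.2f}/year")  # ~$0.74/year on these points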
code/penguin_classifier.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36bab9fe0b430613c34c25bca7a87e8ec059c61b40119d71da00153ef95ed3cb
3
+ size 2839
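penguin_classifier.pt is stored as a Git-LFS pointer (about 2.8 kB of weights). A hedged sketch of loading it back, assuming it was written with torch.save(model.state_dict(), ...): the PenguinNet class below is a hypothetical stand-in, and load_state_dict only succeeds if layer names and shapes match the module actually trained in penguins_nn.ipynb.

import torch
import torch.nn as nn

class PenguinNet(nn.Module):
    # Hypothetical architecture: 4 measurements in, 3 species out.
    def __init__(self, n_features=4, n_hidden=10, n_classes=3):
        super().__init__()
        self.hidden = nn.Linear(n_features, n_hidden)
        self.out = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        return self.out(torch.relu(self.hidden(x)))

model = PenguinNet()
model.load_state_dict(torch.load("code/penguin_classifier.pt"))
model.eval()  # inference mode: disables dropout/batch-norm updates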
code/penguins_nn.ipynb ADDED
@@ -0,0 +1,866 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 22,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "text/html": [
11
+ "<div>\n",
12
+ "<style scoped>\n",
13
+ " .dataframe tbody tr th:only-of-type {\n",
14
+ " vertical-align: middle;\n",
15
+ " }\n",
16
+ "\n",
17
+ " .dataframe tbody tr th {\n",
18
+ " vertical-align: top;\n",
19
+ " }\n",
20
+ "\n",
21
+ " .dataframe thead th {\n",
22
+ " text-align: right;\n",
23
+ " }\n",
24
+ "</style>\n",
25
+ "<table border=\"1\" class=\"dataframe\">\n",
26
+ " <thead>\n",
27
+ " <tr style=\"text-align: right;\">\n",
28
+ " <th></th>\n",
29
+ " <th>CulmenLength</th>\n",
30
+ " <th>CulmenDepth</th>\n",
31
+ " <th>FlipperLength</th>\n",
32
+ " <th>BodyMass</th>\n",
33
+ " <th>Species</th>\n",
34
+ " </tr>\n",
35
+ " </thead>\n",
36
+ " <tbody>\n",
37
+ " <tr>\n",
38
+ " <th>187</th>\n",
39
+ " <td>48.4</td>\n",
40
+ " <td>16.3</td>\n",
41
+ " <td>22.0</td>\n",
42
+ " <td>54.00</td>\n",
43
+ " <td>1</td>\n",
44
+ " </tr>\n",
45
+ " <tr>\n",
46
+ " <th>303</th>\n",
47
+ " <td>49.5</td>\n",
48
+ " <td>19.0</td>\n",
49
+ " <td>20.0</td>\n",
50
+ " <td>38.00</td>\n",
51
+ " <td>2</td>\n",
52
+ " </tr>\n",
53
+ " <tr>\n",
54
+ " <th>196</th>\n",
55
+ " <td>50.5</td>\n",
56
+ " <td>15.9</td>\n",
57
+ " <td>22.2</td>\n",
58
+ " <td>55.50</td>\n",
59
+ " <td>1</td>\n",
60
+ " </tr>\n",
61
+ " <tr>\n",
62
+ " <th>16</th>\n",
63
+ " <td>38.7</td>\n",
64
+ " <td>19.0</td>\n",
65
+ " <td>19.5</td>\n",
66
+ " <td>34.50</td>\n",
67
+ " <td>0</td>\n",
68
+ " </tr>\n",
69
+ " <tr>\n",
70
+ " <th>85</th>\n",
71
+ " <td>41.3</td>\n",
72
+ " <td>20.3</td>\n",
73
+ " <td>19.4</td>\n",
74
+ " <td>35.50</td>\n",
75
+ " <td>0</td>\n",
76
+ " </tr>\n",
77
+ " <tr>\n",
78
+ " <th>234</th>\n",
79
+ " <td>47.4</td>\n",
80
+ " <td>14.6</td>\n",
81
+ " <td>21.2</td>\n",
82
+ " <td>47.25</td>\n",
83
+ " <td>1</td>\n",
84
+ " </tr>\n",
85
+ " <tr>\n",
86
+ " <th>145</th>\n",
87
+ " <td>39.0</td>\n",
88
+ " <td>18.7</td>\n",
89
+ " <td>18.5</td>\n",
90
+ " <td>36.50</td>\n",
91
+ " <td>0</td>\n",
92
+ " </tr>\n",
93
+ " <tr>\n",
94
+ " <th>21</th>\n",
95
+ " <td>37.7</td>\n",
96
+ " <td>18.7</td>\n",
97
+ " <td>18.0</td>\n",
98
+ " <td>36.00</td>\n",
99
+ " <td>0</td>\n",
100
+ " </tr>\n",
101
+ " <tr>\n",
102
+ " <th>30</th>\n",
103
+ " <td>39.5</td>\n",
104
+ " <td>16.7</td>\n",
105
+ " <td>17.8</td>\n",
106
+ " <td>32.50</td>\n",
107
+ " <td>0</td>\n",
108
+ " </tr>\n",
109
+ " <tr>\n",
110
+ " <th>330</th>\n",
111
+ " <td>42.5</td>\n",
112
+ " <td>17.3</td>\n",
113
+ " <td>18.7</td>\n",
114
+ " <td>33.50</td>\n",
115
+ " <td>2</td>\n",
116
+ " </tr>\n",
117
+ " </tbody>\n",
118
+ "</table>\n",
119
+ "</div>"
120
+ ],
121
+ "text/plain": [
122
+ " CulmenLength CulmenDepth FlipperLength BodyMass Species\n",
123
+ "187 48.4 16.3 22.0 54.00 1\n",
124
+ "303 49.5 19.0 20.0 38.00 2\n",
125
+ "196 50.5 15.9 22.2 55.50 1\n",
126
+ "16 38.7 19.0 19.5 34.50 0\n",
127
+ "85 41.3 20.3 19.4 35.50 0\n",
128
+ "234 47.4 14.6 21.2 47.25 1\n",
129
+ "145 39.0 18.7 18.5 36.50 0\n",
130
+ "21 37.7 18.7 18.0 36.00 0\n",
131
+ "30 39.5 16.7 17.8 32.50 0\n",
132
+ "330 42.5 17.3 18.7 33.50 2"
133
+ ]
134
+ },
135
+ "execution_count": 22,
136
+ "metadata": {},
137
+ "output_type": "execute_result"
138
+ }
139
+ ],
140
+ "source": [
141
+ "import pandas as pd\n",
142
+ "\n",
143
+ "# load the training dataset (excluding rows with null values)\n",
144
+ "penguins = pd.read_csv('/Users/johnnydevriese/projects/data/penguins.csv').dropna()\n",
145
+ "\n",
146
+ "# Deep Learning models work best when features are on similar scales\n",
147
+ "# In a real solution, we'd implement some custom normalization for each feature, but to keep things simple\n",
148
+ "# we'll just rescale the FlipperLength and BodyMass so they're on a similar scale to the bill measurements\n",
149
+ "penguins['FlipperLength'] = penguins['FlipperLength']/10\n",
150
+ "penguins['BodyMass'] = penguins['BodyMass']/100\n",
151
+ "\n",
152
+ "# The dataset is too small to be useful for deep learning\n",
153
+ "# So we'll oversample it to increase its size\n",
154
+ "for i in range(1,3):\n",
155
+ " penguins = penguins.append(penguins)\n",
156
+ "\n",
157
+ "# Display a random sample of 10 observations\n",
158
+ "sample = penguins.sample(10)\n",
159
+ "sample"
160
+ ]
161
+ },
162
+ {
163
+ "cell_type": "code",
164
+ "execution_count": 23,
165
+ "metadata": {},
166
+ "outputs": [
167
+ {
168
+ "name": "stdout",
169
+ "output_type": "stream",
170
+ "text": [
171
+ "['CulmenLength' 'CulmenDepth' 'FlipperLength' 'BodyMass' 'Species'] SpeciesName\n",
172
+ "[ 42.7 18.3 19.6 40.75 0 ] Adelie\n",
173
+ "[ 37.9 18.6 19.3 29.25 0 ] Adelie\n",
174
+ "[ 39.0 17.1 19.1 30.5 0 ] Adelie\n",
175
+ "[ 50.2 18.8 20.2 38.0 2 ] Chinstrap\n",
176
+ "[ 45.2 14.8 21.2 52.0 1 ] Gentoo\n",
177
+ "[ 45.7 13.9 21.4 44.0 1 ] Gentoo\n",
178
+ "[ 38.8 20.0 19.0 39.5 0 ] Adelie\n",
179
+ "[ 43.8 13.9 20.8 43.0 1 ] Gentoo\n",
180
+ "[ 46.0 18.9 19.5 41.5 2 ] Chinstrap\n",
181
+ "[ 49.4 15.8 21.6 49.25 1 ] Gentoo\n"
182
+ ]
183
+ }
184
+ ],
185
+ "source": [
186
+ "penguin_classes = ['Adelie', 'Gentoo', 'Chinstrap']\n",
187
+ "print(sample.columns[0:5].values, 'SpeciesName')\n",
188
+ "for index, row in penguins.sample(10).iterrows():\n",
189
+ " print('[',row[0], row[1], row[2],row[3], int(row[4]), ']',penguin_classes[int(row[-1])])"
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "code",
194
+ "execution_count": 24,
195
+ "metadata": {},
196
+ "outputs": [
197
+ {
198
+ "name": "stdout",
199
+ "output_type": "stream",
200
+ "text": [
201
+ "Training Set: 957, Test Set: 411 \n",
202
+ "\n",
203
+ "Sample of features and labels:\n",
204
+ "[51.1 16.5 22.5 52.5] 1 (Gentoo)\n",
205
+ "[50.7 19.7 20.3 40.5] 2 (Chinstrap)\n",
206
+ "[49.5 16.2 22.9 58. ] 1 (Gentoo)\n",
207
+ "[39.3 20.6 19. 36.5] 0 (Adelie)\n",
208
+ "[42.5 20.7 19.7 45. ] 0 (Adelie)\n",
209
+ "[50. 15.3 22. 55.5] 1 (Gentoo)\n",
210
+ "[50.2 18.7 19.8 37.75] 2 (Chinstrap)\n",
211
+ "[50.7 19.7 20.3 40.5] 2 (Chinstrap)\n",
212
+ "[49.1 14.5 21.2 46.25] 1 (Gentoo)\n",
213
+ "[43.2 16.6 18.7 29. ] 2 (Chinstrap)\n",
214
+ "[38.8 17.6 19.1 32.75] 0 (Adelie)\n",
215
+ "[37.8 17.1 18.6 33. ] 0 (Adelie)\n",
216
+ "[45.8 14.2 21.9 47. ] 1 (Gentoo)\n",
217
+ "[43.8 13.9 20.8 43. ] 1 (Gentoo)\n",
218
+ "[36. 17.1 18.7 37. ] 0 (Adelie)\n",
219
+ "[43.3 13.4 20.9 44. ] 1 (Gentoo)\n",
220
+ "[36. 18.5 18.6 31. ] 0 (Adelie)\n",
221
+ "[41.1 19. 18.2 34.25] 0 (Adelie)\n",
222
+ "[33.1 16.1 17.8 29. ] 0 (Adelie)\n",
223
+ "[40.9 13.7 21.4 46.5] 1 (Gentoo)\n",
224
+ "[45.2 17.8 19.8 39.5] 2 (Chinstrap)\n",
225
+ "[48.4 14.6 21.3 58.5] 1 (Gentoo)\n",
226
+ "[43.6 13.9 21.7 49. ] 1 (Gentoo)\n",
227
+ "[38.5 17.9 19. 33.25] 0 (Adelie)\n"
228
+ ]
229
+ }
230
+ ],
231
+ "source": [
232
+ "from sklearn.model_selection import train_test_split\n",
233
+ "\n",
234
+ "features = ['CulmenLength','CulmenDepth','FlipperLength','BodyMass']\n",
235
+ "label = 'Species'\n",
236
+ " \n",
237
+ "# Split data 70%-30% into training set and test set\n",
238
+ "x_train, x_test, y_train, y_test = train_test_split(penguins[features].values,\n",
239
+ " penguins[label].values,\n",
240
+ " test_size=0.30,\n",
241
+ " random_state=0)\n",
242
+ "\n",
243
+ "print ('Training Set: %d, Test Set: %d \\n' % (len(x_train), len(x_test)))\n",
244
+ "print(\"Sample of features and labels:\")\n",
245
+ "\n",
246
+ "# Take a look at the first 25 training features and corresponding labels\n",
247
+ "for n in range(0,24):\n",
248
+ " print(x_train[n], y_train[n], '(' + penguin_classes[y_train[n]] + ')')\n",
249
+ "\n"
250
+ ]
251
+ },
252
+ {
253
+ "cell_type": "code",
254
+ "execution_count": 25,
255
+ "metadata": {},
256
+ "outputs": [
257
+ {
258
+ "name": "stdout",
259
+ "output_type": "stream",
260
+ "text": [
261
+ "Libraries imported - ready to use PyTorch 1.10.0\n"
262
+ ]
263
+ }
264
+ ],
265
+ "source": [
266
+ "import torch\n",
267
+ "import torch.nn as nn\n",
268
+ "import torch.utils.data as torch_data\n",
269
+ "\n",
270
+ "# Set random seed for reproducability\n",
271
+ "torch.manual_seed(0)\n",
272
+ "\n",
273
+ "print(\"Libraries imported - ready to use PyTorch\", torch.__version__)"
274
+ ]
275
+ },
276
+ {
277
+ "cell_type": "code",
278
+ "execution_count": 26,
279
+ "metadata": {},
280
+ "outputs": [
281
+ {
282
+ "name": "stdout",
283
+ "output_type": "stream",
284
+ "text": [
285
+ "Ready to load data\n"
286
+ ]
287
+ }
288
+ ],
289
+ "source": [
290
+ "# Create a dataset and loader for the training data and labels\n",
291
+ "train_x = torch.Tensor(x_train).float()\n",
292
+ "train_y = torch.Tensor(y_train).long()\n",
293
+ "train_ds = torch_data.TensorDataset(train_x,train_y)\n",
294
+ "train_loader = torch_data.DataLoader(train_ds, batch_size=20,\n",
295
+ " shuffle=False, num_workers=1)\n",
296
+ "\n",
297
+ "# Create a dataset and loader for the test data and labels\n",
298
+ "test_x = torch.Tensor(x_test).float()\n",
299
+ "test_y = torch.Tensor(y_test).long()\n",
300
+ "test_ds = torch_data.TensorDataset(test_x,test_y)\n",
301
+ "test_loader = torch_data.DataLoader(test_ds, batch_size=20,\n",
302
+ " shuffle=False, num_workers=1)\n",
303
+ "print('Ready to load data')\n",
304
+ "\n"
305
+ ]
306
+ },
307
+ {
308
+ "cell_type": "code",
309
+ "execution_count": 27,
310
+ "metadata": {},
311
+ "outputs": [
312
+ {
313
+ "name": "stdout",
314
+ "output_type": "stream",
315
+ "text": [
316
+ "PenguinNet(\n",
317
+ " (fully_connected1): Linear(in_features=4, out_features=10, bias=True)\n",
318
+ " (fully_connected2): Linear(in_features=10, out_features=10, bias=True)\n",
319
+ " (fully_connected3): Linear(in_features=10, out_features=3, bias=True)\n",
320
+ ")\n"
321
+ ]
322
+ }
323
+ ],
324
+ "source": [
325
+ "# Number of hidden layer nodes\n",
326
+ "hl = 10\n",
330
+ "\n",
331
+ "# Define the neural network\n",
332
+ "class PenguinNet(nn.Module):\n",
333
+ " def __init__(self):\n",
334
+ " super(PenguinNet, self).__init__()\n",
335
+ " self.fully_connected1 = nn.Linear(in_features=len(features), out_features=hl, bias=True) # bias=True is default\n",
336
+ " self.fully_connected2 = nn.Linear(hl, hl)\n",
337
+ " self.fully_connected3 = nn.Linear(hl, len(penguin_classes))\n",
338
+ "\n",
339
+ " def forward(self, x):\n",
340
+ " x = torch.relu(self.fully_connected1(x))\n",
341
+ " x = torch.relu(self.fully_connected2(x))\n",
342
+ " x = torch.relu(self.fully_connected3(x))\n",
343
+ " return x\n",
344
+ "\n",
345
+ "# Create a model instance from the network\n",
346
+ "model = PenguinNet()\n",
347
+ "print(model)"
348
+ ]
349
+ },
350
+ {
351
+ "cell_type": "code",
352
+ "execution_count": 28,
353
+ "metadata": {},
354
+ "outputs": [
355
+ {
356
+ "name": "stdout",
357
+ "output_type": "stream",
358
+ "text": [
359
+ "Epoch: 0\n",
360
+ "Training set: Average loss: 1.118814\n",
361
+ "Validation set: Average loss: 1.023595, Accuracy: 148/411 (36%)\n",
362
+ "\n",
363
+ "Epoch: 1\n",
364
+ "Training set: Average loss: 1.010274\n",
365
+ "Validation set: Average loss: 0.983460, Accuracy: 163/411 (40%)\n",
366
+ "\n",
367
+ "Epoch: 2\n",
368
+ "Training set: Average loss: 0.965314\n",
369
+ "Validation set: Average loss: 0.934165, Accuracy: 191/411 (46%)\n",
370
+ "\n",
371
+ "Epoch: 3\n",
372
+ "Training set: Average loss: 0.911513\n",
373
+ "Validation set: Average loss: 0.867269, Accuracy: 250/411 (61%)\n",
374
+ "\n",
375
+ "Epoch: 4\n",
376
+ "Training set: Average loss: 0.817720\n",
377
+ "Validation set: Average loss: 0.742112, Accuracy: 272/411 (66%)\n",
378
+ "\n",
379
+ "Epoch: 5\n",
380
+ "Training set: Average loss: 0.733329\n",
381
+ "Validation set: Average loss: 0.691639, Accuracy: 302/411 (73%)\n",
382
+ "\n",
383
+ "Epoch: 6\n",
384
+ "Training set: Average loss: 0.696301\n",
385
+ "Validation set: Average loss: 0.661350, Accuracy: 312/411 (76%)\n",
386
+ "\n",
387
+ "Epoch: 7\n",
388
+ "Training set: Average loss: 0.671731\n",
389
+ "Validation set: Average loss: 0.640087, Accuracy: 327/411 (80%)\n",
390
+ "\n",
391
+ "Epoch: 8\n",
392
+ "Training set: Average loss: 0.653092\n",
393
+ "Validation set: Average loss: 0.624311, Accuracy: 338/411 (82%)\n",
394
+ "\n",
395
+ "Epoch: 9\n",
396
+ "Training set: Average loss: 0.638097\n",
397
+ "Validation set: Average loss: 0.610605, Accuracy: 345/411 (84%)\n",
398
+ "\n",
399
+ "Epoch: 10\n",
400
+ "Training set: Average loss: 0.625696\n",
401
+ "Validation set: Average loss: 0.598022, Accuracy: 345/411 (84%)\n",
402
+ "\n",
403
+ "Epoch: 11\n",
404
+ "Training set: Average loss: 0.614685\n",
405
+ "Validation set: Average loss: 0.588183, Accuracy: 353/411 (86%)\n",
406
+ "\n",
407
+ "Epoch: 12\n",
408
+ "Training set: Average loss: 0.605506\n",
409
+ "Validation set: Average loss: 0.578678, Accuracy: 358/411 (87%)\n",
410
+ "\n",
411
+ "Epoch: 13\n",
412
+ "Training set: Average loss: 0.597361\n",
413
+ "Validation set: Average loss: 0.569911, Accuracy: 361/411 (88%)\n",
414
+ "\n",
415
+ "Epoch: 14\n",
416
+ "Training set: Average loss: 0.590228\n",
417
+ "Validation set: Average loss: 0.562248, Accuracy: 361/411 (88%)\n",
418
+ "\n",
419
+ "Epoch: 15\n",
420
+ "Training set: Average loss: 0.583250\n",
421
+ "Validation set: Average loss: 0.556146, Accuracy: 372/411 (91%)\n",
422
+ "\n",
423
+ "Epoch: 16\n",
424
+ "Training set: Average loss: 0.576846\n",
425
+ "Validation set: Average loss: 0.549725, Accuracy: 375/411 (91%)\n",
426
+ "\n",
427
+ "Epoch: 17\n",
428
+ "Training set: Average loss: 0.571098\n",
429
+ "Validation set: Average loss: 0.544390, Accuracy: 382/411 (93%)\n",
430
+ "\n",
431
+ "Epoch: 18\n",
432
+ "Training set: Average loss: 0.565975\n",
433
+ "Validation set: Average loss: 0.540335, Accuracy: 384/411 (93%)\n",
434
+ "\n",
435
+ "Epoch: 19\n",
436
+ "Training set: Average loss: 0.561476\n",
437
+ "Validation set: Average loss: 0.536972, Accuracy: 389/411 (95%)\n",
438
+ "\n",
439
+ "Epoch: 20\n",
440
+ "Training set: Average loss: 0.557517\n",
441
+ "Validation set: Average loss: 0.532509, Accuracy: 390/411 (95%)\n",
442
+ "\n",
443
+ "Epoch: 21\n",
444
+ "Training set: Average loss: 0.553931\n",
445
+ "Validation set: Average loss: 0.529417, Accuracy: 396/411 (96%)\n",
446
+ "\n",
447
+ "Epoch: 22\n",
448
+ "Training set: Average loss: 0.550773\n",
449
+ "Validation set: Average loss: 0.528216, Accuracy: 397/411 (97%)\n",
450
+ "\n",
451
+ "Epoch: 23\n",
452
+ "Training set: Average loss: 0.547976\n",
453
+ "Validation set: Average loss: 0.523656, Accuracy: 397/411 (97%)\n",
454
+ "\n",
455
+ "Epoch: 24\n",
456
+ "Training set: Average loss: 0.545466\n",
457
+ "Validation set: Average loss: 0.521025, Accuracy: 397/411 (97%)\n",
458
+ "\n",
459
+ "Epoch: 25\n",
460
+ "Training set: Average loss: 0.543647\n",
461
+ "Validation set: Average loss: 0.519855, Accuracy: 400/411 (97%)\n",
462
+ "\n",
463
+ "Epoch: 26\n",
464
+ "Training set: Average loss: 0.542047\n",
465
+ "Validation set: Average loss: 0.517385, Accuracy: 398/411 (97%)\n",
466
+ "\n",
467
+ "Epoch: 27\n",
468
+ "Training set: Average loss: 0.540234\n",
469
+ "Validation set: Average loss: 0.515388, Accuracy: 400/411 (97%)\n",
470
+ "\n",
471
+ "Epoch: 28\n",
472
+ "Training set: Average loss: 0.538977\n",
473
+ "Validation set: Average loss: 0.512899, Accuracy: 401/411 (98%)\n",
474
+ "\n",
475
+ "Epoch: 29\n",
476
+ "Training set: Average loss: 0.537303\n",
477
+ "Validation set: Average loss: 0.512066, Accuracy: 404/411 (98%)\n",
478
+ "\n",
479
+ "Epoch: 30\n",
480
+ "Training set: Average loss: 0.536062\n",
481
+ "Validation set: Average loss: 0.511284, Accuracy: 404/411 (98%)\n",
482
+ "\n",
483
+ "Epoch: 31\n",
484
+ "Training set: Average loss: 0.534580\n",
485
+ "Validation set: Average loss: 0.508444, Accuracy: 404/411 (98%)\n",
486
+ "\n",
487
+ "Epoch: 32\n",
488
+ "Training set: Average loss: 0.533200\n",
489
+ "Validation set: Average loss: 0.507806, Accuracy: 404/411 (98%)\n",
490
+ "\n",
491
+ "Epoch: 33\n",
492
+ "Training set: Average loss: 0.532376\n",
493
+ "Validation set: Average loss: 0.505557, Accuracy: 404/411 (98%)\n",
494
+ "\n",
495
+ "Epoch: 34\n",
496
+ "Training set: Average loss: 0.531220\n",
497
+ "Validation set: Average loss: 0.503028, Accuracy: 404/411 (98%)\n",
498
+ "\n",
499
+ "Epoch: 35\n",
500
+ "Training set: Average loss: 0.529759\n",
501
+ "Validation set: Average loss: 0.502396, Accuracy: 404/411 (98%)\n",
502
+ "\n",
503
+ "Epoch: 36\n",
504
+ "Training set: Average loss: 0.528576\n",
505
+ "Validation set: Average loss: 0.501712, Accuracy: 404/411 (98%)\n",
506
+ "\n",
507
+ "Epoch: 37\n",
508
+ "Training set: Average loss: 0.527694\n",
509
+ "Validation set: Average loss: 0.499238, Accuracy: 404/411 (98%)\n",
510
+ "\n",
511
+ "Epoch: 38\n",
512
+ "Training set: Average loss: 0.526515\n",
513
+ "Validation set: Average loss: 0.498586, Accuracy: 404/411 (98%)\n",
514
+ "\n",
515
+ "Epoch: 39\n",
516
+ "Training set: Average loss: 0.525752\n",
517
+ "Validation set: Average loss: 0.496938, Accuracy: 404/411 (98%)\n",
518
+ "\n",
519
+ "Epoch: 40\n",
520
+ "Training set: Average loss: 0.524745\n",
521
+ "Validation set: Average loss: 0.496314, Accuracy: 405/411 (99%)\n",
522
+ "\n",
523
+ "Epoch: 41\n",
524
+ "Training set: Average loss: 0.524034\n",
525
+ "Validation set: Average loss: 0.494481, Accuracy: 404/411 (98%)\n",
526
+ "\n",
527
+ "Epoch: 42\n",
528
+ "Training set: Average loss: 0.523150\n",
529
+ "Validation set: Average loss: 0.492949, Accuracy: 404/411 (98%)\n",
530
+ "\n",
531
+ "Epoch: 43\n",
532
+ "Training set: Average loss: 0.522167\n",
533
+ "Validation set: Average loss: 0.492328, Accuracy: 404/411 (98%)\n",
534
+ "\n",
535
+ "Epoch: 44\n",
536
+ "Training set: Average loss: 0.521537\n",
537
+ "Validation set: Average loss: 0.490820, Accuracy: 401/411 (98%)\n",
538
+ "\n",
539
+ "Epoch: 45\n",
540
+ "Training set: Average loss: 0.521010\n",
541
+ "Validation set: Average loss: 0.489736, Accuracy: 401/411 (98%)\n",
542
+ "\n",
543
+ "Epoch: 46\n",
544
+ "Training set: Average loss: 0.520252\n",
545
+ "Validation set: Average loss: 0.489686, Accuracy: 404/411 (98%)\n",
546
+ "\n",
547
+ "Epoch: 47\n",
548
+ "Training set: Average loss: 0.519929\n",
549
+ "Validation set: Average loss: 0.488752, Accuracy: 401/411 (98%)\n",
550
+ "\n",
551
+ "Epoch: 48\n",
552
+ "Training set: Average loss: 0.519249\n",
553
+ "Validation set: Average loss: 0.488609, Accuracy: 405/411 (99%)\n",
554
+ "\n",
555
+ "Epoch: 49\n",
556
+ "Training set: Average loss: 0.518899\n",
557
+ "Validation set: Average loss: 0.487255, Accuracy: 401/411 (98%)\n",
558
+ "\n"
559
+ ]
560
+ }
561
+ ],
562
+ "source": [
563
+ "# Specify the loss criteria (we'll use CrossEntropyLoss for multi-class classification)\n",
564
+ "loss_criteria = nn.CrossEntropyLoss()\n",
565
+ "\n",
566
+ "def train(model, data_loader, optimizer):\n",
567
+ " # Set the model to training mode\n",
568
+ " model.train()\n",
569
+ " train_loss = 0\n",
570
+ " \n",
571
+ " for batch, tensor in enumerate(data_loader):\n",
572
+ " data, target = tensor\n",
573
+ " #feedforward\n",
574
+ " optimizer.zero_grad()\n",
575
+ " out = model(data)\n",
576
+ " loss = loss_criteria(out, target)\n",
577
+ " train_loss += loss.item()\n",
578
+ "\n",
579
+ " # backpropagate\n",
580
+ " loss.backward()\n",
581
+ " optimizer.step()\n",
582
+ "\n",
583
+ " #Return average loss\n",
584
+ " avg_loss = train_loss / (batch+1)\n",
585
+ " print('Training set: Average loss: {:.6f}'.format(avg_loss))\n",
586
+ " return avg_loss\n",
587
+ "\n",
588
+ "def test(model, data_loader):\n",
589
+ " # Switch the model to evaluation mode (so we don't backpropagate)\n",
590
+ " model.eval()\n",
591
+ " test_loss = 0\n",
592
+ " correct = 0\n",
593
+ "\n",
594
+ " with torch.no_grad():\n",
595
+ " batch_count = 0\n",
596
+ " for batch, tensor in enumerate(data_loader):\n",
597
+ " batch_count += 1\n",
598
+ " data, target = tensor\n",
599
+ " # Get the predictions\n",
600
+ " out = model(data)\n",
601
+ "\n",
602
+ " # calculate the loss\n",
603
+ " test_loss += loss_criteria(out, target).item()\n",
604
+ "\n",
605
+ " # Calculate the accuracy\n",
606
+ " _, predicted = torch.max(out.data, 1)\n",
607
+ " correct += torch.sum(target==predicted).item()\n",
608
+ " \n",
609
+ " # Calculate the average loss and total accuracy for this epoch\n",
610
+ " avg_loss = test_loss/batch_count\n",
611
+ " print('Validation set: Average loss: {:.6f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n",
612
+ " avg_loss, correct, len(data_loader.dataset),\n",
613
+ " 100. * correct / len(data_loader.dataset)))\n",
614
+ " \n",
615
+ " # return average loss for the epoch\n",
616
+ " return avg_loss\n",
617
+ "\n",
618
+ "# Use an \"Adam\" optimizer to adjust weights\n",
619
+ "# (see https://pytorch.org/docs/stable/optim.html#algorithms for details of supported algorithms)\n",
620
+ "learning_rate = 0.001\n",
621
+ "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n",
622
+ "optimizer.zero_grad()\n",
623
+ "\n",
624
+ "# We'll track metrics for each epoch in these arrays\n",
625
+ "epoch_nums = []\n",
626
+ "training_loss = []\n",
627
+ "validation_loss = []\n",
628
+ "\n",
629
+ "# Train over 50 epochs\n",
630
+ "epochs = 50\n",
631
+ "# for epoch in range(1, epochs + 1):\n",
632
+ "for epoch in range(epochs):\n",
633
+ "\n",
634
+ " # print the epoch number\n",
635
+ " print('Epoch: {}'.format(epoch))\n",
636
+ " \n",
637
+ " # Feed training data into the model to optimize the weights\n",
638
+ " train_loss = train(model, train_loader, optimizer)\n",
639
+ " \n",
640
+ " # Feed the test data into the model to check its performance\n",
641
+ " test_loss = test(model, test_loader)\n",
642
+ " \n",
643
+ " # Log the metrics for this epoch\n",
644
+ " epoch_nums.append(epoch)\n",
645
+ " training_loss.append(train_loss)\n",
646
+ " validation_loss.append(test_loss)"
647
+ ]
648
+ },
649
+ {
650
+ "cell_type": "code",
651
+ "execution_count": 34,
652
+ "metadata": {},
653
+ "outputs": [
654
+ {
655
+ "data": {
656
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAAyQElEQVR4nO3deXxcdb3/8ddnlmSy72mztEn3vU3TtFQKtKUFCgJlE8qigGgF9aJeRcCrInq9l/u7XC7wkMWCICoXrOxqKVgsWynQdKX7vqRpm31fZ+b7++NM1iZpkmYySebzfDzmcWbOnHPyPWjzznc5368YY1BKKRW8bIEugFJKqcDSIFBKqSCnQaCUUkFOg0AppYKcBoFSSgU5R6AL0FOJiYkmMzMz0MVQSqlBZePGjUXGmKSOvht0QZCZmUlubm6gi6GUUoOKiBzp7DttGlJKqSCnQaCUUkFOg0AppYLcoOsjUEoNLY2NjeTl5VFXVxfoogwJLpeL9PR0nE5nt8/RIFBKBVReXh5RUVFkZmYiIoEuzqBmjKG4uJi8vDxGjRrV7fO0aUgpFVB1dXUkJCRoCPQBESEhIaHHtSsNAqVUwGkI9J3e/LcMmiDYc7KS/1y1i6p6d6CLopRSA0rQBMGxkhp+++FB9pysCHRRlFIDSFlZGU8++WSPz7vssssoKyvr8pif//znrFmzppcl6z9BEwSTUqMB2HmiMsAlUUoNJJ0Fgcfj6fK8VatWERsb2+Uxv/zlL1m8ePHZFK9fBE0QpMa4iHY52H1CawRKqRb33XcfBw4cICsri9mzZ7Nw4UJuuukmpk2bBsBVV13FrFmzmDJlCitWrGg+LzMzk6KiIg4fPsykSZP45je/yZQpU7j44oupra0F4LbbbuOVV15pPv6BBx4gOzubadOmsXv3bgAKCwu56KKLyM7O5lvf+hYZGRkUFRX1638Dvw0fFZHngMuBAmPM1A6+nwg8D2QD/2aMedhfZfH9PCalRLNLg0CpAevBv+5gZ37f/hudnBrNA1dM6fT7hx56iO3bt7Nlyxbef/99vvzlL7N9+/bm4ZfPPfcc8fHx1NbWMnv2bK699loSEhLaXGPfvn289NJLPPPMM1x//fW8+uqr3HLLLaf9rMTERDZt2sSTTz7Jww8/zLPPPsuDDz7IhRdeyP3338/q1avbhE1/8WeN4PfAki6+LwHuBvwaAK1NSolm98lKvF5dp1kp1bE5c+a0GYP/+OOPM2PGDObOncuxY8fYt2/faeeMGjWKrKwsAGbNmsXhw4c7vPY111xz2jEff/wxy5YtA2DJkiXExcX13c10k99qBMaYD0Uks4vvC4ACEfmyv8rQ3qSUKGoaPBwtqSEzMaK/fqxSqpu6+su9v0REtPxueP/991mzZg3r168nPDycBQsWdDhGPzQ0tPm93W5vbhrq7Di73Y7bbY1gNCbwf5gOij4CEVkuIrkikltYWNjr60xKsTqMtXlIKdUkKiqKysqOB5GUl5cTFxdHeHg4u3fv5tNPP+3zn3/eeeexcuVKAN59911KS0v7/GecyaAIAmPMCmNMjjEmJympw3UVumX8sChsokGglGqRkJDAvHnzmDp1Kvfcc0+b75YsWYLb7Wb69On87Gc/Y+7cuX3+8x944AHeffddsrOzefvtt0lJSSEqKqrPf05XxJ/VEl/T0N866ixudcwvgKrudhbn5OSYs1mYZvEjHzAqMYJnvpbT62sopfrOrl27mDRpUqCLETD19fXY7XYcDgfr16/nrrvuYsuWLWd1zY7+m4rIRmNMh7/4gm7SuYnDo9hyrCzQxVBKKQCOHj3K9ddfj9frJSQkhGeeeabfy+DP4aMvAQuARBHJAx4AnADGmKdFZDiQC0QDXhH5PjDZGOPXdptJKdH8bdsJKuoaiXZ1f5pWpZTyh3HjxrF58+aAlsGfo4ZuPMP3J4F0f/38zkz2dRjvPlHJnFHx/f3jlVJqwBkUncV9SUcOKaVUW0EXBMOiQ4kLd7JbJ59TSikgCINARJg4PFonn1NKKZ+gCwKwmof2nKzAo1NNKKV6KDIyEoD8/Hyuu+66Do9ZsGABZxrm/uijj1JTU9P8uTvTWvtLkAZBFHWNXg4XVwe6KEqpQSo1NbV5ZtHeaB8E3ZnW2l+CNAi0w1gpZbn33nvbrEfwi1/8ggcffJBFixY1Txn95ptvnnbe4cOHmTrVela2traWZcuWMX36dG644YY2cw3ddddd5OTkMGXKFB544AHAmsguPz+fhQsXsnDhQqBlWmuARx55hKlTpzJ16lQeffTR5p/X2XTXZyvoHigDGJscid0m7DpRweXTUwNdHKVUk7fvg5Nf9O01h0+DSx/q9Otly5bx/e9/n29/+9sArFy5ktWrV/ODH/yA6OhoioqKmDt3LldeeWWn6wE/9dRThIeHs23bNrZt20Z2dnbzd7/+9a+Jj4/H4/GwaNEitm3bxt13380jjzzC2rVrSUxMbHOtjRs38vzzz/PZZ59hjOGcc85h/vz5xMXFdXu6654KyhqBy2lnTFIEu7XDWKmgN3PmTAoKCsjPz2fr1q3ExcWRkpLCT37yE6ZPn87ixYs5fvw4p06d6vQaH374YfMv5OnTpzN9+vTm71auXEl2djYzZ85kx44d7Ny5s8vyfPzxx1x99dVEREQQGRnJNddcw0cffQR0f7rrngrKGgFYzUMbDpUEuhhKqda6+Mvdn6677jpeeeUVTp48ybJly3jxxRcpLCxk48aNOJ1OMjMzO5x+urWOaguHDh3i4YcfZsOGDcTFxXHbbbed8Tpdzf/W3emueyooawRgBUF+eR1lNQ2BLopSKsCWLVvGyy+/zCuvvMJ1111HeXk5ycnJOJ1O1q5dy5EjR7o8/4ILLuDFF18EYPv27Wzbtg2AiooKIiIiiImJ4dSpU7z99tvN53Q2/fUFF1zAG2+8QU1NDdXV1bz++uucf/75fXi3pwvqGgHArhOVfGlMwhmOVkoNZVOmTKGyspK0tDRSUlK4+eabueKKK8jJySErK4uJEyd2ef5dd93F7bffzvTp08nKymLOnDkAzJgxg5kzZzJlyhRGjx7NvHnzms9Zvnw5l156KSkpKaxdu7Z5f3Z2NrfddlvzNb7xjW8wc+bMPmsG6ohfp6H2h7OdhrpJQUUdc/7jPX5++WS+ft6oM5+glPKLYJ+G2h96Og110DYNJUWFkhARolNNKKWCXtAGgYgwKSWaXTpySCkV5II2CMB6wnjPqUrcHm+gi6JUUBtsTdQDWW/+WwZ5EETT4PZyqEinmlAqUFwuF8XFxRoGfcAYQ3FxMS6Xq0fnBe2oIYCJw62RQztPVDBuWP8uFq2UsqSnp5OXl0dhYWGgizIkuFwu0tN7tuZX8ATBkU/gvV/BTX8GlxUAY5MjcdqFXScqWZoV2OIpFaycTiejRunIvUAKnqYhZxgc/QQ+X9G8K8RhY0xSpI4cUkoFteAJgtSZMO4SWP8bqG8ZKTQ5JVpnIVVKBbXgCQKA+fdCbSl8/kzzrokpUZyqqKekWqeaUEo
Fp+AKgvRZMHaxr1ZQBejaBEop5bcgEJHnRKRARLZ38r2IyOMisl9EtolIdkfH9bn590FNMeT+DoBpaTGE2G28u+Nkv/x4pZQaaPxZI/g9sKSL7y8Fxvley4Gn/FiWFiNmw5gLYd3j0FBNbHgIV8xI5S8b8yivbeyXIiil1EDityAwxnwIdDXh/1LgD8byKRArIin+Kk8b8++FmiLIfR6AO84bRU2Dh5c/P9ovP14ppQaSQPYRpAHHWn3O8+07jYgsF5FcEcntk4dORs6FUfNh3WPQUMPk1Gi+NDqBFz45rNNNKKWCTiCDoKPFPzt8xtwYs8IYk2OMyUlKSuqbnz7/XqgugE0vAFatIL+8jre3a1+BUiq4BDII8oARrT6nA/n99tMz50Hm+fDxo9BYx4UTkxmVGMHvPj7Ub0VQSqmBIJBB8BbwNd/ooblAuTHmRL+WYP69UHUSNr2AzSbcPi+TLcfK2HiktF+LoZRSgeTP4aMvAeuBCSKSJyJ3iMidInKn75BVwEFgP/AM8G1/laVTmefByHPh4/+FxjquzU4n2uXgOa0VKKWCiN8mnTPG3HiG7w3wHX/9/G4RgQX3wh+WwraXiZh1GzfOGckzHx0kr7SG9LjwgBZPKaX6Q3A9WdyRUfMheTLkPgfAredmIiK88MnhwJZLKaX6iQaBCMy6HU5sheObSI0N49Kpw3n582NU1bsDXTqllPI7DQKAGTeAMxw2tjxgVlnv5i+5x85wolJKDX4aBACuGJh6DXzxKtRVMHNkHNkjY3l+3WE8Xl0+Tyk1tGkQNJn1dWishi/+AsAd543maEkNa3adCnDBlFLKvzQImqRlw/BpVvOQMVwyZRipMS5e/EznH1JKDW0aBE2aOo1PfgHHN+Gw27gmO52P9xVyqqIu0KVTSim/0SBobdpXwBkBG62hpNdkp+E18OaW4wEumFJK+Y8GQWuuaJh2HWx/DerKGZ0UycyRsby68TjW829KKTX0aBC0l3M7NNbAtpUAXJudzp5TlezI16UslVJDkwZBe6kzISXLWrTGGC6fnkKI3carm/ICXTKllPILDYKO5NwOBTsgbwOx4SEsmpTMW1vyadRFa5RSQ5AGQUemXgchUc1LWV6bnU5xdQMf7OmD1dGUUmqA0SDoSGgkTP8K7HgNakuZPyGJhIgQXtuszUNKqaFHg6Azs24Hdx1s/TNOu40rs1JZs7OAspqGQJdMKaX6lAZBZ1KmW08a73wTsJqHGjxe/ratfxdRU0opf9Mg6Mr4S+HYp1BTwpTUaCYMi9LRQ0qpIUeDoCvjl4Dxwv73EBGuyU5j89EyDhZWBbpkSinVZzQIupI6EyKSYO9qAK6amYZN4LVNOuWEUmro0CDois0G4y6B/f8Aj5th0S7OG5fE65uP49V1CpRSQ4QGwZmMvwTqyuHYZwBcm53G8bJaPj1UHOCCKaVU39AgOJMxC8HmbG4eunjycCJDHby6UZuHlFJDgwbBmYRGQeY82PsOAGEhdhZNSubj/fqUsVJqaPBrEIjIEhHZIyL7ReS+Dr6PE5HXRWSbiHwuIlP9WZ5eG78EivZAyUEAZo6I5VRFPSfLdcEapdTg57cgEBE78ARwKTAZuFFEJrc77CfAFmPMdOBrwGP+Ks9ZGX+Jtd37LgBZI+MA2HKsNFAlUkqpPuPPGsEcYL8x5qAxpgF4GVja7pjJwHsAxpjdQKaIDPNjmXonfjQkjm/uJ5iUEoXTLmw5Vh7ggiml1NnzZxCkAcdafc7z7WttK3ANgIjMATKA9PYXEpHlIpIrIrmFhQFqmx9/CRz+GOorCXXYmZwSzdZjZYEpi1JK9SF/BoF0sK/94PuHgDgR2QL8C7AZcJ92kjErjDE5xpicpKSkPi9ot4xfAt5GOLAWgBkjYvnieDkefZ5AKTXI+TMI8oARrT6nA/mtDzDGVBhjbjfGZGH1ESQBh/xYpt4bcQ64YppHD81Ij6Wq3q3TTSilBj1/BsEGYJyIjBKREGAZ8FbrA0Qk1vcdwDeAD40xA3NxYLsTxi6Gfe+A18uMEbEAbNHmIaXUIOe3IDDGuIHvAu8Au4CVxpgdInKniNzpO2wSsENEdmONLvqev8rTJ8YvgepCyN/M6MQIolwODQKl1KDn8OfFjTGrgFXt9j3d6v16YJw/y9Cnxi4GscHe1djSZzEjPZateWWBLpVSSp0VfbK4J8Ljrb4C3zDSGSNi2H2ikrpGT4ALppRSvadB0FPjL4GT26Ainxnpsbi9hh35A7NbQymlukODoKfGL7G2e98hy9dhrM8TKKUGMw2CnkqaCLEjYe87JEe7SIlxaT+BUmpQ0yDoKRHImAcntgDW8wQ6ckgpNZhpEPRG4nioPAF1FcwYEcuR4hpKqxsCXSqllOoVDYLeSJpgbYv2tfQTaPOQUmqQ0iDojcSmINjDtPQYRGCrzkSqlBqkNAh6Iy7TWr6yaC+RoQ7GJUdqjUApNWhpEPSG3QEJY6BwL9DSYWyMzkSqlBp8NAh6K3EcFPmCYEQsJdUN5JXWBrhQSinVcxoEvZU4wVrD2N3Q3GGsw0iVUoORBkFvJY4H44GSg0wYHkWow6ZPGCulBiUNgt5KGm9ti/bitNuYmhajHcZKqUFJg6C3EnyzZxftAawO4y+Ol+P2eANYKKWU6jkNgt4KjYTodCjaB1hTUtc1etlzqjLABVNKqZ7RIDgbSeOh0KoRtMxEqg+WKaUGFw2Cs5E43qoReL2MjA8nNtypHcZKqUGnW0EgIt8TkWix/E5ENonIxf4u3ICXOB4aq6EyHxHRpSuVUoNSd2sEXzfGVAAXA0nA7cBDfivVYNE0+Vyr5qG9pyqpqncHsFBKKdUz3Q0C8W0vA543xmxttS94JTYNIbU6jHMy4/Aa2HSkNICFUkqpnuluEGwUkXexguAdEYkCdJxkRBK4YpuHkM4cGYdNIPdwSWDLpZRSPdDdILgDuA+YbYypAZxYzUNdEpElIrJHRPaLyH0dfB8jIn8Vka0iskNEznjNAUXEqhX4Jp+LDHUwOTWaDYe1RqCUGjy6GwRfAvYYY8pE5Bbgp0CX4yRFxA48AVwKTAZuFJHJ7Q77DrDTGDMDWAD8j4iE9KD8gZc0vnnyOYCcjHg2HyulUR8sU0oNEt0NgqeAGhGZAfwYOAL84QznzAH2G2MOGmMagJeBpe2OMUCUiAgQCZQAg6unNXECVBdArVULmDMqnrpGLzvyKwJcMKWU6p7uBoHbWJPtLwUeM8Y8BkSd4Zw04Firz3m+fa39BpgE5ANfAN8zxpz2p7SILBeRXBHJLSws7GaR+0n7DuOMOAA2HNJ+AqXU4NDdIKgUkfuBrwJ/9zX7OM9wTkejitqv3HIJsAVIBbKA34hI9GknGbPCGJNjjMlJSkrqZpH7SdPkc74hpMnRLjISwtmgHcZKqUGiu0FwA1CP9TzBSay/7P/7DOfkASNafU7H+su/tduB14xlP3AImNjNMg0MsRlgD20eOQRWP0HukVJdsUwpNSh0Kwh8v/xfBGJE5HKgzhhzpj6CDcA4ERnl6wBeBrzV7pijwCIAER
kGTAAO9qD8gWezQ8LY5qYhgNmZcZRUN3CwqDqABVNKqe7p7hQT1wOfA18Brgc+E5HrujrHGOMGvgu8A+wCVhpjdojInSJyp++wXwHnisgXwHvAvcaYot7dSgAljmtuGgKYPSoe0H4CpdTg4Ojmcf+G9QxBAYCIJAFrgFe6OskYswpY1W7f063e52NNWzG4JU2AXW9BYx04XYxOjCA+IoQNh0tZNmdkoEunlFJd6m4fga0pBHyKe3Du0Jc4HowXSg4AICLkZMSRe0RrBEqpga+7v8xXi8g7InKbiNwG/J12f+kHtcS2I4cAZmfGc6S4hoKKugAVSimluqe7ncX3ACuA6cAMYIUx5l5/FmxQSRgLSJsO45xM63mCXJ2ATik1wHW3jwBjzKvAq34sy+AVEg6xI9oMIZ2aFoPLaePzQyVcNi0lgIVTSqmudRkEIlLJ6Q+BgfWwmDHGnPbwV9BKnNBmziGn3cbMEdpPoJQa+LpsGjLGRBljojt4RWkItJM0AYr2g7dlhozZmXHszK/QhWqUUgOajvzpK4njwF0L5Uebd+VkxuM1sPmo9hMopQYuDYK+kuhbtrJVh3F2hrVQja5PoJQayDQI+koHQ0ibF6rRJ4yVUgOYBkFfiUiA8IQ2HcagC9UopQY+DYK+1G7kEFgPlulCNUqpgUyDoC81TT7Xavrp2U0Plun6BEqpAUqDoC+lzYLaEsjf1LxLF6pRSg10GgR9acrV4AyHjS+02Z2TEU/uYV2oRik1MGkQ9CVXtBUG21+F+qrm3bMz4yiubmDPqcoAFk4ppTqmQdDXsm+FhirY8VrzrkWThhHmtPPk2gMBLJhSSnVMg6CvjZgDSRPbNA8lRYVyx3mjeGtrPtuPlwewcEopdToNgr4mAtlfg+O5cGpH8+7l80cTF+7kv1bvDmDhlFLqdBoE/jB9GdhDYNMfmndFu5x8Z+FYPtpXxCf7B9+yzEqpoUuDwB8iEmDSFbD1ZWsdY59b5maQGuPiodW7dQSRUmrA0CDwl+yvQV0Z7Ppr8y6X084PLhrPtrxyVn1xMnBlU0qpVjQI/CXzAojLhE1tnym4Jjud8cMiefjdPTr/kFJqQPBrEIjIEhHZIyL7ReS+Dr6/R0S2+F7bRcQjIvH+LFO/sdmsWsHhj6C4Zdio3Sb8+JKJHCqqZmXusQAWUCmlLH4LAhGxA08AlwKTgRtFZHLrY4wx/22MyTLGZAH3Ax8YY4bOXAxZN4PY23QaAyyalExORhyPrdlHbYMnQIVTSimLP2sEc4D9xpiDxpgG4GVgaRfH3wi85Mfy9L+o4TB+CWz5P/A0Nu8WEe67dCIFlfU8t+5QAAuolFL+DYI0oHXbR55v32lEJBxYArzayffLRSRXRHILCwv7vKB+NetWqC6Avavb7M7JjGfxpGSefv8ApdUNASqcUkr5Nwikg32djZm8AljXWbOQMWaFMSbHGJOTlJTUZwXsF2MWQVTqaRPRAdxzyUSqG9z89I3tOpxUKRUw/gyCPGBEq8/pQH4nxy5jqDULNbE7YOYtsH8NlBxs89WE4VH8eMlE/v7FCZ76QOchUkoFhj+DYAMwTkRGiUgI1i/7t9ofJCIxwHzgTT+WJbByboeQSPjbD9osWgPwrQtGc+WMVP77nT2s3V0QoAIqpYKZ34LAGOMGvgu8A+wCVhpjdojInSJyZ6tDrwbeNcZU+6ssARedChc9CAffh81/bPOViPBf105ncko0d7+8mYOFVR1fQyml/EQGW9t0Tk6Oyc3NDXQxes7rhT9cCSe2wnc+s8KhlbzSGq78zTriwp288Z15RLmcASqoUmooEpGNxpicjr7TJ4v7i80GVz5uDSPtoIkoPS6cJ27K5nBxDT/48xa83sEV0EqpwUuDoD/Fj4ZFP7OGkn7xl9O+/tKYBH5++WTW7Crg0TV7A1BApVQw0iDob+fcCemz4e0fQ9XpncNf+1IG1+ek8/g/9/O3bZ0NslJKqb6jQdDfbHZY+gQ0VMOqe077WkT41VVTycmI43svb+GNzccDUEilVDDRIAiEpAmw4D7Y+QbsPH3UbKjDzu+/PodzRsXzg5Vb+OP6w/1eRKVU8NAgCJRz74bh0+HvP4Sa0x+ojgx18Nxts1k0MZmfvbmDJ9/fH4BCKqWCgQZBoNidVhNRbSn8+RZorD3tEJfTzlO3zGJpVir/b/UeHnpbVzZTSvU9DYJASpkOV/8WjnwCK29tM0NpE6fdxv9en8XN54zk6Q8O8NM3tuvQUqVUn3IEugBBb9p1UFcOf/9XeOMuuHqF9cxBKzab8O9XTSXK5eTpDw5QVtvI/7t2OhGh+j+fUurs6W+SgWD2Hdb6xu/9EkKj4cv/A9J28tamNQziwp08tHo3e05W8uTN2YwfFhWYMiulhgxtGhoozvtXqwM593fwz191eti35o/hT3ecQ1lNA0t/s45XNub1YyGVUkORBsFAIQIX/RKyb4WP/gfWPd7pofPGJrLq7vOZMSKGH/1lKz9+ZasueamU6jUNgoFEBC7/X5hyNfzjZ/DZb0+bk6hJcrSLP91xDv9y4Vj+sjGPq59cxwGduVQp1QsaBAONzW51GI+/1JqG4q93g7u+w0Mddhs/vHgCv799DgWV9Vz++Mc8+9FB3B5vPxdaKTWYaRAMRI4QWPYinP9D2PQHeP4yKO98qon545NYdff5nDsmgX//+y6u/M06th4r67/yKqUGNQ2Cgcpmh0U/h+v/CIW7YcV8OLyu08OHx7h49tYcnro5m+Lqeq56ch2/eGsHlXWnP5uglFKtaRAMdJOvhG+8B64Ya2GbLvoNRIRLp6Ww5l/n87W5Gbyw/jCLH/mAt784oU8kK6U6pUEwGCRPhG/+E8ZeZPUbvLbcegitE1EuJw8uncrr355HfEQod724iZuf/Yztxzs/RykVvHSpysHE64WPHob3H7KWurzqKRh1fpenuD1eXvzsKI+u2UtZbSNXz0zjRxdPIDU2rJ8KrZQaCLpaqlKDYDDKy7VqBSUH4EvfhQt/Bk5Xl6dU1DXy5NoDPLfuEAJ84/xR3Dl/jK6NrFSQ0CAYihqq4R8/hw3PQtJEuGYFpMw442l5pTU8/M4e3tiST0JECHfOH8ON54wkUuctUmpI0yAYyvavgTe/C9WFMP8+OO/71hTXZ7Atr4z/Wr2bdfuLiXI5+OrcDG6fN4qkqFD/l1kp1e8CFgQisgR4DLADzxpjHurgmAXAo4ATKDLGzO/qmhoEHagpgVU/gu2vwrCpcMXjkD6rW6duPVbGbz88wNvbT+K027g2O53lF4xmVGKEnwutlOpPAQkCEbEDe4GLgDxgA3CjMWZnq2NigU+AJcaYoyKSbIw5fUX3VjQIurDrb9Y6yJUn4Jw74cJ/g9DuzU56qKiaZz46yCsb82j0eFk8aRhfnZvBeWMTsdnkzBdQSg1ogQqCLwG/MMZc4vt8P4Ax5j9bHfNtINUY89PuXleD4AzqKqzprDc8C9Fp1pTWE5Z0+/SCyjpe+OQwL31+jJLqBjISwrn5nJF8ZdYI4iJC/FhwpZQ/dRUE/nyOIA041
upznm9fa+OBOBF5X0Q2isjX/Fie4OCKhi8/DHe8a9UGXrrBWv2s5FC3Tk+OcnHPJRNZf/+FPLYsi+SoUP5j1W7O+c/3+Nc/byH3cIk+nKbUEOPPoSIdtSe0/w3iAGYBi4AwYL2IfGqM2dvmQiLLgeUAI0eO9ENRh6ARc+BbH8Inj8OH/w27/gozboQLfgjxo894eqjDztKsNJZmpbH7ZAUvfnqU1zcf57XNxxkZH85VWalcNTON0UmR/XAzSil/CnTT0H2AyxjzC9/n3wGrjTF/6ey62jTUCxUnYN1jsPF5a13k6TfABT+ChDE9ukxVvZvV20/yxubjrDtQhDEwIz2Gq2emcfmMVBIjdcSRUgNVoPoIHFidxYuA41idxTcZY3a0OmYS8BvgEiAE+BxYZozZ3tl1NQjOQuVJa8Gb3OfAUw/TvgLn/wiSxvf4UifL6/jr1nxe33ycnScqsNuE88YmsjQrlYunDNfnEpQaYAI5fPQyrKGhduA5Y8yvReROAGPM075j7gFuB7xYQ0wf7eqaGgR9oKrAqiHkPgeNtdbEduf/sFsPpHVkz8lK3thynLe25HO8rJZQh43Fk4exdEYq8yckEeqw9/ENKKV6Sh8oUx2rLoJPn4TPn4H6Chh3sRUII+f26nLGGDYdLeXNLfn8bdsJSqobiHY5WDx5GJdOTeH8cYm4nBoKSgWCBoHqWl25FQafPgk1xZBxHpz3Axi7yFo+sxcaPV7W7S/ira35rNl5ioo6NxEhdhZMTObSqcNZMCFZm4+U6kcaBKp7GqqtFdHWPQ6V+ZA4AebeBTOWgbP3s5U2uL2sP1jM6u0n+cfOkxRVNRDisDFvTAILJyazcEIyI+LD+/BGlFLtaRConnE3wI7XYP0TcHIbhMVDztdhzjchavhZXdrjNeQeLuHt7SdZu6eAI8U1AIxJiuBCXyjkZMYT4tClMpTqSxoEqneMgSPrYP2TsGcV2Bww5WqYdStkzOt1s1HL5Q2HiqpZu6eQ9/cU8NnBEho8XsKcdmaPiufcMQmcOyaBKakx2HWaC6XOigaBOnvFB6xlMre+ZHUsx4+GrJsh6yZrkZw+UF3vZt3+ItbtL+KTA8XsK6gCIMrlYO7oBOaOTiAnI47JqdE47VpjUKonNAhU32mogV1vwaY/wpGPQWzWEpozb4GxiyGk79r6CyrrWH+gmPUHivnkQDFHS6xmJJfTxoz0WGZlxDErI47skXE6D5JSZ6BBoPyj+ABs/hNs+T+oOgmOMBizECZcBuOXQGRSn/64/LJaNh0tZeORUjYdKWVHfgVur/X/3/S4MKalxTA1LYZpvpeGg1ItNAiUf3nccPgjqx9h9yqoyAPEmu9owmUw6YoeT2fRHbUNHr44Xs6mo6V8cbyc7cfLmzufAdJiw5icGs2klGgmDY9iYko0GfHhOq22CkoaBKr/GGONNNrzNuz+u/UerAVzJl0Bk66E5Eln3dHcmfKaRrbnlzcHw64TFRwqqsZXcSDMaWfC8CjGD4tkbHIkY5KsbXpcuHZIqyFNg0AFTtlRa8GcXW/B0U8BAwljrUAYfwmkzerW0ppno67Rw75TVew6WcGuExXsPlHJvoIqiqrqm48JcdgYnRjBmKRIMhLCyUyIsLaJESRHhSJ+Ci6l+osGgRoYKk/Bbl8oHPoIjAdComDU+TB6odW/kDDWb7WF9sprGtlfWMn+gioOFFazv6CKQ0XVHCupae57AKsWkZEQTnpcOOlxYaTHhTEivul9ONEuhwaFGvA0CNTAU1MChz6Eg2vhwFooO2Ltj06HzHmQmg2pM2H4tD4didQdbo+X/LI6DhVXc6S4msNFNRwtqSavtJZjJTVUN3jaHB/qsJEYGUpSVKhvG0JSZCjxESHER4YSHx5CfEQICZEhxIWH6MNyKiA0CNTAV3LQCoSDa+HY51B1ytovdqtPITULUrIgebL1OTw+IMU0xlBW00heaS15pTUcL6ulsLLeelVZ26KqBkqq6/F28k8ryuVoCY3mAAkhPiKUmDAn0WEOol1O33sn0S4HDn1uQp0lDQI1+FScgPxNkL/Zeh3fBLUlLd9HJFuBkDzJCof0HEiaCLaBMbupx2soq2mgpLrlVdy0rbLCwgoNK0Aq69xdXi8mzElCZAgJESEkRIQS73vfEhYtARLtchIT7tQmK9WGBoEa/IyBinwo3AUFu6FgV8v7xmrrmNBoq/N5xDkwYjak5UBYbECL3V11jR5KaxqoqHVTUddIRW0j5bXWtqy2sTlIiqvqKa7yhUtNA13987XbhLhwJ7HhIadtY8KcxIY7iQ0Laa6FhIfYcTnthIc4CHPacTltGiRDiAaBGrq8Xig9BHm5cOwzq1mpYAcYr/V9zEhIHNfqNd56RSSDbXA3t3i8hqp6N5V1jacFSHltI6U1DZTWNFJa3UBpTQNlNY3Wq7aBukZvt35GeIidyFAHUS4HUS4nUS6r1hHlchAT7iTOFy4xYdY2zldLiXJZYaJBMnB0FQQ6Ibwa3Gw262G1hDEw4wZrX30lHN8IeRugcC8U7YVNn7bUHAAQcEVDaAy4Yqz3rhiITrNqFCPPgZgR/TaCqTfsNiEmzOpLIK5n59Y1epprG2U1VoDUNnqobfBQ2+ihpsFDbYOb6gYPVXVuKusbqaxzU1nnJr+sloo6N+U1jTR4Og8Uh03aBEhkqIOIUAdhIXbCnfbm9y1B4yAq1NnmnLAQu692YtfnPPxIawQqODQ1LRXthaJ9UF1oLchTX2Ft63zbkoMtgRGVagXCiLlWH0T8aAiLG9Dh0J+MMdQ0eJprG001kIraptBo2VbUuamqd1Pb4KGmwU1Ng8f3ctPo6d7voBCHjTCnFQzhIXYiXQ4iQqxwiQy1gqWpecvltBPqsDW/Dw+xnxZAESFNW3tQdMZrjUApEYhJs15jFnZ+nMdtNS0d/QyOfWptd7ze8n1oNMRlQFwmxGZYr7A4X+0ium3tIiRySIeGiBDh+ys/vYc1ktbq3Z7m2kZTM1dlXSOV9W7qWtVSahs91Dd6qfHVVKrr3VTXuzleVtv8vqbBQ53b02XfSUdCHbbmGkt4iBUWLqeNUIe9eRvqsBHqsBHS9LLbcTqEELu1P9TZUnsJc9oJC7H5AsmOwybYbYLDLjhsNhy+96EOO067BLwJTWsESp1JeR6c2AqlR6D0cNuXp77z80KiIHbk6a/oNGuBn8hkvz9VHYyMMTR4vNQ1eqlr9Fhh0tzcZQVI02crQKyaSZUvTJpCpt7tpd5thU9d07bRQ6PHur6ns/HBPWQTmgOnqSbjsFth4bTbsNsEp90KkqVZadw4Z2Svfo7WCJQ6GzHp1qs9r7eDJqZW7yvyrSk2yo5aC/zUV7S7gEBEohUKUSnWug6xI301DV9oDIFO7f4mIr6/4O1W/4mfeLyGBreXBo+XBl9oWDUYb3MNpimIvMbQ6DF4vAa31+DxeGn0GN853uZtXaOHercXt9eL22Md6/Ya3B6vdV4fhU97GgRK9ZbNBlHDrFd31JZZT1BXnIDKE9ZDc5UnoPKktT2+CWqK2p5jD4XoFKuZ
yRluPWXtjLDWkG793hnu2/reR6dA/Bir9qFB4hd2m1id2QyMZ1fOhgaBUv0lLNZ6pczo/JiGaqspquyo1fRUdtQKiYYaqxO7sRaqi633DTXW58Ya8DZ2fD17KMSPskIhfhREDgOHC5wua+sItbah0VaNJDpVm6uCkF+DQESWAI8BduBZY8xD7b5fALwJHPLtes0Y80t/lkmpAS0kApImWK+e8DT6QqEWGqqsMCk5YC0eVHLIer9/Tdd9GgCIFRYxaVZtImaE1UQVl9HSbBUa2evbUwOT34JAROzAE8BFQB6wQUTeMsbsbHfoR8aYy/1VDqWCgt1pvVzRwDDruYrR89se4/VaNQl3vRUY7npw11nbulIoPw4Vx33bPOvp7f1rrBpHa2HxEDvCqkW0rlW0qWl08Dk8vqXvwxXTb/9p1Jn5s0YwB9hvjDkIICIvA0uB9kGglOoPNhuERlmv7jIGqot8nd6HfU1WR6D8mNU0VVPcNlDctS1BYzydX9cV01LDiBruC40wX6j4ts5wKzwikiA8wepYH+JDcgPFn0GQBhxr9TkPOKeD474kIluBfOBHxpgd7Q8QkeXAcoCRI3s3dEop1Qsi1trTkUmQPqtn5zY1V7nrrG1NccsoqqZX8QE48klLiJgzTH1hD/UFQkTbmkjTe1esr7zDrOG5EcnW+4hE6zvtOO+QP4Ogo9huP/ZpE5BhjKkSkcuAN4Bxp51kzApgBVjPEfRxOZVS/tDUXEW09TkuA9KyOz/eGPC6W5qtGqut8Kgutobp1hRZtZOaYqu5qnVNpLYUGuugrswajeXtYDZXsVnNWuEJvle81XnfVANpDhZfrSQsDsLjrGObzguJGJI1En8GQR4wotXndKy/+psZYypavV8lIk+KSKIxpt0YOqXUkCfSKjwAkqwnuHvK620JhKoC61VdYC2GVFtiBUlNiTWdSG2Z1YHeWNfNGkmI1azV1MQWGt3y3h7Sch/WG+u9zWkFSEikb+t7OcOtcGp++Y4Xu+/p9FgrjEKj/V6T8WcQbADGicgo4DiwDLip9QEiMhw4ZYwxIjIHsAHFfiyTUmqos9msv/bD4631KnrC425pyqotbRscNcXW57oKa2LDplfZMagvt85tavQwpuW9p8HqTznjiK1OiM03dUkszL4Dzv2X3l2nC34LAmOMW0S+C7yDNXz0OWPMDhG50/f908B1wF0i4gZqgWVmsM15oZQaOuwOsEdaQ2Qjk/r22p5G6zmRpldjtVUDMfi2vpfXbQVMbalVs6kttWoutaVWf4cf6FxDSikVBLqaa0i70JVSKshpECilVJDTIFBKqSCnQaCUUkFOg0AppYKcBoFSSgU5DQKllApyGgRKKRXkBt0DZSJSCBzp5emJQLDOYxSs9673HVz0vjuXYYzp8HHpQRcEZ0NEcjt7sm6oC9Z71/sOLnrfvaNNQ0opFeQ0CJRSKsgFWxCsCHQBAihY713vO7joffdCUPURKKWUOl2w1QiUUkq1o0GglFJBLmiCQESWiMgeEdkvIvcFujz+IiLPiUiBiGxvtS9eRP4hIvt827hAltEfRGSEiKwVkV0iskNEvufbP6TvXURcIvK5iGz13feDvv1D+r6biIhdRDaLyN98n4f8fYvIYRH5QkS2iEiub99Z3XdQBIGI2IEngEuBycCNIjI5sKXym98DS9rtuw94zxgzDnjP93mocQM/NMZMAuYC3/H9bzzU770euNAYMwPIApaIyFyG/n03+R6wq9XnYLnvhcaYrFbPDpzVfQdFEABzgP3GmIPGmAbgZWBpgMvkF8aYD4GSdruXAi/43r8AXNWfZeoPxpgTxphNvveVWL8c0hji924sVb6PTt/LMMTvG0BE0oEvA8+22j3k77sTZ3XfwRIEacCxVp/zfPuCxTBjzAmwfmECyQEuj1+JSCYwE/iMILh3X/PIFqAA+IcxJijuG3gU+DHgbbUvGO7bAO+KyEYRWe7bd1b37ejjAg5U0sE+HTc7BIlIJPAq8H1jTIVIR//TDy3GGA+QJSKxwOsiMjXARfI7EbkcKDDGbBSRBQEuTn+bZ4zJF5Fk4B8isvtsLxgsNYI8YESrz+lAfoDKEginRCQFwLctCHB5/EJEnFgh8KIx5jXf7qC4dwBjTBnwPlYf0VC/73nAlSJyGKup90IR+RND/74xxuT7tgXA61hN32d138ESBBuAcSIySkRCgGXAWwEuU396C7jV9/5W4M0AlsUvxPrT/3fALmPMI62+GtL3LiJJvpoAIhIGLAZ2M8Tv2xhzvzEm3RiTifXv+Z/GmFsY4vctIhEiEtX0HrgY2M5Z3nfQPFksIpdhtSnageeMMb8ObIn8Q0ReAhZgTUt7CngAeANYCYwEjgJfMca071Ae1ETkPOAj4Ata2ox/gtVPMGTvXUSmY3UO2rH+sFtpjPmliCQwhO+7NV/T0I+MMZcP9fsWkdFYtQCwmvb/zxjz67O976AJAqWUUh0LlqYhpZRSndAgUEqpIKdBoJRSQU6DQCmlgpwGgVJKBTkNAqX6kYgsaJopU6mBQoNAKaWCnAaBUh0QkVt88/xvEZHf+iZ2qxKR/xGRTSLynogk+Y7NEpFPRWSbiLzeNBe8iIwVkTW+tQI2icgY3+UjReQVEdktIi9KMEyIpAY0DQKl2hGRScANWJN7ZQEe4GYgAthkjMkGPsB6ahvgD8C9xpjpWE82N+1/EXjCt1bAucAJ3/6ZwPex1sYYjTVvjlIBEyyzjyrVE4uAWcAG3x/rYViTeHmBP/uO+RPwmojEALHGmA98+18A/uKbDybNGPM6gDGmDsB3vc+NMXm+z1uATOBjv9+VUp3QIFDqdAK8YIy5v81OkZ+1O66r+Vm6au6pb/Xeg/47VAGmTUNKne494DrffO9N68FmYP17uc53zE3Ax8aYcqBURM737f8q8IExpgLIE5GrfNcIFZHw/rwJpbpL/xJRqh1jzE4R+SnWKlA2oBH4DlANTBGRjUA5Vj8CWNP+Pu37RX8QuN23/6vAb0Xkl75rfKUfb0OpbtPZR5XqJhGpMsZEBrocSvU1bRpSSqkgpzUCpZQKclojUEqpIKdBoJRSQU6DQCmlgpwGgVJKBTkNAqWUCnL/H8IFD/4snB5TAAAAAElFTkSuQmCC",
657
+ "text/plain": [
658
+ "<Figure size 432x288 with 1 Axes>"
659
+ ]
660
+ },
661
+ "metadata": {
662
+ "needs_background": "light"
663
+ },
664
+ "output_type": "display_data"
665
+ }
666
+ ],
667
+ "source": [
668
+ "%matplotlib inline\n",
669
+ "from matplotlib import pyplot as plt\n",
670
+ "\n",
671
+ "plt.plot(epoch_nums, training_loss)\n",
672
+ "plt.plot(epoch_nums, validation_loss)\n",
673
+ "plt.xlabel('epoch')\n",
674
+ "plt.ylabel('loss')\n",
675
+ "plt.legend(['training', 'validation'], loc='upper right')\n",
676
+ "plt.show()"
677
+ ]
678
+ },
679
+ {
680
+ "cell_type": "code",
681
+ "execution_count": 35,
682
+ "metadata": {},
683
+ "outputs": [
684
+ {
685
+ "name": "stdout",
686
+ "output_type": "stream",
687
+ "text": [
688
+ "fully_connected1.weight \n",
689
+ " [[-0.00374341 0.2682218 -0.41152257 -0.3679695 ]\n",
690
+ " [-0.17916061 -0.08960593 0.11843108 0.5180272 ]\n",
691
+ " [-0.04437202 0.13230628 -0.15110654 -0.09828269]\n",
692
+ " [-0.47767425 -0.33114105 -0.20611155 0.01852179]\n",
693
+ " [ 0.22086579 0.5711509 -0.40086356 -0.18697421]\n",
694
+ " [ 0.31580442 0.24776897 -0.20200174 0.39890492]\n",
695
+ " [-0.08059168 0.05290705 0.4527381 -0.46383518]\n",
696
+ " [-0.3545517 -0.15797205 -0.23337851 0.39141223]\n",
697
+ " [-0.32408983 -0.23016644 -0.34932023 -0.4682805 ]\n",
698
+ " [-0.47349784 0.8002842 0.30180416 0.15444154]]\n",
699
+ "fully_connected1.bias \n",
700
+ " [ 0.02629578 -0.20744474 0.08459234 -0.46684736 -0.35585782 -0.45410082\n",
701
+ " 0.31546897 0.25728968 -0.22174752 0.24439509]\n",
702
+ "fully_connected2.weight \n",
703
+ " [[ 0.20224687 0.3143725 0.12550515 0.04272011 0.21202639 -0.18619564\n",
704
+ " 0.05892715 -0.24517313 -0.21917307 -0.16335806]\n",
705
+ " [ 0.14308453 0.08098809 -0.18731831 0.09553465 0.7475572 -0.01170831\n",
706
+ " 0.01207405 0.03671877 0.19618031 0.71772873]\n",
707
+ " [-0.24369258 -0.09592994 0.12428063 0.2620103 0.44033986 0.32761905\n",
708
+ " 0.06293392 -0.24256472 0.02909058 -0.6438864 ]\n",
709
+ " [-0.29470977 0.4369507 0.2404469 -0.31544605 -0.65187347 -0.03367811\n",
710
+ " -0.05203882 -0.09720274 0.12160733 -0.44794998]\n",
711
+ " [ 0.11592636 0.15991893 0.22637847 0.11824107 -0.31298175 -0.20513597\n",
712
+ " 0.15789726 0.0661869 -0.24668422 -0.1820901 ]\n",
713
+ " [ 0.29749104 0.33983657 -0.13788326 -0.07958971 -1.0037647 0.04011776\n",
714
+ " -0.23813814 -0.21048178 -0.01742402 -0.21410409]\n",
715
+ " [-0.12950484 0.18764248 -0.19243696 0.2869356 0.21671084 -0.26666948\n",
716
+ " -0.07870413 0.01426902 0.04613796 0.07500109]\n",
717
+ " [ 0.12409672 0.01894209 -0.15429662 0.1496355 -0.30334112 -0.1874303\n",
718
+ " -0.07916126 -0.15403877 -0.11062703 -0.25918713]\n",
719
+ " [-0.06726643 0.16598707 -0.20601156 -0.01622862 -0.10633215 -0.07815906\n",
720
+ " 0.00878868 0.00450952 0.06399861 0.4654336 ]\n",
721
+ " [ 0.29954556 0.20082232 0.3002309 -0.02287012 -0.2840742 -0.14991638\n",
722
+ " 0.21532115 -0.00204995 -0.15717986 -0.24232906]]\n",
723
+ "fully_connected2.bias \n",
724
+ " [-0.2959424 -0.09140179 -0.24091302 0.11557585 0.17096573 -0.3224678\n",
725
+ " 0.19725719 -0.24745122 0.03521875 -0.1282217 ]\n",
726
+ "fully_connected3.weight \n",
727
+ " [[-0.06091028 -0.06208903 -0.28376698 -0.27304304 -0.04948315 0.0040895\n",
728
+ " -0.14365433 0.11912274 -0.28462344 -0.02134135]\n",
729
+ " [ 0.27809682 -0.41300255 0.27310103 0.7309681 -0.2853832 0.6525562\n",
730
+ " -0.03649095 -0.14116624 -0.0045454 -0.25554216]\n",
731
+ " [ 0.03393281 -0.19290853 0.71934235 -0.31080088 0.15194914 -0.3314264\n",
732
+ " -0.07604478 -0.06650442 -1.1165304 0.17134616]]\n",
733
+ "fully_connected3.bias \n",
734
+ " [ 0.25107792 0.10447468 -0.24180876]\n"
735
+ ]
736
+ }
737
+ ],
738
+ "source": [
739
+ "for param_tensor in model.state_dict():\n",
740
+ " print(param_tensor, \"\\n\", model.state_dict()[param_tensor].numpy())"
741
+ ]
742
+ },
743
+ {
744
+ "cell_type": "code",
745
+ "execution_count": 36,
746
+ "metadata": {},
747
+ "outputs": [
748
+ {
749
+ "data": {
750
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWIAAAElCAYAAADeAeiuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAAnqklEQVR4nO3dfbzlY73/8dd7xn0IDY7cRBoVTqEhpRtRkgrdGp1K5aQ6pXTzC906lXNUKqU6miLqaCRRKickNyV3Y9yLKGIyYUK5Z3j//riuXctu773W7L32fNda+/3s8X3sta7vd13fa63MZ13r872+1yXbREREc6Y13YCIiKkugTgiomEJxBERDUsgjohoWAJxRETDlmm6Af1Gy6xoLbdK083oWVs8fYOmm9DzHs1IpbYuu2T+IttrTqSO6as+yV58f9vjfP/tp9reeSLnmqgE4iWk5VZh+ae+vulm9Kxzzz+86Sb0vPsefKTpJvS8Gass+8eJ1uHFD7D802a3Pe6BSw6fMdFzTVQCcUQMJgFS063oSHLEETG4NK391kk10lGSbpN05bDyfSVdK+kqSZ9rKT9Q0vV130vb1Z8ecUQMru71iI8Gvgp85x9V60XAbsAzbD8oaa1avikwG9gMeCLwC0mb2B41J5UecUQMKMG06e23Dtg+B7hjWPG7gENsP1iPua2W7wYcZ/tB2zcA1wPbjFV/AnFEDCbRaWpihqR5Lds+HZ5hE+D5ki6QdLakrWv5usDNLcctqGWjSmoiIgaUOk1NLLI9axwnWAZYHdgW2Bo4XtKTy4n/yZhjFhOII2JwdXgxbpwWACe6TGF5oaRHgRm1fP2W49YDbhmroqQmImJwSe238fsRsEM5jTYBlgMWAScDsyUtL2kjYCZw4VgVpUccEYNJ6vhiXPuqNBfYnpJPXgB8EjgKOKoOaXsI2Kv2jq+SdDxwNbAYePdYIyYggTgiBlmXUhO29xxl1xtHOf5g4OBO608gjogBpcnOEXdNAnFEDK5p/XGLcwJxRAymoXHEfSCBOCIGV59M+pNAHBEDqnujJiZbAnFEDK6kJiIiGjTxGzaWmgTiiBhc6RFHRDQsPeKIiCblYl1ERLMyjjgiomm5xTkionnJEUdENCw94oiIhqVHHBHRoC5ODD/ZEogjYmCpT3rE/ZFAiYhYQqIE4nZbR3VJR0m6rS6LNHzfhyRZ0oyWsgMlXS/pWkkvbVd/AnFEDCZ1uHXmaGDnfzqFtD7wEuCmlrJNgdnAZvU1X5c0Zo4kgTgiBlT73nCnPWLb5wB3jLDrS8CHAbeU7QYcZ/tB2zcA1wPbjFV/csQRMbA6DLQzJM1reT7H9pwO6t4V+JPty4adZ13g/JbnC2rZqHq+RyzpVTX/8rRR9p8laVabOv5+jKRTJK02CU2NiB4zbdq0thuwyPaslq2TILwS8FHgEyPtHqHMI5T9o52dvJmG7Qn8mpJzmTDbu9i+qxt1RUQP626OeLiNgY2AyyTdCKwHzJf0L5Qe8Potx64H3DJWZT0diCWtDGwH7E0NxJJWlHScpMslfR9YseX4nSSdJ2m+pB/U1w+v88ahq5uS3ijpQkmXSvpGu4R6RPQPdTFHPJztK2yvZXtD2xtSgu9Wtv8MnAzMlrS8pI2AmcCFY9XX04EY2B34ue3fAXdI2gp4F3Cf7WcABwPPAqjB9WPAi21vBcwDPjBaxZKeDuwBbGd7C+AR4N9GOXYfSfMkzfPi+7v13iJiknVx+Npc4DzgqZIWSNp7tGNtXwUcD1wN/Bx4t+1Hxqq/1y/W7QkcVh8fV5/PBL4CYPtySZfX/dsCmwLn1g93OcoHN5odKUH8onr8isBtIx1Yc0ZzAKattNaYuZ6I6B3duqHD9p5t9m847PnBlI5iR3o2EEt6ArADsLkkA9MpCe9LGDnxLeD0dh/YsOOPsX1gN9obET1GoGm5s26iXgt8x/aTah5mfeAGYD41hSBpc+AZ9fjzge0kPaXuW0nSJmPUfwbwWklr1ePXkPSkSXovEdGAycoRd1vP9ogpaYhDhpX9ENgSWLGmJC6lJsFt3y7pLcBcScvX4z8G/G6kym1fLeljwGmSpgEPA+8G/tjl9xERDRi6WNcPejYQ295+hLKvtHnNL4Gtx6qrNZdj+/vA9yfQzIjoYQnEERFN6484nEAcEQNK6RFHRDSu3sLc8xKII2Ig5WJdREQv6I84nEAcEQMqOeKIiOYlEEdENCyBOCKiYf0y10QCcUQMpF6aS6KdBOKIGFgJxBERDUsgjohoWn/E4QTiiBhQ6p9bnPujlRERS0iA1H7rqC7pKEm3Sbqypezzkq6pCxmfJGm1ln0HSrpe0rWSXtqu/gTiiBhQXV3F+Whg52FlpwOb14WMfwccCCBpU8qq85vV13y93QrxCcQRMbC61SO2fQ5wx7Cy02wvrk/PB9arj3cDjrP9oO0bgOuBbcaqP4E4IgZWhz3iGZLmtWz7jONUbwP+rz5eF7i5Zd+CWjaqXKyLiMHUeY93ke1Z4z6N9FFgMXDsP878T0Zaef7vEogjYiAJmD59csevSdoLeAWwo+2hYLsAWL/lsPWAW8aqJ6mJiBhYXbxYN1LdOwP7A7vavq9l18nAbEnLS9oImEldbX406RFHxGBagotxbauS5gLbU/LJC4BPUkZJLA+cXgP6+bbfafsqSccDV1NSFu+2/chY9ScQR8RAKuOIuxOJbe85QvGRYxx/MHBwp/UnEEfEgMrsaxERjeuTOJxAHBEDSjAtE8NHRDSnmzniyZZAHBEDq0/icAJxRAyu9IgjIhrWJ3E4gTgiBpNysW5wbfn0DTj3gq823YyetforD2u6CT3vzp/s13QTpoj+GUfcdq4JSRtLWr4+3l7Se1tnoo+I6FXdmo94snUy6c8PgUckPYVyS99GwPcmtVUREV0wmZP+dFMngfjROgv9q4DDbL8fWGdymxURMUEd9IZ7JA53lCN+WNKewF7AK2vZspPXpIiIieunGzo66RG/FXgOcLDtG+r8mv87uc2KiJi4adPUdusFbXvEtq+WtD+wQX1+A3DIZDcsImKiBqZHLOmVwKXAz+vzLSSdPMntioiYmD7KEXeSmjiIshT0XQC2L6WMnIiI6Fmi/YiJXukxdxKIF9v+67CyMVckjYjoBd3qEUs6StJtkq5sKVtD0umSrqt/V2/Zd6Ck6yVdK+ml7ervJBBfKekNwHRJMyUdDvyms+ZHRDRn+jS13Tp0NLDzsLIDgDNszwTOqM+RtCkwG9isvubrkqaPVXkngXjfWuGDwFzgb8B+nbY+IqIJpcfbndSE7XOAO4YV7wYcUx8fA+zeUn6c7Qfr4IbrKendUXUyauI+4KN1i4joGx12eGdImtfyfI7tOR28bm3bCwFsL5S0Vi1fFzi/5bgFtWxUowZiSYfZ3k/STxghJ2x71w4aGhHRmA57vItsz+rmaUcoG/O62lg94u/Wv4eOuzkREQ2a5EERt0pap/aG1wFuq+ULgPVbjlsPuGWsikYNxLYvrg/nAffbfhSgJp2XH2/LIyKWBlG
GsE2ikylTPxxS//64pfx7kr4IPBGYCVw4VkWdXKw7A1ip5fmKwC+WsMEREUuX2o+Y6HTUhKS5wHnAUyUtkLQ3JQC/RNJ1wEvqc2xfBRwPXE25Ee7dth8Zq/5OJv1ZwfY9Q09s3yNppbFeEBHRC7qVmrC95yi7dhzl+IOBgzutv5Me8b2Sthp6IulZwP2dniAiogkCpkltt17QSY94P+AHkoaSzesAe0xaiyIiuqRH4mxbnYwjvkjS04CnUr5krrH98KS3LCJignplLol2Opl9bSVgf+B9tq8ANpT0iklvWUTEBHQyz0SvxOlOcsTfBh6iTA4PZYzcZyatRRERXTJdarv1gk4C8ca2Pwc8DGD7fka+cyQioqf0yzSYnVyse0jSitRb9CRtTJkAKCKiZ5VRE023ojOdBOJPUgYlry/pWGA74C2T2aiIiAnroR5vO52Mmjhd0nxgW8qXzPtsL5r0lkVETFCfxOGOesQALwSeR0lPLAucNGktiojoAsGSTPzeqLaBWNLXgadQJoUHeIekF9t+96S2LCJiggYmNUHpDW9ue+hi3THAFZPaqoiILuiPMNxZIL4W2AD4Y32+PnD5pLUoIqILJHpmLol2OgnETwB+K2loPs2tgfMknQxZqSMielefxOGOAvEnJr0VERGTYGByxLbPBpD0BOAFwE0tq3d0laS1gS9RhsrdSbm1+nO2l3iUhqT9KIsA3tfVRkZEXxCdT/zetFFvcZb0U0mb18frAFcCbwO+W4NcV6l8df0IOMf2k20/C5hNWe9pPPbjsSuLRMRUMiCT/mxk+8r6+K3A6bZfCTybEpC7bQfgIdtHDBXY/qPtwyVNl/R5SRdJulzSOwAkbS/pLEknSLpG0rEq3ktZK+pMSWfWY/eUdIWkKyV9dugco5VHRP/rl7kmxgrErXMO7wicAmD7buDRSWjLZsD8UfbtDfzV9taUi4Vvl7RR3bclpfe7KfBkYDvbX6Gsmvoi2y+S9ETgs5RgvwWwtaTdRysffnJJ+0iaJ2ne7Ytu78Z7jYilYFoHWyckvV/SVbXDNlfSCpLWkHS6pOvq39Un0s7R3CxpX0mvAraizDdBnQBo2fGesFOSvibpMkkXATsBb5Z0KXABZSTHzHrohbYX1FWmLwU2HKG6rYGzbN9uezFwLCXfPVr5Y9ieY3uW7Vlrzlizq+8zIiaH6E6PWNK6wHuBWbY3B6ZT0qYHAGfYnklZZPmA8bZ1rEC8N6WX+hZgD9t31fJtKXMUd9tVlIAPQL1zb0dgTcpnuq/tLeq2ke3T6qGtM8E9wsgXIEf7tHvjd0lETIppar91aBlgRUnLUK493QLsBhxT9x8D7D7udo62w/Zttt9pe7eWoIftM20fOt4TjuGXwAqS3tVSNnSx7VTgXZKWBZC0iaTHtanvbmCV+vgC4IWSZkiaDuwJnD1GeUT0OanMNdFuA2YMpR7rtk9rPbb/BBwK3AQspKRJTwPWtr2wHrMQWGu8be100p9JZ9s1P/slSR8GbgfupSzT9ANKymF+HV1xO+2/feYA/ydpYc0THwicSekFn2L7xwCjlUdE/+uwx7vI9qzRdtbc727ARsBdlMWU39iN9g3pmUAMf/9WmT3K7o/UrdVZdRt6/XtaHh8OHN7y/HvA90Y454jlEdH/ujQo4sXADbZvL3XqROC5wK2S1rG9sA7xvW28J+j0omFERF8pK3So7daBm4BtJa1Uf5HvCPwWOBnYqx6zFzDuX9Oj9oglHU5dHmkktt873pNGRCwN3ehp2r5A0gmU4bWLgUsoqc+VgeMl7U0J1q8b7znGSk3MG2+lERFNk7p3i7PtT1KWjWv1IKV3PGGjBmLbx4y2LyKiH/TIjXNtdbJCx5qUkQubAisMldveYRLbFRExYX0y509HKZRjKYnpjYD/BG4ELprENkVETFgXL9ZNuk4C8RNsHwk8bPts22+j3F0XEdHT+mX2tU7GEQ9N/rNQ0sspt/aNd2rKiIilY8luYW5UJ4H4M5IeD3yQcoPEqsD7J7VVERETJGB6r3R52+hkhY6f1od/BV40uc2JiOiegekRS/o2I9zYUXPFERE9q1cmfm+nk9TET1serwC8ipInjojoWWXURNOt6EwnqYkftj6XNBf4xaS1KCKiG3poVEQ745l9bSawQbcbEhHRbb0yTridTnLEd/PYHPGfKXfaRUT0LAHT+2R+yU5SE6u0OyYioveIaX2yGlrb7wtJZ3RSFhHRS8rioX1+Z52kFShrxs2oS4UMNXlV4IlLoW0REeM3IHfWvQPYjxJ0L+YfgfhvwNcmt1kRERPX9xfrbH8Z+LKkfev6bxERfaNcrOuPQNzJNcVHJa029ETS6pL+Y/KaFBHRHd3KEUtaTdIJkq6R9FtJz5G0hqTTJV1X/64+3nZ2EojfbvuuoSe27wTePt4TRkQsDaIEuHZbh74M/Nz204BnUuZoPwA4w/ZM4Iz6fFw6acc0tdywLWk6sNx4TxgRsVSozDXRbmtbjbQq8ALgSADbD9XO6W7A0JJyxwC7j7epnQTiUykrle4oaQdgLvDz8Z4wImJpUQcbZWTYvJZtn2HVPBm4Hfi2pEskfUvS44C1bS8EqH/XGm87O7nFeX9gH+Bdtd2nAd8c7wkjIpaGoaWSOrDI9qwx9i8DbAXsa/sCSV9mAmmIkbTtEdt+1PYRtl9r+zXAVZQJ4iMieto0td86sABYYPuC+vwESmC+VdI6APXvbeNuZycHSdpC0mcl3Qh8GrhmvCeMiFg62ueHO8kR2/4zcLOkp9aiHYGrgZOBvWrZXsCPx9vSse6s2wSYDewJ/AX4PiDbWaUjInre0KiJLtkXOFbScsAfgLfW6o+XtDdwE/C68VY+Vo74GuBXwCttXw8gKWvVRUTf6NYKHbYvBUbKI+/YjfrH+sJ4DWXKyzMlfVPSjtAnUxlFRNDxqInGjXWL80nASXWYxu6UlZvXlvQ/wEm2T1s6TYx+cudP9mu6CT1vn+9f1nQTpgb1z5p1nYyauNf2sbZfAawHXEqXh25ERHSbgOlS260XLFEu2/Ydtr9he4fJalBERLf0fWoiIqLf9UiHt60E4ogYSGX4Wn9E4gTiiBhY6RFHRDRK/b9CR0REP0tqIiKiaT20SnM7CcQRMbASiCMiGqakJiIimlMmhm+6FZ1JII6IgZVRExERDUtqIiKiQUlNREQ0TukRR0Q0qo/GEXdxSaeIiN7R7fmIJU2XdImkn9bna0g6XdJ19e/q421rAnFEDKwuz0f8PuC3Lc8PAM6wPRM4gwksmJFAHBGDq0uRWNJ6wMuBb7UU7wYcUx8fQ1lSblySI46IgdXhxboZkua1PJ9je86wYw4DPgys0lK2tu2FALYXSlprvO1MII6IgdVhCniR7Vmj16FXALfZvljS9t1p2WMlEEfEwOrSoIntgF0l7QKsAKwq6X+BWyWtU3vD6wC3jfcEyRFHxEASIKnt1o7tA22vZ3tDYDbwS9tvBE4G9qqH7QX8eLxtTY84IgbT5I8jPgQ4XtLewE3A68ZbUQJxRAysbsdh22cBZ9XHfwF27E
a9CcQRMbj65M66BOKIGFCZayIionH9MtdEAnFEDKQyaqLpVnQmgTgiBlZSExERDUuPOCKiYX0Shyf3zjpJ/yLpOEm/l3S1pFMk7TM0n+cIx39L0qbjOM8W9fbDiIiik5nXeiRST1ogVrl38CTgLNsb294U+Aiw9mivsf3vtq8ex+m2AEYMxJLS64+YgsqadWq79YLJ7BG/CHjY9hFDBbYvBX4FrCzpBEnXSDq2Bm0knSVpVn18j6SDJV0m6XxJa9fy10m6spafI2k54FPAHpIulbSHpIMkzZF0GvAdSRtK+pWk+XV7bq1r+1rHSbXHfoSkzL8RMSD6pEM8qYF4c+DiUfZtCewHbAo8mTK70XCPA863/UzgHODttfwTwEtr+a62H6pl37e9he3v1+OeBexm+w2UWZFeYnsrYA/gKy3n2Qb4IPCvwMbAq4c3pKZT5kmad/ui2zt68xHRA/okEjfV+7vQ9gLbjwKXAhuOcMxDwFAu+eKWY84Fjpb0dmD6GOc42fb99fGywDclXQH8gPIF0NqWP9h+BJgLPG94Rbbn2J5le9aaM9bs5P1FRA9QB//rBZOZP70KeO0o+x5sefzIKO142LaHH2P7nZKeTVm25FJJW4xyjntbHr8fuBV4JuXL54GWfeaxhj+PiD7VIyngtiazR/xLYPnacwVA0tbACydSqaSNbV9g+xPAImB94G4eu4TJcI8HFtYe+Jt4bE96G0kb1dzwHsCvJ9K+iOgdfZKZmLxAXHuzrwJeUoevXQUcBNwywao/L+kKSVdScseXAWcCmw5drBvhNV8H9pJ0PrAJj+0tn0eZV/RK4AbKSI+I6HPdmhh+aZjUoV22bwFeP8Kub7Yc856Wx9u3PF655fEJwAn18T9dTAPuALYeox3XAc9oKTqw5fF9tkcK3hHRzyZ/YviuyRjbiBhYfRKHp/aadbbPsv2KptsREZOkC0liSetLOlPSbyVdJel9tXwNSadLuq7+XX28zZzSgTgiBlkng9c66jMvBj5o++nAtsC761QMBwBn2J4JnFGfj0sCcUQMLKn91o7thbbn18d3A78F1gV2A46phx0D7D7ediZHHBEDaQkmhp8haV7L8zm254xYp7Qh5c7gC4C1bS+EEqwlrTXetiYQR8TA6jD1sMj2rLZ1SSsDPwT2s/23bg59S2oiIgZWN1ITpR4tSwnCx9o+sRbfKmmdun8dypw245JAHBEDqxt31tXZIY8Efmv7iy27Tgb2qo/3An483nYmNRERg6l7N3RsR5ka4QpJl9ayj1DuyD1e0t7ATcDrxnuCBOKIGEhDtzhPlO1fM3rneccJn4AE4ogYYP1yZ10CcUQMrMw1ERHRsF6Z+L2dBOKIGFz9EYcTiCNicPVJHE4gjojBJMG0PkkSJxBHxODqjzicQBwRg6tP4nACcUQMrj7JTCQQR8Sg6nji98YlEEfEQFqC+Ygbl0AcEQMrgTgiomFJTURENKl702BOugTiiBhInU783gsSiCNicPVJJE4gjoiBlVucIyIa1h9hOIuHRsQg68bqoYCknSVdK+l6SQd0u5kJxBExsNTB/9rWIU0Hvga8DNgU2FPSpt1sZwJxRAykoTvr2m0d2Aa43vYfbD8EHAfs1s22Jke8hObPv3jRisvqj023Y5gZwKKmG9HD8vm012uf0ZMmWsH8+RefuuKymtHBoStImtfyfI7tOS3P1wVubnm+AHj2RNvXKoF4Cdles+k2DCdpnu1ZTbejV+XzaW8QPyPbO3epqpH6ze5S3UBSExER7SwA1m95vh5wSzdPkEAcETG2i4CZkjaStBwwGzi5mydIamIwzGl/yJSWz6e9fEajsL1Y0nuAU4HpwFG2r+rmOWR3NdURERFLKKmJiIiGJRBHRDQsgTgiomEJxANIKvcLDf2NiN6WQDxgJMn/uAK7di2bNrSvsYb1oNE+j3xOSy6f2cRk1MSAkvRu4CXAVcBNwJG2Fzfbqt7R+oUl6cXAasC1wO9t3zfsCy2GkbQV5dbf3wILbD+Qz2z8Mo54AEl6DbAH8Frgh8DFCcKP1RKEPwS8HrgBuB/4k6TP2f5rk+3rZZK2p8xGdiNwJ/B7SYfavrvBZvW1pCYGgKTtJW3WUrQ6cAiwMyW4/L963MYNNK9nSVodeAHwItt7AN8AlqX8ksjP7RFI2gI4EHid7ZcD3wKWB15V9+czG4cE4sGwJnBPDSwAf6L0WP7d9k62H5b0XuAtkqbsr6BRgsSGwEvr4wuAe6gza+Vn9mNJWhbYAtieMjUkwG+AW4FtIZ/ZeE3Zf5SDQNKWALZ/IOlJwO8kvQL4NXAKJThvDTwN2At481RNUQzLCW9OCbg3A58CdpB0l+1fSloAbFvnFHh4qgeWoc9N0jTbDwNHS1oNeJOkhbZPlXQ5sJOkxwN/m+qf2XgkEPe31wDbSPqw7UslfQY4EngDcDhlRYHPAHcAb+n2/fH9ZFhOeNdafAFlFq0LgK9L+hWwA/DKOgH4lNYShF8J7FpXqjjE9mGSHgS+LemHlF8VX09effwyaqIPDevdHQQ8C/iE7UvqaIn3AP9me76kFYBHp2pgGaEnfBSwHbAx5XPbmpIbFmW43/W2bx6luimn/sI6iPKL6mDK57WT7askfYByHeIE23Nqr/nR5lrbv5Ij7jPDhwjZPgg4FzhY0pa2vwZ8GThN0izbD0zhILxSSxBekXLh0rYftn0N5XN7IjDT9tW2z0wQ/gdJKwFPAfYGZgIrA8cA50razPYXgWOBN0vaPkF4/JKa6CPDendvoIx9vcX2IfU61Kckfdz2EZIeogwtmpLqL4F3SLqIkiN/HvA2YGgV3kNt3yjpJkrv+J++5KYySTtRersHAytQLv6+zfbvJL0cOFPSBsDPgEeA3zfW2AGQQNxHWoLwB4CXU3ojH5e0Yg3GHwe+Iuk9to9qsq1NqzcY/Ab4JWUUyXNtPyrpaMrCj6dI+hmwOyXgTPkr/i054U0o6a0DbP9F0qqUGzdmSFqLEnx/bPsB4AFJx071z26ikproA5LWqlfxqf8oNrO9I2XBxz8DJ9b83KeBnzC1e8JD82xMA+YBx1N6dFvXQ84GPk75nKZRLsxd30BTe8bQkMYahJ8EvIOyHNDQxTfXx2+lrGB8ju0Lhj7rBOGJy8W6Hlb/Q18b+D7wP8BJlOBxbP0rysD6hyS9FbjI9pVNtbdpw1I3WwJ/sP1XSc+mBJCP2J5bf3ZfZHvKfmENqV/wzwdup3xhzaTk0l8LXAx8z/ZCSasAKwGr2b62qfYOqvSIe5ztPwOHAm8CdrF9P/BjSl7zSzUIv4Vy99zfGmtow4YF4f+gBN6fSdqH0jN+C+WC5uGUIX6rj1bXFLNs3b5C+aK/2PaJwA8oC2a+TtK6tu+2fWuC8ORIjriHtfzke5TSG/mOpLdTrvavDHyjjn19NqVnfFMzLW1eSxDejXJh7hmUW5VfAaxg+yt1KNazgcNs/6GxxvaI+uV1r6Qbgc2B8ynpLmyfJGkxsAuwh6SvTtXRN0tDUhM9TtJsYD/KvfyzgVcDX7D9I0mbUn7V3Gn7T
821sjfUC0nfANa1vU0t24Vyce73lBno/tJgE3tGy4W5F1Nm53sEmEX58jrF9omS1qCkKM5OT3hyJTXRYyStPaxoXeBc2wttfwk4AjiypiOus33lVA3CrXNHSFrG9m3AfwF3SToYwPYpwM8pn2PGuVY1CO9KSUk8xfbvKRcy5wO7SDoE+C5weoLw5EuPuIdIehpwNXAYcE29W2lXYEfKuNeb63E/AR6izB1xb1Pt7RWS3kG58eB24ARgLcrwqz/Y/kQ95nH5rP5B0gzKtYa31rHBz6CkJa6l3HH4JuC7tk9usJlTRnLEveVe4DzKbFavkfQsyjwImwGvlvRnyoWVe4D9p2pgab2Vto4WeSPl7q+LKFf8j6DcXfhxSR+z/Rngvqba26OWpXxWL603uKxMmWfjQ7aPlvQz24/kJpelI4G4h9i+WdKFwFaUGzZmU6Yc3LBuz6OM6fzPqXphTtLzgE0kXW57HuUi039QpmG8ADjCZdrPK4BPArdBxrq25ISfBvylDkn7KiX4Hm/755JeT5mJ7nvAYsjntrQkEPeIlp7H/sB3KD8TF1AunpxICcQLgP+2fUtT7WySpJ2B/wa+BKxai/9ISeU8YnunetxHKWmJuU20s9cM/YKoF+aOAX5df119wfaP6jE7AJ8APpjREUtfAnGPqL0VUW7SuB74IqVn/L46QuKpwG1T9SYESS8EvkqZVe6Cll2rUsZPf6NOUrML8Dpgz6Xfyt4y9OVeg/BzKKuR7E5JSewKHCTpC5Qv+I9Sbmk+tbEGT2G5WNeDatD9FXB4vW15ypO0H+X76sstZf9FmXt5MeUK/3Moy/a81/YVTbSzV9QJebYH5lKGpl0MrG57w7p/M+CVwDOBDwH32r4rOeFmZPhaD6rDhfYHptde3pTVMkRtY8qSUEPlLwM2oIxzXUxZbWNP4DVTPQhXywBXUILvo5Q5mO+X9C0Al0UCfkaZzOcJtu+q5QnCDUgg7l3nUYYRTWktgeFHwLNVlnEH+AVlWsZ5lN7wg7bvtH1HA83sKbVX+wdKkD1O0qdt30e5YeN5kr4BUL+wvmj78gabGyQQ9yyXicv3qP+Aotx+ey4wW9I2LpO7PyRpT0pe+Lxmm9cb6o0trsH4AeC9wLPqML57KdcddpH0bQDb9zTZ3iiSI46+IWldynjhHYBL+McsYbvbvrrJtjWt3t69qF6Y24lyUe7nlOk+n0yZve9M2/8t6XHALNtnN9bgeIwE4ugrKksebUUZ1vcn4Czb1zXbqmbVOao/TRlxM5cynO8Myuf0C2AOZTjkd4DTbH+qvi4X5npEAnFEH1NZTWN/yqRGm1Omrvys7Z9K2p5yU9D1wDcpwfgJti9sprUxmgTiiD5VZ987npJ2+BEl0B5C+Xe9cz3m+cDbKRfuPmf7kWZaG2NJII7oQzUdcRJlBY0jW8r/FXgfZd6S/eqFuxdSbmuesqu39LqMmojoT/dTcuQnAEhaFv4+JO0LwL8Ah9eysxOEe1sCcUR/ehywJWUiKOpER9Prvr8AlwGr1vRF9LgE4og+VO+EO5wyXeoWw3bPokydesBUH9bXLxKII/rXScBC4J119rRHJW1HSU18d6rO0tePcrEuoo/VpbVeT5mTeT5lTo5Dhqa3jP6QQBwxAGpAfhRY3vaC3KzRXxKIIyIalhxxRETDEogjIhqWQBwR0bAE4oiIhiUQR0Q0LIE4xkXSI5IulXSlpB9MZG09SUdLem19/K2xbsuVtL2k547jHDdKmjFC+dskXSHp8vpedlvSutucd8z3EwFlgcGI8bjf9hYAko4F3gl8cWinpOnjmXLR9r+3OWR74B7gN0ta93CS1qMsI7+V7b9KWpmWBUq7oYP3E5EecXTFr4Cn1N7qmZK+B1whabqkz0u6qPY43wFlZQhJX5V0taSfAWsNVSTpLEmz6uOdJc2XdJmkMyRtSAn476+98edLWlPSD+s5Lqq3+CLpCZJOk3RJXSxT/LO1gLspgR3b99i+oaUdh0n6Te0pb1PLHyfpqHquS4Z60PW9HtrSu953hPezk6Tz6nv6QQ38SDqkfhaXSzq0u//XRD9IjzgmRNIywMso66MBbANsbvsGSfsAf7W9taTlgXMlnUaZNeypwL8CawNXA0cNq3dNyqoSL6h1rWH7DklHAPfYPrQe9z3gS7Z/LWkD4FTg6cAngV/b/pSklwP7jND8y4BbgRsknQGcaPsnLfsfZ/u5kl5Q27c5pQf9S9tvk7QacKGkXwBvBjYCtrS9WNIaw97PDOBjwItt3ytpf+ADkr4KvAp4Wp07eLUOP/oYIAnEMV4rSrq0Pv4VcCTwXODCoV4lsBPwjKH8L/B4YCbwAmBuTV3cIumXI9S/LXDOUF227xilHS8GNpX+3uFdVdIq9Ryvrq/9maQ7h7/Q9iOSdga2BnYEviTpWbYPqofMrcedI2nVGiR3AnaV9KF6zArABrUdR9hePEp7twU2pXwZASxHWXn6b8ADwLfqr4OfjvI+Y4AlEMd4/T1HPKQGmHtbi4B9bZ867LhdgHb31quDY6Ck155j+/4R2tL29XU+hgspPdvTgW8DBw3tHn54bddrbF877Hzt2ivgdNt7/tOOkvbYkbK+3Hsoq1THFJIccUymU4F3qa4eIWkTlaXczwFm17zqOsCLRnjtecALJW1UXzv0U/9uYJWW406jBC/qcVvUh+cA/1bLXgasPvwEkp4oaauWoi2AP7Y836Me9zxKiuWv9T3tWwMvkrZsacc7a6qmtb1Dzge2k/SUun+l+nmsDDze9inAfrUNMcWkRxyT6VvAhsD8GrhuB3anzKO7A3AF8Dvg7OEvtH17zTGfKGkacBvwEuAnwAn1Itm+wHuBr0m6nPLf8zmUC3r/CcyVNL/Wf9MI7VsWOFTSEynpgdvra4fcKek3wKrA22rZpynL1V9e39ONwCvqe92klj9MyW9/ddj7eUtt0/K1+GOUL5YfS1qB0mt+/6ifZgyszL4WMQJJZwEfsj2v6bbE4EtqIiKiYekRR0Q0LD3iiIiGJRBHRDQsgTgiomEJxBERDUsgjoho2P8HcuuOwBgzx8YAAAAASUVORK5CYII=",
751
+ "text/plain": [
752
+ "<Figure size 432x288 with 2 Axes>"
753
+ ]
754
+ },
755
+ "metadata": {
756
+ "needs_background": "light"
757
+ },
758
+ "output_type": "display_data"
759
+ }
760
+ ],
761
+ "source": [
762
+ "#Pytorch doesn't have a built-in confusion matrix metric, so we'll use SciKit-Learn\n",
763
+ "from sklearn.metrics import confusion_matrix\n",
764
+ "import numpy as np\n",
765
+ "\n",
766
+ "# Set the model to evaluate mode\n",
767
+ "model.eval()\n",
768
+ "\n",
769
+ "# Get predictions for the test data\n",
770
+ "x = torch.Tensor(x_test).float()\n",
771
+ "_, predicted = torch.max(model(x).data, 1)\n",
772
+ "\n",
773
+ "# Plot the confusion matrix\n",
774
+ "cm = confusion_matrix(y_test, predicted.numpy())\n",
775
+ "plt.imshow(cm, interpolation=\"nearest\", cmap=plt.cm.Blues)\n",
776
+ "plt.colorbar()\n",
777
+ "tick_marks = np.arange(len(penguin_classes))\n",
778
+ "plt.xticks(tick_marks, penguin_classes, rotation=45)\n",
779
+ "plt.yticks(tick_marks, penguin_classes)\n",
780
+ "plt.xlabel(\"Predicted Species\")\n",
781
+ "plt.ylabel(\"Actual Species\")\n",
782
+ "plt.show()"
783
+ ]
784
+ },
785
+ {
786
+ "cell_type": "code",
787
+ "execution_count": 37,
788
+ "metadata": {},
789
+ "outputs": [
790
+ {
791
+ "name": "stdout",
792
+ "output_type": "stream",
793
+ "text": [
794
+ "model saved as penguin_classifier.pt\n"
795
+ ]
796
+ }
797
+ ],
798
+ "source": [
799
+ "# Save the model weights\n",
800
+ "# model_file = '/User/johnnydevriese/projects/models/penguin_classifier.pt'\n",
801
+ "model_file = 'penguin_classifier.pt'\n",
802
+ "torch.save(model.state_dict(), f=model_file)\n",
803
+ "del model\n",
804
+ "print('model saved as', model_file)"
805
+ ]
806
+ },
807
+ {
808
+ "cell_type": "code",
809
+ "execution_count": 38,
810
+ "metadata": {},
811
+ "outputs": [
812
+ {
813
+ "name": "stdout",
814
+ "output_type": "stream",
815
+ "text": [
816
+ "New sample: [[50.4, 15.3, 20, 50]]\n",
817
+ "Prediction: Gentoo\n"
818
+ ]
819
+ }
820
+ ],
821
+ "source": [
822
+ "# New penguin features\n",
823
+ "x_new = [[50.4,15.3,20,50]]\n",
824
+ "print ('New sample: {}'.format(x_new))\n",
825
+ "\n",
826
+ "# Create a new model class and load weights\n",
827
+ "model = PenguinNet()\n",
828
+ "model.load_state_dict(torch.load(model_file))\n",
829
+ "\n",
830
+ "# Set model to evaluation mode\n",
831
+ "model.eval()\n",
832
+ "\n",
833
+ "# Get a prediction for the new data sample\n",
834
+ "x = torch.Tensor(x_new).float()\n",
835
+ "_, predicted = torch.max(model(x).data, 1)\n",
836
+ "\n",
837
+ "print('Prediction:',penguin_classes[predicted.item()])"
838
+ ]
839
+ }
840
+ ],
841
+ "metadata": {
842
+ "interpreter": {
843
+ "hash": "16c7f1dc46b458d69b8d4b83cad879badbf4dbe0bbfb50262ef6f7a4b6b16937"
844
+ },
845
+ "kernelspec": {
846
+ "display_name": "Python 3.9.7 64-bit ('base': conda)",
847
+ "language": "python",
848
+ "name": "python3"
849
+ },
850
+ "language_info": {
851
+ "codemirror_mode": {
852
+ "name": "ipython",
853
+ "version": 3
854
+ },
855
+ "file_extension": ".py",
856
+ "mimetype": "text/x-python",
857
+ "name": "python",
858
+ "nbconvert_exporter": "python",
859
+ "pygments_lexer": "ipython3",
860
+ "version": "3.8.12"
861
+ },
862
+ "orig_nbformat": 4
863
+ },
864
+ "nbformat": 4,
865
+ "nbformat_minor": 2
866
+ }
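
A minimal standalone sketch of the save/load round-trip from the last two cells, assuming the PenguinNet class and penguin_classes list defined earlier in the notebook are importable (the module name below is hypothetical):

    import torch
    from penguins_nn import PenguinNet, penguin_classes  # hypothetical module name

    model = PenguinNet()
    model.load_state_dict(torch.load('penguin_classifier.pt'))
    model.eval()  # inference mode: disables dropout etc.

    x_new = torch.tensor([[50.4, 15.3, 20.0, 50.0]]).float()
    with torch.no_grad():  # no gradients needed for prediction
        _, predicted = torch.max(model(x_new), 1)
    print('Prediction:', penguin_classes[predicted.item()])
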
code/preprocessor_config.json ADDED
@@ -0,0 +1,17 @@
1
+ {
2
+ "do_normalize": true,
3
+ "do_resize": true,
4
+ "feature_extractor_type": "ViTFeatureExtractor",
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_std": [
11
+ 0.5,
12
+ 0.5,
13
+ 0.5
14
+ ],
15
+ "resample": 2,
16
+ "size": 224
17
+ }
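
This config is what transformers reads back when the feature extractor is loaded from the model directory. A minimal usage sketch, assuming the directory holding this preprocessor_config.json (the paths below are placeholders):

    from PIL import Image
    from transformers import ViTFeatureExtractor

    # from_pretrained picks up do_resize/size, do_normalize/image_mean/image_std,
    # and resample from preprocessor_config.json in the given directory.
    extractor = ViTFeatureExtractor.from_pretrained('code/')  # placeholder path
    image = Image.open('example.jpg')                         # placeholder image
    inputs = extractor(images=image, return_tensors='pt')
    print(inputs['pixel_values'].shape)  # torch.Size([1, 3, 224, 224])
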
code/ps2_submission.py ADDED
@@ -0,0 +1,411 @@
1
+ import numpy as np
2
+ import util
3
+
4
+ # Character to replace with sub-problem letter in plot_path/save_path
5
+ WILDCARD = 'X'
6
+
7
+ def main_LogReg(train_path, valid_path, save_path):
8
+ """Problem (1b): Logistic regression with Newton's Method.
9
+
10
+ Args:
11
+ train_path: Path to CSV file containing dataset for training.
12
+ valid_path: Path to CSV file containing dataset for validation.
13
+ save_path: Path to save predicted probabilities using np.savetxt().
14
+ """
15
+ # Load dataset
16
+ x_train, y_train = util.load_dataset(train_path, add_intercept=True)
17
+
18
+ # Train a logistic regression classifier
19
+ clf = LogisticRegression()
20
+ clf.fit(x_train, y_train)
21
+
22
+ # Plot decision boundary on top of the validation set
23
+ x_eval, y_eval = util.load_dataset(valid_path, add_intercept=True)
24
+ plot_path = save_path.replace('.txt', '.png')
25
+ util.plot(x_eval, y_eval, clf.theta, plot_path)
26
+
27
+ # Use np.savetxt to save predictions on eval set to save_path
28
+ p_eval = clf.predict(x_eval)
29
+ yhat = p_eval > 0.5
30
+ print('LR Accuracy: %.2f' % np.mean( (yhat == 1) == (y_eval == 1)))
31
+ np.savetxt(save_path, p_eval)
32
+
33
+ class LogisticRegression:
34
+ """Logistic regression with Newton's Method as the solver.
35
+
36
+ Example usage:
37
+ > clf = LogisticRegression()
38
+ > clf.fit(x_train, y_train)
39
+ > clf.predict(x_eval)
40
+ """
41
+ def __init__(self, step_size=0.01, max_iter=1000000, eps=1e-5,
42
+ theta_0=None, verbose=True):
43
+ """
44
+ Args:
45
+ step_size: Step size for iterative solvers only.
46
+ max_iter: Maximum number of iterations for the solver.
47
+ eps: Threshold for determining convergence.
48
+ theta_0: Initial guess for theta. If None, use the zero vector.
49
+ verbose: Print loss values during training.
50
+ """
51
+ self.theta = theta_0
52
+ self.step_size = step_size
53
+ self.max_iter = max_iter
54
+ self.eps = eps
55
+ self.verbose = verbose
56
+
57
+ def gradient(self, x, y):
58
+ n_examples, dim = x.shape
59
+ logits = self.sigmoid(x)
60
+ # gradient of the average logistic log-loss J(theta)
61
+ gradient = 1 / n_examples * x.T @ (logits - y)
62
+ return gradient
63
+
64
+ def hessian(self, x, y):
65
+ n_examples, dim = x.shape
66
+ # sigmoid = lambda x: 1 / 1 + np.exp(- x @ self.theta)
67
+ logits = self.sigmoid(x)
68
+
69
+ # probs = self._sigmoid(x.dot(self.theta))
70
+ # diag = np.diag(logits * (1. - logits))
71
+ # hess = 1 / n_examples * x.T.dot(diag).dot(x)
72
+ # return hess
73
+
74
+ # diagonal weights sigma_i * (1 - sigma_i) come from the pure second derivatives of the loss (the f_xx, f_yy terms)
75
+ main_diagonal = np.diag(logits * (1 - logits))
76
+ hessian = 1 / n_examples * x.T @ main_diagonal @ x
77
+ return hessian
78
+
79
+ def loss(self, x, y):
80
+ # https://developers.google.com/machine-learning/crash-course/logistic-regression/model-training
81
+ # also in p.16 in Supervised Learning notes
82
+ n_examples, dim = x.shape
83
+ # sigmoid = lambda x: 1 / 1 + np.exp(- x @ self.theta)
84
+ logits = self.sigmoid(x)
85
+
86
+ loss = -np.mean(y * np.log(logits) + (1 - y) * np.log(1 - logits))
87
+ return loss
88
+
89
+
90
+ def sigmoid(self, x):
91
+ # return 1 / (1 + np.exp(-x.dot(self.theta)))
92
+ return 1 / (1 + np.exp(- x @ self.theta))
93
+
94
+
95
+ def fit(self, x, y):
96
+ """Run Newton's Method to minimize J(theta) for logistic regression.
97
+
98
+ Args:
99
+ x: Training example inputs. Shape (n_examples, dim).
100
+ y: Training example labels. Shape (n_examples,).
101
+ """
102
+ # *** START CODE HERE ***
103
+ # NOTE: look at p.18 in notes
104
+ # we need to calculate theta with Newton and then maximize the
105
+ # logistic regression log likelihood function l(theta)
106
+ # prev_theta = theta # store for comparison
107
+
108
+ # m = rows = number of examples
109
+ # n = columns = number of features
110
+
111
+ # breakpoint()
112
+ # NOTE: it looks like they prepend the '1' at the beginning of the x array!
113
+ n_examples, dim = x.shape
114
+ if self.theta is None:
115
+ self.theta = np.zeros(dim)
116
+
117
+ # just need to init for first time.
118
+ # theta_prev = np.ones(dim)
119
+ # # print(np.sum(np.abs(theta_prev - self.theta)) < self.eps)
120
+
121
+ # current_iteration = 0
122
+ # theta_difference = np.sum(np.abs(theta_prev - self.theta))
123
+ # while theta_difference > self.eps and current_iteration < self.max_iter:
124
+ for i in range(self.max_iter):
125
+ # current_iteration += 1
126
+
127
+ gradient = self.gradient(x, y)
128
+ hessian = self.hessian(x, y)
129
+
130
+ # theta_prev = self.step(gradient, hessian)
131
+ theta_prev = np.copy(self.theta)
132
+ # theta_prev = self.step()
133
+ # theta_prev = self.theta
134
+ # self.theta = self.theta - self.step_size * np.linalg.inv(hessian) @ gradient
135
+ self.theta -= self.step_size * np.linalg.inv(hessian).dot(gradient)
136
+
137
+ if np.sum(np.abs(theta_prev - self.theta)) < self.eps:
138
+ break
139
+
140
+ # *** END CODE HERE ***
141
+
142
+ def predict(self, x):
143
+ """Return predicted probabilities given new inputs x.
144
+
145
+ Args:
146
+ x: Inputs of shape (n_examples, dim).
147
+
148
+ Returns:
149
+ Outputs of shape (n_examples,).
150
+ """
151
+ # *** START CODE HERE ***
152
+ # breakpoint()
153
+ # sigmoid = lambda x: 1 / 1 + np.exp(- x @ self.theta)
154
+ prediction = self.sigmoid(x)
155
+ return prediction
156
+ # *** END CODE HERE ***
157
+
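
For reference, the update implemented by gradient, hessian, and fit above is damped Newton's method on the average logistic log-loss $J(\theta)$, with $\sigma(z) = 1/(1+e^{-z})$:

    \nabla J(\theta) = \frac{1}{n} X^\top \big(\sigma(X\theta) - y\big), \qquad
    H = \frac{1}{n} X^\top \mathrm{diag}\big(\sigma_i(1 - \sigma_i)\big)\, X, \qquad
    \theta \leftarrow \theta - \alpha\, H^{-1} \nabla J(\theta)

Here $\alpha$ is the step_size attribute; classical Newton's method takes $\alpha = 1$, which is why the small default step of 0.01 is paired with such a large max_iter.
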
158
+ def main_GDA(train_path, valid_path, save_path):
159
+ """Problem (1e): Gaussian discriminant analysis (GDA)
160
+
161
+ Args:
162
+ train_path: Path to CSV file containing dataset for training.
163
+ valid_path: Path to CSV file containing dataset for validation.
164
+ save_path: Path to save predicted probabilities using np.savetxt().
165
+ """
166
+ # Load dataset
167
+ x_train, y_train = util.load_dataset(train_path, add_intercept=False)
168
+
169
+ # Train a GDA classifier
170
+ clf = GDA()
171
+ clf.fit(x_train, y_train)
172
+
173
+ # Plot decision boundary on validation set
174
+ x_eval, y_eval = util.load_dataset(valid_path, add_intercept=False)
175
+ plot_path = save_path.replace('.txt', '.png')
176
+ util.plot(x_eval, y_eval, clf.theta, plot_path)
177
+ x_eval = util.add_intercept(x_eval)
178
+
179
+ # Use np.savetxt to save outputs from validation set to save_path
180
+ p_eval = clf.predict(x_eval)
181
+ yhat = p_eval > 0.5
182
+ print('GDA Accuracy: %.2f' % np.mean( (yhat == 1) == (y_eval == 1)))
183
+ np.savetxt(save_path, p_eval)
184
+
185
+ class GDA:
186
+ """Gaussian Discriminant Analysis.
187
+
188
+ Example usage:
189
+ > clf = GDA()
190
+ > clf.fit(x_train, y_train)
191
+ > clf.predict(x_eval)
192
+ """
193
+ def __init__(self, step_size=0.01, max_iter=10000, eps=1e-5,
194
+ theta_0=None, verbose=True):
195
+ """
196
+ Args:
197
+ step_size: Step size for iterative solvers only.
198
+ max_iter: Maximum number of iterations for the solver.
199
+ eps: Threshold for determining convergence.
200
+ theta_0: Initial guess for theta. If None, use the zero vector.
201
+ verbose: Print loss values during training.
202
+ """
203
+ self.theta = theta_0
204
+ self.step_size = step_size
205
+ self.max_iter = max_iter
206
+ self.eps = eps
207
+ self.verbose = verbose
208
+
209
+ def sigmoid(self, x):
210
+ # return 1 / (1 + np.exp(-x.dot(self.theta)))
211
+ return 1 / (1 + np.exp(- x @ self.theta))
212
+
213
+
214
+ def fit(self, x, y):
215
+ """Fit a GDA model to training set given by x and y by updating
216
+ self.theta.
217
+
218
+ Args:
219
+ x: Training example inputs. Shape (n_examples, dim).
220
+ y: Training example labels. Shape (n_examples,).
221
+ """
222
+ # *** START CODE HERE ***
223
+ n_examples, dim = x.shape
224
+
225
+ # Find phi, mu_0, mu_1, and sigma
226
+ phi = 1 / n_examples * np.sum(y == 1)
227
+ mu_0 = (y == 0).dot(x) / np.sum(y == 0)
228
+ mu_1 = (y == 1).dot(x) / np.sum(y == 1)
229
+ mu_yi = np.where(np.expand_dims(y == 0, -1),
230
+ np.expand_dims(mu_0, 0),
231
+ np.expand_dims(mu_1, 0))
232
+ sigma = 1 / n_examples * (x - mu_yi).T.dot(x - mu_yi)
233
+
234
+ # Write theta in terms of the parameters
235
+ self.theta = np.zeros(dim + 1)
236
+ sigma_inv = np.linalg.inv(sigma)
237
+ mu_diff = mu_0.T.dot(sigma_inv).dot(mu_0) - mu_1.T.dot(sigma_inv).dot(mu_1)
238
+ self.theta[0] = 1 / 2 * mu_diff - np.log((1 - phi) / phi)
239
+ self.theta[1:] = -sigma_inv.dot(mu_0 - mu_1)
240
+
241
+ # *** END CODE HERE ***
242
+
243
+ def predict(self, x):
244
+ """Make a prediction given new inputs x.
245
+
246
+ Args:
247
+ x: Inputs of shape (n_examples, dim).
248
+
249
+ Returns:
250
+ Outputs of shape (n_examples,).
251
+ """
252
+ # *** START CODE HERE ***
253
+ prediction = self.sigmoid(x)
254
+ return prediction
255
+ # *** END CODE HERE ***
256
+
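
For reference, fit above computes the maximum-likelihood GDA parameters and folds them into a single logistic-shaped posterior:

    \phi = \frac{1}{n}\sum_i 1\{y_i = 1\}, \qquad
    \mu_k = \frac{\sum_{i:\, y_i = k} x_i}{\#\{i :\, y_i = k\}}, \qquad
    \Sigma = \frac{1}{n}\sum_i (x_i - \mu_{y_i})(x_i - \mu_{y_i})^\top

so that $p(y=1 \mid x) = \sigma(\theta^\top x + \theta_0)$ with $\theta = \Sigma^{-1}(\mu_1 - \mu_0)$ and $\theta_0 = \tfrac{1}{2}(\mu_0^\top \Sigma^{-1}\mu_0 - \mu_1^\top \Sigma^{-1}\mu_1) + \log\frac{\phi}{1-\phi}$, exactly the two assignments to self.theta above.
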
257
+ def main_posonly(train_path, valid_path, test_path, save_path):
258
+ """Problem 2: Logistic regression for incomplete, positive-only labels.
259
+
260
+ Run under the following conditions:
261
+ 1. on t-labels,
262
+ 2. on y-labels,
263
+ 3. on y-labels with correction factor alpha.
264
+
265
+ Args:
266
+ train_path: Path to CSV file containing training set.
267
+ valid_path: Path to CSV file containing validation set.
268
+ test_path: Path to CSV file containing test set.
269
+ save_path: Path to save predictions.
270
+
271
+ NOTE: You need to complete logreg implementation first (see class above)!!!
272
+ """
273
+ output_path_true = save_path.replace(WILDCARD, 'true')
274
+ output_path_naive = save_path.replace(WILDCARD, 'naive')
275
+ output_path_adjusted = save_path.replace(WILDCARD, 'adjusted')
276
+
277
+ plot_path = save_path.replace('.txt', '.png')
278
+ plot_path_true = plot_path.replace(WILDCARD, 'true')
279
+ plot_path_naive = plot_path.replace(WILDCARD, 'naive')
280
+ plot_path_adjusted = plot_path.replace(WILDCARD, 'adjusted')
281
+
282
+ # Problem (2a): Train and test on true labels (t)
283
+ full_predictions = fully_observed_predictions(train_path, test_path, output_path_true, plot_path_true)
284
+
285
+ # Problem (2b): Train on y-labels and test on true labels
286
+ naive_predictions, clf = naive_partial_labels_predictions(train_path, test_path, output_path_naive, plot_path_naive)
287
+
288
+ # Problem (2f): Apply correction factor using validation set and test on true labels
289
+ alpha = find_alpha_and_plot_correction(clf, valid_path, test_path, output_path_adjusted, plot_path_adjusted, naive_predictions)
290
+
291
+ return
292
+
293
+ def fully_observed_predictions(train_path, test_path, output_path_true, plot_path_true):
294
+ """
295
+ Problem (2a): Fully Observable Binary Classification Helper Function
296
+
297
+ Args:
298
+ train_path: Path to CSV file containing dataset for training.
299
+ test_path: Path to CSV file containing dataset for testing.
300
+ output_path_true: Path to save observed predictions
301
+ plot_path_true: Path to save the plot using plot_posonly util function
302
+ Return:
303
+ full_predictions: tensor of predictions returned from applied LogReg classifier prediction
304
+ """
305
+ full_predictions = None
306
+ # Problem (2a): Train and test on true labels (t)
307
+ # Make sure to save predicted probabilities to output_path_true using np.savetxt()
308
+ # *** START CODE HERE ***
309
+ x_train, t_train = util.load_dataset(train_path, label_col='t',
310
+ add_intercept=True)
311
+ clf = LogisticRegression()
312
+ clf.fit(x_train, t_train)
313
+
314
+ x_test, t_test = util.load_dataset(test_path, label_col='t',
315
+ add_intercept=True)
316
+
317
+ full_predictions = clf.predict(x_test)
318
+ np.savetxt(output_path_true, full_predictions)
319
+ util.plot(x_test, t_test, clf.theta, plot_path_true)
320
+ # *** END CODE HERE ***
321
+ return full_predictions
322
+
323
+ def naive_partial_labels_predictions(train_path, test_path, output_path_naive, plot_path_naive):
324
+ """
325
+ Problem (2b): Naive Partial Labels Binary Classification Helper Function
326
+
327
+ Args:
328
+ train_path: Path to CSV file containing dataset for training.
329
+ test_path: Path to CSV file containing dataset for testing.
330
+ output_path_naive: Path to save observed predictions
331
+ plot_path_naive: Path to save the plot using plot_posonly util function
332
+ Return:
333
+ naive_predictions: tensor of predictions returned from applied LogReg prediction
334
+ clf: Logistic Regression classifier (will be reused for 2f)
335
+ """
336
+ naive_predictions = None
337
+ clf = None
338
+ # Problem (2b): Train on y-labels and test on true labels
339
+ # Make sure to save predicted probabilities to output_path_naive using np.savetxt()
340
+ # *** START CODE HERE ***
341
+ x_train, y_train = util.load_dataset(train_path, label_col='y',
342
+ add_intercept=True)
343
+ clf = LogisticRegression()
344
+ clf.fit(x_train, y_train)
345
+ x_test, t_test = util.load_dataset(test_path, label_col='t',
346
+ add_intercept=True)
347
+ naive_predictions = clf.predict(x_test)
348
+ np.savetxt(output_path_naive, naive_predictions)
349
+ util.plot(x_test, t_test, clf.theta, plot_path_naive)
350
+ # *** END CODE HERE ***
351
+ return naive_predictions, clf
352
+
353
+ def find_alpha_and_plot_correction(clf, valid_path, test_path, output_path_adjusted, plot_path_adjusted, naive_predictions):
354
+ """
355
+ Problem (2f): Alpha Correction Binary Classification Helper Function
356
+
357
+ Args:
358
+ clf: Logistic regression classifier from part 2b
359
+ valid_path: Path to CSV file containing dataset for validation.
360
+ test_path: Path to CSV file containing dataset for testing.
361
+ output_path_adjusted: Path to save observed predictions
362
+ plot_path_adjusted: Path to save the plot using plot_posonly util function
363
+ naive_predictions: tensor of predictions returned from applied LogReg prediction from 2b
364
+ Return:
365
+ alpha: corrected alpha value
366
+ """
367
+ alpha = None
368
+ # Problem (2f): Apply correction factor using validation set and test on true labels
369
+ # Plot and use np.savetxt to save outputs to output_path_adjusted
370
+ # *** START CODE HERE ***
371
+ x_valid, y_valid = util.load_dataset(valid_path, label_col='y')
372
+ x_valid = x_valid[y_valid == 1, :] # Restrict to just the labeled examples
373
+ x_valid = util.add_intercept(x_valid)
374
+ y_pred = clf.predict(x_valid)
375
+ alpha = np.mean(y_pred)
376
+ print('Found alpha = {}'.format(alpha))
377
+ x_test, t_test = util.load_dataset(test_path, label_col='t',
378
+ add_intercept=True)
379
+
380
+ # Plot and use np.savetxt to save outputs to output_path_adjusted
381
+ np.savetxt(output_path_adjusted, naive_predictions / alpha)
382
+ util.plot(x_test, t_test, clf.theta, plot_path_adjusted, correction=alpha)
383
+ # *** END CODE HERE ***
384
+ return alpha
385
+
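
For reference, the correction above rests on the standard positive-only-labels assumption that $p(y=1 \mid t=1, x) = \alpha$ is constant in $x$, which gives

    p(t=1 \mid x) = \frac{p(y=1 \mid x)}{\alpha}, \qquad
    \alpha = \mathbb{E}\big[h(x) \mid y = 1\big]

so $\alpha$ is estimated as the mean classifier output over the labeled ($y=1$) validation examples (the np.mean(y_pred) above), and the naive predictions are rescaled by $1/\alpha$.
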
386
+ if __name__ == '__main__':
387
+ '''
388
+ Start of Problem 1: Linear Classifiers
389
+ '''
390
+ # 1b
391
+ main_LogReg(train_path='ds1_train.csv',
392
+ valid_path='ds1_valid.csv',
393
+ save_path='logreg_pred_1.txt')
394
+ main_LogReg(train_path='ds2_train.csv',
395
+ valid_path='ds2_valid.csv',
396
+ save_path='logreg_pred_2.txt')
397
+ # 1e
398
+ main_GDA(train_path='ds1_train.csv',
399
+ valid_path='ds1_valid.csv',
400
+ save_path='gda_pred_1.txt')
401
+ main_GDA(train_path='ds2_train.csv',
402
+ valid_path='ds2_valid.csv',
403
+ save_path='gda_pred_2.txt')
404
+
405
+ '''
406
+ Start of Problem 2: Incomplete, Positive-Only Labels
407
+ '''
408
+ main_posonly(train_path='train.csv',
409
+ valid_path='valid.csv',
410
+ test_path='test.csv',
411
+ save_path='posonly_X_pred.txt')
code/pytorch_m1_testing.ipynb ADDED
@@ -0,0 +1,659 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "something\n"
13
+ ]
14
+ }
15
+ ],
16
+ "source": [
17
+ "print(\"something\")"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 2,
23
+ "metadata": {},
24
+ "outputs": [
25
+ {
26
+ "name": "stdout",
27
+ "output_type": "stream",
28
+ "text": [
29
+ "tensor([[0.8833, 0.1793, 0.9218],\n",
30
+ " [0.8408, 0.2123, 0.5323],\n",
31
+ " [0.5581, 0.2310, 0.7946],\n",
32
+ " [0.8700, 0.1769, 0.7497],\n",
33
+ " [0.1971, 0.3898, 0.8916]])\n"
34
+ ]
35
+ }
36
+ ],
37
+ "source": [
38
+ "import torch\n",
39
+ "x = torch.rand(5, 3)\n",
40
+ "print(x)"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": 3,
46
+ "metadata": {},
47
+ "outputs": [
48
+ {
49
+ "ename": "ImportError",
50
+ "evalue": "cannot import name 'batched_dot_mul_sum' from '__main__' (unknown location)",
51
+ "output_type": "error",
52
+ "traceback": [
53
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
54
+ "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
55
+ "\u001b[0;32m/var/folders/4k/y4ljh2217c57vl68z1zkl0440000gn/T/ipykernel_49379/927675702.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 11\u001b[0m globals={'x': x})\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt0\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
56
+ "\u001b[0;32m~/miniforge3/envs/pytorch_m1/lib/python3.8/site-packages/torch/utils/benchmark/utils/timer.py\u001b[0m in \u001b[0;36mtimeit\u001b[0;34m(self, number)\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mcommon\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_torch_threads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_task_spec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_threads\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[0;31m# Warmup\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 261\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnumber\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnumber\u001b[0m \u001b[0;34m//\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 262\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 263\u001b[0m return common.Measurement(\n",
57
+ "\u001b[0;32m~/miniforge3/envs/pytorch_m1/lib/python3.8/timeit.py\u001b[0m in \u001b[0;36mtimeit\u001b[0;34m(self, number)\u001b[0m\n\u001b[1;32m 175\u001b[0m \u001b[0mgc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 177\u001b[0;31m \u001b[0mtiming\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minner\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 178\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgcold\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
58
+ "\u001b[0;32m<timeit-src>\u001b[0m in \u001b[0;36minner\u001b[0;34m(_it, _timer)\u001b[0m\n",
59
+ "\u001b[0;31mImportError\u001b[0m: cannot import name 'batched_dot_mul_sum' from '__main__' (unknown location)"
60
+ ]
61
+ }
62
+ ],
63
+ "source": [
64
+ "import torch.utils.benchmark as benchmark\n",
65
+ "\n",
66
+ "t0 = benchmark.Timer(\n",
67
+ " stmt='batched_dot_mul_sum(x, x)',\n",
68
+ " setup='from __main__ import batched_dot_mul_sum',\n",
69
+ " globals={'x': x})\n",
70
+ "\n",
71
+ "t1 = benchmark.Timer(\n",
72
+ " stmt='batched_dot_bmm(x, x)',\n",
73
+ " setup='from __main__ import batched_dot_bmm',\n",
74
+ " globals={'x': x})\n",
75
+ "\n",
76
+ "print(t0.timeit(100))\n",
77
+ "print(t1.timeit(100))"
78
+ ]
79
+ },
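
The ImportError above is a missing definition rather than a benchmark problem: neither function being timed exists in the notebook's __main__. A minimal sketch that would let the cell run, with the two definitions written in the style of the PyTorch benchmark tutorial (the input shape is an assumption):

    import torch
    import torch.utils.benchmark as benchmark

    def batched_dot_mul_sum(a, b):
        # batched dot product via elementwise multiply + sum
        return a.mul(b).sum(-1)

    def batched_dot_bmm(a, b):
        # batched dot product via batched matrix multiplication
        a = a.reshape(-1, 1, a.shape[-1])
        b = b.reshape(-1, b.shape[-1], 1)
        return torch.bmm(a, b).flatten(-3)

    x = torch.randn(10000, 64)  # assumed input: 10000 vectors of length 64

    t0 = benchmark.Timer(stmt='batched_dot_mul_sum(x, x)',
                         setup='from __main__ import batched_dot_mul_sum',
                         globals={'x': x})
    t1 = benchmark.Timer(stmt='batched_dot_bmm(x, x)',
                         setup='from __main__ import batched_dot_bmm',
                         globals={'x': x})
    print(t0.timeit(100))
    print(t1.timeit(100))
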
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": null,
83
+ "metadata": {},
84
+ "outputs": [
85
+ {
86
+ "name": "stdout",
87
+ "output_type": "stream",
88
+ "text": [
89
+ "# packages in environment at /Users/johnnydevriese/miniconda3:\n",
90
+ "#\n",
91
+ "# Name Version Build Channel\n",
92
+ "aom 3.2.0 he49afe7_2 conda-forge\n",
93
+ "appnope 0.1.2 py39h6e9494a_2 conda-forge\n",
94
+ "backcall 0.2.0 pyh9f0ad1d_0 conda-forge\n",
95
+ "backports 1.0 py_2 conda-forge\n",
96
+ "backports.functools_lru_cache 1.6.4 pyhd8ed1ab_0 conda-forge\n",
97
+ "blas 1.0 mkl \n",
98
+ "brotlipy 0.7.0 py39h89e85a6_1003 conda-forge\n",
99
+ "bzip2 1.0.8 h0d85af4_4 conda-forge\n",
100
+ "ca-certificates 2021.10.8 h033912b_0 conda-forge\n",
101
+ "certifi 2021.10.8 py39h6e9494a_1 conda-forge\n",
102
+ "cffi 1.15.0 py39he338e87_0 conda-forge\n",
103
+ "chardet 4.0.0 py39h6e9494a_2 conda-forge\n",
104
+ "charset-normalizer 2.0.0 pyhd8ed1ab_0 conda-forge\n",
105
+ "colorama 0.4.4 pyh9f0ad1d_0 conda-forge\n",
106
+ "conda 4.10.3 py39h6e9494a_3 conda-forge\n",
107
+ "conda-package-handling 1.7.3 py39h89e85a6_1 conda-forge\n",
108
+ "cryptography 35.0.0 py39h209aa08_2 conda-forge\n",
109
+ "debugpy 1.5.1 py39h9fcab8e_0 conda-forge\n",
110
+ "decorator 5.1.0 pyhd8ed1ab_0 conda-forge\n",
111
+ "entrypoints 0.3 pyhd8ed1ab_1003 conda-forge\n",
112
+ "ffmpeg 4.4.1 h79e7b16_0 conda-forge\n",
113
+ "freetype 2.10.4 h4cff582_1 conda-forge\n",
114
+ "gettext 0.19.8.1 hd1a6beb_1008 conda-forge\n",
115
+ "gmp 6.2.1 h2e338ed_0 conda-forge\n",
116
+ "gnutls 3.6.13 h756fd2b_1 conda-forge\n",
117
+ "icu 69.1 he49afe7_0 conda-forge\n",
118
+ "idna 3.1 pyhd3deb0d_0 conda-forge\n",
119
+ "ipykernel 6.5.0 py39h71a6800_1 conda-forge\n",
120
+ "ipython 7.29.0 py39h71a6800_2 conda-forge\n",
121
+ "jbig 2.1 h0d85af4_2003 conda-forge\n",
122
+ "jedi 0.18.1 py39h6e9494a_0 conda-forge\n",
123
+ "jpeg 9d hbcb3906_0 conda-forge\n",
124
+ "jupyter_client 7.0.6 pyhd8ed1ab_0 conda-forge\n",
125
+ "jupyter_core 4.9.1 py39h6e9494a_1 conda-forge\n",
126
+ "lame 3.100 h35c211d_1001 conda-forge\n",
127
+ "lcms2 2.12 h577c468_0 conda-forge\n",
128
+ "lerc 3.0 he49afe7_0 conda-forge\n",
129
+ "libcxx 12.0.1 habf9029_0 conda-forge\n",
130
+ "libdeflate 1.8 h0d85af4_0 conda-forge\n",
131
+ "libffi 3.4.2 h0d85af4_5 conda-forge\n",
132
+ "libiconv 1.16 haf1e3a3_0 conda-forge\n",
133
+ "libpng 1.6.37 h7cec526_2 conda-forge\n",
134
+ "libsodium 1.0.18 hbcb3906_1 conda-forge\n",
135
+ "libtiff 4.3.0 hd146c10_2 conda-forge\n",
136
+ "libuv 1.42.0 h0d85af4_0 conda-forge\n",
137
+ "libvpx 1.11.0 he49afe7_3 conda-forge\n",
138
+ "libwebp-base 1.2.1 h0d85af4_0 conda-forge\n",
139
+ "libxml2 2.9.12 h7e28ab6_1 conda-forge\n",
140
+ "libzlib 1.2.11 h9173be1_1013 conda-forge\n",
141
+ "llvm-openmp 12.0.1 hda6cdc1_1 conda-forge\n",
142
+ "lz4-c 1.9.3 he49afe7_1 conda-forge\n",
143
+ "matplotlib-inline 0.1.3 pyhd8ed1ab_0 conda-forge\n",
144
+ "mkl 2021.4.0 h89fa619_689 conda-forge\n",
145
+ "mkl-service 2.4.0 py39h89e85a6_0 conda-forge\n",
146
+ "mkl_fft 1.3.1 py39h7ae3660_1 conda-forge\n",
147
+ "mkl_random 1.2.2 py39h4d6be9b_0 conda-forge\n",
148
+ "ncurses 6.2 h2e338ed_4 conda-forge\n",
149
+ "nest-asyncio 1.5.1 pyhd8ed1ab_0 conda-forge\n",
150
+ "nettle 3.6 hedd7734_0 conda-forge\n",
151
+ "numpy 1.21.2 py39h4b4dc7a_0 \n",
152
+ "numpy-base 1.21.2 py39he0bd621_0 \n",
153
+ "olefile 0.46 pyh9f0ad1d_1 conda-forge\n",
154
+ "openh264 2.1.1 hfd3ada9_0 conda-forge\n",
155
+ "openjpeg 2.4.0 h6e7aa92_1 conda-forge\n",
156
+ "openssl 1.1.1l h0d85af4_0 conda-forge\n",
157
+ "parso 0.8.2 pyhd8ed1ab_0 conda-forge\n",
158
+ "pexpect 4.8.0 pyh9f0ad1d_2 conda-forge\n",
159
+ "pickleshare 0.7.5 py_1003 conda-forge\n",
160
+ "pillow 8.4.0 py39he9bb72f_0 conda-forge\n",
161
+ "pip 21.3.1 pyhd8ed1ab_0 conda-forge\n",
162
+ "prompt-toolkit 3.0.22 pyha770c72_0 conda-forge\n",
163
+ "ptyprocess 0.7.0 pyhd3deb0d_0 conda-forge\n",
164
+ "pycosat 0.6.3 py39h89e85a6_1009 conda-forge\n",
165
+ "pycparser 2.21 pyhd8ed1ab_0 conda-forge\n",
166
+ "pygments 2.10.0 pyhd8ed1ab_0 conda-forge\n",
167
+ "pyopenssl 21.0.0 pyhd8ed1ab_0 conda-forge\n",
168
+ "pysocks 1.7.1 py39h6e9494a_4 conda-forge\n",
169
+ "python 3.9.7 h1248fe1_3_cpython conda-forge\n",
170
+ "python-dateutil 2.8.2 pyhd8ed1ab_0 conda-forge\n",
171
+ "python.app 3 py39h9ed2024_0 \n",
172
+ "python_abi 3.9 2_cp39 conda-forge\n",
173
+ "pytorch 1.10.0 py3.9_0 pytorch\n",
174
+ "pyzmq 22.3.0 py39h7fec2f1_1 conda-forge\n",
175
+ "readline 8.1 h05e3726_0 conda-forge\n",
176
+ "requests 2.26.0 pyhd8ed1ab_0 conda-forge\n",
177
+ "ruamel_yaml 0.15.80 py39h89e85a6_1006 conda-forge\n",
178
+ "setuptools 59.1.1 py39h6e9494a_0 conda-forge\n",
179
+ "six 1.16.0 pyh6c4a22f_0 conda-forge\n",
180
+ "sqlite 3.36.0 h23a322b_2 conda-forge\n",
181
+ "svt-av1 0.8.7 he49afe7_1 conda-forge\n",
182
+ "tbb 2021.4.0 h940c156_1 conda-forge\n",
183
+ "tk 8.6.11 h5dbffcc_1 conda-forge\n",
184
+ "torchvision 0.11.1 py39_cpu pytorch\n",
185
+ "tornado 6.1 py39h89e85a6_2 conda-forge\n",
186
+ "tqdm 4.62.3 pyhd8ed1ab_0 conda-forge\n",
187
+ "traitlets 5.1.1 pyhd8ed1ab_0 conda-forge\n",
188
+ "typing_extensions 4.0.0 pyha770c72_0 conda-forge\n",
189
+ "tzdata 2021e he74cb21_0 conda-forge\n",
190
+ "urllib3 1.26.7 pyhd8ed1ab_0 conda-forge\n",
191
+ "wcwidth 0.2.5 pyh9f0ad1d_2 conda-forge\n",
192
+ "wheel 0.37.0 pyhd8ed1ab_1 conda-forge\n",
193
+ "x264 1!161.3030 h0d85af4_1 conda-forge\n",
194
+ "x265 3.5 h940c156_1 conda-forge\n",
195
+ "xz 5.2.5 haf1e3a3_1 conda-forge\n",
196
+ "yaml 0.2.5 haf1e3a3_0 conda-forge\n",
197
+ "zeromq 4.3.4 he49afe7_1 conda-forge\n",
198
+ "zlib 1.2.11 h9173be1_1013 conda-forge\n",
199
+ "zstd 1.5.0 h582d3a0_0 conda-forge\n"
200
+ ]
201
+ }
202
+ ],
203
+ "source": [
204
+ "! conda list"
205
+ ]
206
+ },
207
+ {
208
+ "cell_type": "code",
209
+ "execution_count": null,
210
+ "metadata": {},
211
+ "outputs": [
212
+ {
213
+ "name": "stderr",
214
+ "output_type": "stream",
215
+ "text": [
216
+ "usage: ipykernel_launcher.py [-h] [--batch-size N] [--test-batch-size N]\n",
217
+ " [--epochs N] [--lr LR] [--gamma M] [--no-cuda]\n",
218
+ " [--dry-run] [--seed S] [--log-interval N]\n",
219
+ " [--save-model]\n",
220
+ "ipykernel_launcher.py: error: unrecognized arguments: -f /Users/johnnydevriese/Library/Jupyter/runtime/kernel-59506205-59ae-4a59-8704-3ff419da213d.json\n"
221
+ ]
222
+ },
223
+ {
224
+ "ename": "SystemExit",
225
+ "evalue": "2",
226
+ "output_type": "error",
227
+ "traceback": [
228
+ "An exception has occurred, use %tb to see the full traceback.\n",
229
+ "\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m 2\n"
230
+ ]
231
+ }
232
+ ],
233
+ "source": [
234
+ "from __future__ import print_function\n",
235
+ "import argparse\n",
236
+ "import torch\n",
237
+ "import torch.nn as nn\n",
238
+ "import torch.nn.functional as F\n",
239
+ "import torch.optim as optim\n",
240
+ "from torchvision import datasets, transforms\n",
241
+ "from torch.optim.lr_scheduler import StepLR\n",
242
+ "\n",
243
+ "\n",
244
+ "class Net(nn.Module):\n",
245
+ " def __init__(self):\n",
246
+ " super(Net, self).__init__()\n",
247
+ " self.conv1 = nn.Conv2d(1, 32, 3, 1)\n",
248
+ " self.conv2 = nn.Conv2d(32, 64, 3, 1)\n",
249
+ " self.dropout1 = nn.Dropout(0.25)\n",
250
+ " self.dropout2 = nn.Dropout(0.5)\n",
251
+ " self.fc1 = nn.Linear(9216, 128)\n",
252
+ " self.fc2 = nn.Linear(128, 10)\n",
253
+ "\n",
254
+ " def forward(self, x):\n",
255
+ " x = self.conv1(x)\n",
256
+ " x = F.relu(x)\n",
257
+ " x = self.conv2(x)\n",
258
+ " x = F.relu(x)\n",
259
+ " x = F.max_pool2d(x, 2)\n",
260
+ " x = self.dropout1(x)\n",
261
+ " x = torch.flatten(x, 1)\n",
262
+ " x = self.fc1(x)\n",
263
+ " x = F.relu(x)\n",
264
+ " x = self.dropout2(x)\n",
265
+ " x = self.fc2(x)\n",
266
+ " output = F.log_softmax(x, dim=1)\n",
267
+ " return output\n",
268
+ "\n",
269
+ "\n",
270
+ "def train(args, model, device, train_loader, optimizer, epoch):\n",
271
+ " model.train()\n",
272
+ " for batch_idx, (data, target) in enumerate(train_loader):\n",
273
+ " data, target = data.to(device), target.to(device)\n",
274
+ " optimizer.zero_grad()\n",
275
+ " output = model(data)\n",
276
+ " loss = F.nll_loss(output, target)\n",
277
+ " loss.backward()\n",
278
+ " optimizer.step()\n",
279
+ " if batch_idx % args.log_interval == 0:\n",
280
+ " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n",
281
+ " epoch, batch_idx * len(data), len(train_loader.dataset),\n",
282
+ " 100. * batch_idx / len(train_loader), loss.item()))\n",
283
+ " if args.dry_run:\n",
284
+ " break\n",
285
+ "\n",
286
+ "\n",
287
+ "def test(model, device, test_loader):\n",
288
+ " model.eval()\n",
289
+ " test_loss = 0\n",
290
+ " correct = 0\n",
291
+ " with torch.no_grad():\n",
292
+ " for data, target in test_loader:\n",
293
+ " data, target = data.to(device), target.to(device)\n",
294
+ " output = model(data)\n",
295
+ " test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss\n",
296
+ " pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability\n",
297
+ " correct += pred.eq(target.view_as(pred)).sum().item()\n",
298
+ "\n",
299
+ " test_loss /= len(test_loader.dataset)\n",
300
+ "\n",
301
+ " print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n",
302
+ " test_loss, correct, len(test_loader.dataset),\n",
303
+ " 100. * correct / len(test_loader.dataset)))\n",
304
+ "\n",
305
+ "\n",
306
+ "def main():\n",
307
+ " # Training settings\n",
308
+ " parser = argparse.ArgumentParser(description='PyTorch MNIST Example')\n",
309
+ " parser.add_argument('--batch-size', type=int, default=64, metavar='N',\n",
310
+ " help='input batch size for training (default: 64)')\n",
311
+ " parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',\n",
312
+ " help='input batch size for testing (default: 1000)')\n",
313
+ " parser.add_argument('--epochs', type=int, default=14, metavar='N',\n",
314
+ " help='number of epochs to train (default: 14)')\n",
315
+ " parser.add_argument('--lr', type=float, default=1.0, metavar='LR',\n",
316
+ " help='learning rate (default: 1.0)')\n",
317
+ " parser.add_argument('--gamma', type=float, default=0.7, metavar='M',\n",
318
+ " help='Learning rate step gamma (default: 0.7)')\n",
319
+ " parser.add_argument('--no-cuda', action='store_true', default=False,\n",
320
+ " help='disables CUDA training')\n",
321
+ " parser.add_argument('--dry-run', action='store_true', default=False,\n",
322
+ " help='quickly check a single pass')\n",
323
+ " parser.add_argument('--seed', type=int, default=1, metavar='S',\n",
324
+ " help='random seed (default: 1)')\n",
325
+ " parser.add_argument('--log-interval', type=int, default=10, metavar='N',\n",
326
+ " help='how many batches to wait before logging training status')\n",
327
+ " parser.add_argument('--save-model', action='store_true', default=False,\n",
328
+ " help='For Saving the current Model')\n",
329
+ " args = parser.parse_args()\n",
330
+ " use_cuda = not args.no_cuda and torch.cuda.is_available()\n",
331
+ "\n",
332
+ " torch.manual_seed(args.seed)\n",
333
+ "\n",
334
+ " device = torch.device(\"cuda\" if use_cuda else \"cpu\")\n",
335
+ "\n",
336
+ " train_kwargs = {'batch_size': args.batch_size}\n",
337
+ " test_kwargs = {'batch_size': args.test_batch_size}\n",
338
+ " if use_cuda:\n",
339
+ " cuda_kwargs = {'num_workers': 1,\n",
340
+ " 'pin_memory': True,\n",
341
+ " 'shuffle': True}\n",
342
+ " train_kwargs.update(cuda_kwargs)\n",
343
+ " test_kwargs.update(cuda_kwargs)\n",
344
+ "\n",
345
+ " transform=transforms.Compose([\n",
346
+ " transforms.ToTensor(),\n",
347
+ " transforms.Normalize((0.1307,), (0.3081,))\n",
348
+ " ])\n",
349
+ " dataset1 = datasets.MNIST('../data', train=True, download=True,\n",
350
+ " transform=transform)\n",
351
+ " dataset2 = datasets.MNIST('../data', train=False,\n",
352
+ " transform=transform)\n",
353
+ " train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)\n",
354
+ " test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)\n",
355
+ "\n",
356
+ " model = Net().to(device)\n",
357
+ " optimizer = optim.Adadelta(model.parameters(), lr=args.lr)\n",
358
+ "\n",
359
+ " scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)\n",
360
+ " for epoch in range(1, args.epochs + 1):\n",
361
+ " train(args, model, device, train_loader, optimizer, epoch)\n",
362
+ " test(model, device, test_loader)\n",
363
+ " scheduler.step()\n",
364
+ "\n",
365
+ " if args.save_model:\n",
366
+ " torch.save(model.state_dict(), \"mnist_cnn.pt\")\n",
367
+ "\n",
368
+ "\n",
369
+ "main()"
370
+ ]
371
+ },
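
The SystemExit above is argparse reacting to Jupyter's own command line (the kernel's -f .../kernel.json flag), not a bug in the training code. Inside a notebook, either variant below in main() would be enough:

    args = parser.parse_args([])         # ignore sys.argv and use the declared defaults
    # or
    args, _ = parser.parse_known_args()  # tolerate the kernel's extra flags
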
372
+ {
373
+ "cell_type": "code",
374
+ "execution_count": 5,
375
+ "metadata": {},
376
+ "outputs": [
377
+ {
378
+ "name": "stdout",
379
+ "output_type": "stream",
380
+ "text": [
381
+ "Loss: 0.000393 after 405 batches\n",
382
+ "==> Learned function:\ty = -9.48 x^1 +4.39 x^2 -1.07 x^3 -3.53 x^4 +3.16\n",
383
+ "==> Actual function:\ty = -9.52 x^1 +4.48 x^2 -1.06 x^3 -3.55 x^4 +3.13\n"
384
+ ]
385
+ }
386
+ ],
387
+ "source": [
388
+ "#!/usr/bin/env python\n",
389
+ "from __future__ import print_function\n",
390
+ "from itertools import count\n",
391
+ "\n",
392
+ "import torch\n",
393
+ "import torch.nn.functional as F\n",
394
+ "\n",
395
+ "POLY_DEGREE = 4\n",
396
+ "W_target = torch.randn(POLY_DEGREE, 1) * 5\n",
397
+ "b_target = torch.randn(1) * 5\n",
398
+ "\n",
399
+ "\n",
400
+ "def make_features(x):\n",
401
+ " \"\"\"Builds features i.e. a matrix with columns [x, x^2, x^3, x^4].\"\"\"\n",
402
+ " x = x.unsqueeze(1)\n",
403
+ " return torch.cat([x ** i for i in range(1, POLY_DEGREE+1)], 1)\n",
404
+ "\n",
405
+ "\n",
406
+ "def f(x):\n",
407
+ " \"\"\"Approximated function.\"\"\"\n",
408
+ " return x.mm(W_target) + b_target.item()\n",
409
+ "\n",
410
+ "\n",
411
+ "def poly_desc(W, b):\n",
412
+ " \"\"\"Creates a string description of a polynomial.\"\"\"\n",
413
+ " result = 'y = '\n",
414
+ " for i, w in enumerate(W):\n",
415
+ " result += '{:+.2f} x^{} '.format(w, i + 1)\n",
416
+ " result += '{:+.2f}'.format(b[0])\n",
417
+ " return result\n",
418
+ "\n",
419
+ "\n",
420
+ "def get_batch(batch_size=32):\n",
421
+ " \"\"\"Builds a batch i.e. (x, f(x)) pair.\"\"\"\n",
422
+ " random = torch.randn(batch_size)\n",
423
+ " x = make_features(random)\n",
424
+ " y = f(x)\n",
425
+ " return x, y\n",
426
+ "\n",
427
+ "\n",
428
+ "# Define model\n",
429
+ "fc = torch.nn.Linear(W_target.size(0), 1)\n",
430
+ "\n",
431
+ "for batch_idx in count(1):\n",
432
+ " # Get data\n",
433
+ " batch_x, batch_y = get_batch()\n",
434
+ "\n",
435
+ " # Reset gradients\n",
436
+ " fc.zero_grad()\n",
437
+ "\n",
438
+ " # Forward pass\n",
439
+ " output = F.smooth_l1_loss(fc(batch_x), batch_y)\n",
440
+ " loss = output.item()\n",
441
+ "\n",
442
+ " # Backward pass\n",
443
+ " output.backward()\n",
444
+ "\n",
445
+ " # Apply gradients\n",
446
+ " for param in fc.parameters():\n",
447
+ " param.data.add_(-0.1 * param.grad)\n",
448
+ "\n",
449
+ " # Stop criterion\n",
450
+ " if loss < 1e-3:\n",
451
+ " break\n",
452
+ "\n",
453
+ "print('Loss: {:.6f} after {} batches'.format(loss, batch_idx))\n",
454
+ "print('==> Learned function:\\t' + poly_desc(fc.weight.view(-1), fc.bias))\n",
455
+ "print('==> Actual function:\\t' + poly_desc(W_target.view(-1), b_target))"
456
+ ]
457
+ },
458
+ {
459
+ "cell_type": "code",
460
+ "execution_count": 6,
461
+ "metadata": {},
462
+ "outputs": [
463
+ {
464
+ "name": "stdout",
465
+ "output_type": "stream",
466
+ "text": [
467
+ "tensor([[0.9464, 0.6891],\n",
468
+ " [0.1501, 0.2989],\n",
469
+ " [0.6019, 0.5568],\n",
470
+ " [0.8334, 0.2827],\n",
471
+ " [0.1098, 0.2141],\n",
472
+ " [0.0985, 0.8353],\n",
473
+ " [0.1616, 0.3116],\n",
474
+ " [0.2264, 0.0013],\n",
475
+ " [0.3426, 0.7077],\n",
476
+ " [0.1323, 0.4294]])\n"
477
+ ]
478
+ }
479
+ ],
480
+ "source": [
481
+ "import torch\n",
482
+ "x = torch.rand(10, 2)\n",
483
+ "print(x)"
484
+ ]
485
+ },
486
+ {
487
+ "cell_type": "code",
488
+ "execution_count": null,
489
+ "metadata": {},
490
+ "outputs": [],
491
+ "source": []
492
+ },
493
+ {
494
+ "cell_type": "code",
495
+ "execution_count": 7,
496
+ "metadata": {},
497
+ "outputs": [
498
+ {
499
+ "name": "stdout",
500
+ "output_type": "stream",
501
+ "text": [
502
+ "5.95 µs ± 11.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
503
+ ]
504
+ }
505
+ ],
506
+ "source": [
507
+ "%%timeit\n",
508
+ "# Calculate the projection matrix of x on the CPU\n",
509
+ "H = x.mm( (x.t().mm(x)).inverse() ).mm(x.t())"
510
+ ]
511
+ },
512
+ {
513
+ "cell_type": "code",
514
+ "execution_count": 8,
515
+ "metadata": {},
516
+ "outputs": [],
517
+ "source": [
518
+ "import math\n",
519
+ "\n",
520
+ "WGS84A = 6378137.0\n",
521
+ "WGS84F = 1.0 / 298.257223563\n",
522
+ "WGS84B = WGS84A - WGS84F * WGS84A\n",
523
+ "\n",
524
+ "x = 652954.1006\n",
525
+ "y = 4774619.7919\n",
526
+ "z = -2217647.7937\n",
527
+ "\n",
528
+ "\n",
529
+ "\n",
530
+ "def ecef2GeodeticJohnny(x, y, z, a, b):\n",
531
+ " e2 = (a*a - b*b) / (a*a) # first eccentricity squared\n",
532
+ " d = (a*a - b*b) / b\n",
533
+ " \n",
534
+ " # p2 = np.square(x) + np.square(y)\n",
535
+ " p2 = x * x + y * y\n",
536
+ " p = p2 * p2 \n",
537
+ " r = math.sqrt(p2 + z*z)\n",
538
+ " tu = b*z*(1 + d/r)/(a*p)\n",
539
+ " tu2 = tu*tu\n",
540
+ " cu3 = (1/math.sqrt(1 + tu2))**3\n",
541
+ " su3 = cu3*tu2*tu\n",
542
+ " tp = (z + d*su3)/(p - e2*a*cu3)\n",
543
+ " lat = math.atan(tp)\n",
544
+ " \n",
545
+ " lon = math.atan2(y,x)\n",
546
+ " \n",
547
+ " return lat, lon"
548
+ ]
549
+ },
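
The function above sketches Bowring's method. With $p = \sqrt{x^2+y^2}$, $r = \sqrt{p^2+z^2}$, $e^2 = (a^2-b^2)/a^2$, and $e'^2 = (a^2-b^2)/b^2$ (the d in the code is $e'^2 b$), it computes

    \tan u = \frac{bz}{ap}\Big(1 + \frac{e'^2 b}{r}\Big), \qquad
    \tan\varphi = \frac{z + e'^2 b \sin^3 u}{p - e^2 a \cos^3 u}, \qquad
    \lambda = \operatorname{atan2}(y, x)
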
550
+ {
551
+ "cell_type": "code",
552
+ "execution_count": 9,
553
+ "metadata": {},
554
+ "outputs": [
555
+ {
556
+ "name": "stdout",
557
+ "output_type": "stream",
558
+ "text": [
559
+ "662 ns ± 4.45 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
560
+ ]
561
+ }
562
+ ],
563
+ "source": [
564
+ "%%timeit\n",
565
+ "ecef2GeodeticJohnny(x, y, z, WGS84A, WGS84B)"
566
+ ]
567
+ },
568
+ {
569
+ "cell_type": "markdown",
570
+ "metadata": {},
571
+ "source": [
572
+ "# running pytorch on m1 gpu \n",
573
+ "\n",
574
+ "https://pytorch.org/docs/stable/notes/mps.html"
575
+ ]
576
+ },
577
+ {
578
+ "cell_type": "code",
579
+ "execution_count": 4,
580
+ "metadata": {},
581
+ "outputs": [
582
+ {
583
+ "name": "stderr",
584
+ "output_type": "stream",
585
+ "text": [
586
+ "/Users/johnnydevriese/miniforge3/envs/pytorch-nightly/lib/python3.10/site-packages/torch/_tensor_str.py:103: UserWarning: The operator 'aten::bitwise_and.Tensor_out' is not currently supported on the MPS backend and will fall back to run on the CPU. This may have performance implications. (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/mps/MPSFallback.mm:11.)\n",
587
+ " nonzero_finite_vals = torch.masked_select(tensor_view, torch.isfinite(tensor_view) & tensor_view.ne(0))\n"
588
+ ]
589
+ },
590
+ {
591
+ "name": "stdout",
592
+ "output_type": "stream",
593
+ "text": [
594
+ "tensor([1., 1., 1., 1., 1.], device='mps:0')\n"
595
+ ]
596
+ }
597
+ ],
598
+ "source": [
599
+ "import torch\n",
600
+ "\n",
601
+ "\n",
602
+ "# Check that MPS is available\n",
603
+ "if not torch.backends.mps.is_available():\n",
604
+ " if not torch.backends.mps.is_built():\n",
605
+ " print(\"MPS not available because the current PyTorch install was not \"\n",
606
+ " \"built with MPS enabled.\")\n",
607
+ " else:\n",
608
+ " print(\"MPS not available because the current MacOS version is not 12.3+ \"\n",
609
+ " \"and/or you do not have an MPS-enabled device on this machine.\")\n",
610
+ "\n",
611
+ "else:\n",
612
+ " mps_device = torch.device(\"mps\")\n",
613
+ "\n",
614
+ " # Create a Tensor directly on the mps device\n",
615
+ " x = torch.ones(5, device=mps_device)\n",
616
+ " # Or\n",
617
+ " # x = torch.ones(5, device=\"mps\")\n",
618
+ " print(x)\n",
619
+ "\n",
620
+ " # # Any operation happens on the GPU\n",
621
+ " # y = x * 2\n",
622
+ "\n",
623
+ " # # Move your model to mps just like any other device\n",
624
+ " # model = YourFavoriteNet()\n",
625
+ " # model.to(mps_device)\n",
626
+ "\n",
627
+ " # # Now every call runs on the GPU\n",
628
+ " # pred = model(x)"
629
+ ]
630
+ }
631
+ ],
632
+ "metadata": {
633
+ "kernelspec": {
634
+ "display_name": "Python 3.10.5 ('pytorch-nightly')",
635
+ "language": "python",
636
+ "name": "python3"
637
+ },
638
+ "language_info": {
639
+ "codemirror_mode": {
640
+ "name": "ipython",
641
+ "version": 3
642
+ },
643
+ "file_extension": ".py",
644
+ "mimetype": "text/x-python",
645
+ "name": "python",
646
+ "nbconvert_exporter": "python",
647
+ "pygments_lexer": "ipython3",
648
+ "version": "3.10.5"
649
+ },
650
+ "orig_nbformat": 4,
651
+ "vscode": {
652
+ "interpreter": {
653
+ "hash": "8a8bcccfb183d1298694efece6cf41240378bc61621e95c864629a40c5876542"
654
+ }
655
+ }
656
+ },
657
+ "nbformat": 4,
658
+ "nbformat_minor": 2
659
+ }
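
A minimal sketch of the pattern the commented-out lines in the last cell gesture at, using a placeholder model (any nn.Module moves the same way):

    import torch

    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

    model = torch.nn.Linear(8, 2).to(device)  # placeholder for YourFavoriteNet
    x = torch.randn(4, 8, device=device)      # tensor created directly on the device
    pred = model(x)                           # runs on the GPU when device is mps
    print(pred.device)                        # -> mps:0 on an MPS-capable Mac
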